From 53ea743a9c158f8ca2d75a09ca44ba68606042d8 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Thu, 22 May 2025 17:41:31 -0500 Subject: [PATCH] [ie/youtube] Fix automatic captions for some client combinations (#13268) Fix 32ed5f107c6c641958d1cd2752e130de4db55a13 Authored by: bashonly --- yt_dlp/extractor/youtube/_video.py | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/yt_dlp/extractor/youtube/_video.py b/yt_dlp/extractor/youtube/_video.py index 9f929664f..840829be6 100644 --- a/yt_dlp/extractor/youtube/_video.py +++ b/yt_dlp/extractor/youtube/_video.py @@ -3950,19 +3950,23 @@ class YoutubeIE(YoutubeBaseInfoExtractor): subtitles = {} skipped_subs_clients = set() - prs = traverse_obj(player_responses, ( - # Filter out initial_pr which does not have streamingData (smuggled client context) - lambda _, v: v['streamingData'] and v['captions']['playerCaptionsTracklistRenderer'])) - pctrs = traverse_obj(prs, (..., 'captions', 'playerCaptionsTracklistRenderer', {dict})) + # Only web/mweb clients provide translationLanguages, so include initial_pr in the traversal translation_languages = { - lang.get('languageCode'): self._get_text(lang.get('languageName'), max_runs=1) - for lang in traverse_obj(pctrs, (..., 'translationLanguages', ..., {dict}))} + lang['languageCode']: self._get_text(lang['languageName'], max_runs=1) + for lang in traverse_obj(player_responses, ( + ..., 'captions', 'playerCaptionsTracklistRenderer', 'translationLanguages', + lambda _, v: v['languageCode'] and v['languageName'])) + } # NB: Constructing the full subtitle dictionary is slow get_translated_subs = 'translated_subs' not in self._configuration_arg('skip') and ( self.get_param('writeautomaticsub', False) or self.get_param('listsubtitles')) - all_captions = traverse_obj(pctrs, (..., 'captionTracks', ..., {dict})) + # Filter out initial_pr which does not have streamingData (smuggled client context) + prs = traverse_obj(player_responses, ( + lambda _, v: v['streamingData'] and v['captions']['playerCaptionsTracklistRenderer'])) + all_captions = traverse_obj(prs, ( + ..., 'captions', 'playerCaptionsTracklistRenderer', 'captionTracks', ..., {dict})) need_subs_langs = {get_lang_code(sub) for sub in all_captions if sub.get('kind') != 'asr'} need_caps_langs = { remove_start(get_lang_code(sub), 'a-')