From c59ad2b066bbccd3cc4eed580842f961bce7dd4a Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Tue, 22 Jul 2025 16:34:03 -0500 Subject: [PATCH] [utils] `random_user_agent`: Bump versions (#13543) Closes #5362 Authored by: bashonly --- yt_dlp/extractor/adobepass.py | 8 ++---- yt_dlp/extractor/bilibili.py | 7 ----- yt_dlp/extractor/francaisfacile.py | 13 +-------- yt_dlp/extractor/mitele.py | 2 +- yt_dlp/extractor/sproutvideo.py | 2 +- yt_dlp/extractor/telecinco.py | 13 +-------- yt_dlp/utils/networking.py | 46 +++--------------------------- 7 files changed, 10 insertions(+), 81 deletions(-) diff --git a/yt_dlp/extractor/adobepass.py b/yt_dlp/extractor/adobepass.py index 8c2d9d9340..eb45734ec0 100644 --- a/yt_dlp/extractor/adobepass.py +++ b/yt_dlp/extractor/adobepass.py @@ -48,7 +48,6 @@ MSO_INFO = { 'username_field': 'user', 'password_field': 'passwd', 'login_hostname': 'login.xfinity.com', - 'needs_newer_ua': True, }, 'TWC': { 'name': 'Time Warner Cable | Spectrum', @@ -1379,11 +1378,8 @@ class AdobePassIE(InfoExtractor): # XXX: Conventionally, base classes should en @staticmethod def _get_mso_headers(mso_info): - # yt-dlp's default user-agent is usually too old for some MSO's like Comcast_SSO - # See: https://github.com/yt-dlp/yt-dlp/issues/10848 - return { - 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; rv:131.0) Gecko/20100101 Firefox/131.0', - } if mso_info.get('needs_newer_ua') else {} + # Not needed currently + return {} @staticmethod def _get_mvpd_resource(provider_id, title, guid, rating): diff --git a/yt_dlp/extractor/bilibili.py b/yt_dlp/extractor/bilibili.py index 2846702f6a..d00ac63176 100644 --- a/yt_dlp/extractor/bilibili.py +++ b/yt_dlp/extractor/bilibili.py @@ -175,13 +175,6 @@ class BilibiliBaseIE(InfoExtractor): else: note = f'Downloading video formats for cid {cid}' - # TODO: remove this patch once utils.networking.random_user_agent() is updated, see #13735 - # playurl requests carrying old UA will be rejected - headers = { - 'User-Agent': f'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/{random.randint(118,138)}.0.0.0 Safari/537.36', - **(headers or {}), - } - return self._download_json( 'https://api.bilibili.com/x/player/wbi/playurl', bvid, query=self._sign_wbi(params, bvid), headers=headers, note=note)['data'] diff --git a/yt_dlp/extractor/francaisfacile.py b/yt_dlp/extractor/francaisfacile.py index d3208c2828..c432cf486c 100644 --- a/yt_dlp/extractor/francaisfacile.py +++ b/yt_dlp/extractor/francaisfacile.py @@ -1,9 +1,7 @@ import urllib.parse from .common import InfoExtractor -from ..networking.exceptions import HTTPError from ..utils import ( - ExtractorError, float_or_none, url_or_none, ) @@ -58,16 +56,7 @@ class FrancaisFacileIE(InfoExtractor): def _real_extract(self, url): display_id = urllib.parse.unquote(self._match_id(url)) - - try: # yt-dlp's default user-agents are too old and blocked by the site - webpage = self._download_webpage(url, display_id, headers={ - 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; rv:136.0) Gecko/20100101 Firefox/136.0', - }) - except ExtractorError as e: - if not isinstance(e.cause, HTTPError) or e.cause.status != 403: - raise - # Retry with impersonation if hardcoded UA is insufficient - webpage = self._download_webpage(url, display_id, impersonate=True) + webpage = self._download_webpage(url, display_id) data = self._search_json( r']+\bdata-media-id=[^>]+\btype="application/json"[^>]*>', diff --git a/yt_dlp/extractor/mitele.py b/yt_dlp/extractor/mitele.py index 0dded38c65..76fef337a2 100644 --- a/yt_dlp/extractor/mitele.py +++ b/yt_dlp/extractor/mitele.py @@ -79,7 +79,7 @@ class MiTeleIE(TelecincoBaseIE): def _real_extract(self, url): display_id = self._match_id(url) - webpage = self._download_akamai_webpage(url, display_id) + webpage = self._download_webpage(url, display_id) pre_player = self._search_json( r'window\.\$REACTBASE_STATE\.prePlayer_mtweb\s*=', webpage, 'Pre Player', display_id)['prePlayer'] diff --git a/yt_dlp/extractor/sproutvideo.py b/yt_dlp/extractor/sproutvideo.py index 494042738d..4afa838715 100644 --- a/yt_dlp/extractor/sproutvideo.py +++ b/yt_dlp/extractor/sproutvideo.py @@ -99,7 +99,7 @@ class SproutVideoIE(InfoExtractor): url, smuggled_data = unsmuggle_url(url, {}) video_id = self._match_id(url) webpage = self._download_webpage( - url, video_id, headers=traverse_obj(smuggled_data, {'Referer': 'referer'}), impersonate=True) + url, video_id, headers=traverse_obj(smuggled_data, {'Referer': 'referer'})) data = self._search_json( r'(?:var|const|let)\s+(?:dat|playerInfo)\s*=\s*["\']', webpage, 'player info', video_id, contains_pattern=r'[A-Za-z0-9+/=]+', end_pattern=r'["\'];', diff --git a/yt_dlp/extractor/telecinco.py b/yt_dlp/extractor/telecinco.py index 2dbe2a7768..a34f2afd4a 100644 --- a/yt_dlp/extractor/telecinco.py +++ b/yt_dlp/extractor/telecinco.py @@ -63,17 +63,6 @@ class TelecincoBaseIE(InfoExtractor): 'http_headers': headers, } - def _download_akamai_webpage(self, url, display_id): - try: # yt-dlp's default user-agents are too old and blocked by akamai - return self._download_webpage(url, display_id, headers={ - 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; rv:136.0) Gecko/20100101 Firefox/136.0', - }) - except ExtractorError as e: - if not isinstance(e.cause, HTTPError) or e.cause.status != 403: - raise - # Retry with impersonation if hardcoded UA is insufficient to bypass akamai - return self._download_webpage(url, display_id, impersonate=True) - class TelecincoIE(TelecincoBaseIE): IE_DESC = 'telecinco.es, cuatro.com and mediaset.es' @@ -151,7 +140,7 @@ class TelecincoIE(TelecincoBaseIE): def _real_extract(self, url): display_id = self._match_id(url) - webpage = self._download_akamai_webpage(url, display_id) + webpage = self._download_webpage(url, display_id) article = self._search_json( r'window\.\$REACTBASE_STATE\.article(?:_multisite)?\s*=', webpage, 'article', display_id)['article'] diff --git a/yt_dlp/utils/networking.py b/yt_dlp/utils/networking.py index 9fcab6456f..467312ce75 100644 --- a/yt_dlp/utils/networking.py +++ b/yt_dlp/utils/networking.py @@ -15,48 +15,10 @@ from .traversal import traverse_obj def random_user_agent(): - _USER_AGENT_TPL = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%s Safari/537.36' - _CHROME_VERSIONS = ( - '90.0.4430.212', - '90.0.4430.24', - '90.0.4430.70', - '90.0.4430.72', - '90.0.4430.85', - '90.0.4430.93', - '91.0.4472.101', - '91.0.4472.106', - '91.0.4472.114', - '91.0.4472.124', - '91.0.4472.164', - '91.0.4472.19', - '91.0.4472.77', - '92.0.4515.107', - '92.0.4515.115', - '92.0.4515.131', - '92.0.4515.159', - '92.0.4515.43', - '93.0.4556.0', - '93.0.4577.15', - '93.0.4577.63', - '93.0.4577.82', - '94.0.4606.41', - '94.0.4606.54', - '94.0.4606.61', - '94.0.4606.71', - '94.0.4606.81', - '94.0.4606.85', - '95.0.4638.17', - '95.0.4638.50', - '95.0.4638.54', - '95.0.4638.69', - '95.0.4638.74', - '96.0.4664.18', - '96.0.4664.45', - '96.0.4664.55', - '96.0.4664.93', - '97.0.4692.20', - ) - return _USER_AGENT_TPL % random.choice(_CHROME_VERSIONS) + USER_AGENT_TMPL = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/{} Safari/537.36' + # Target versions released within the last ~6 months + CHROME_MAJOR_VERSION_RANGE = (132, 138) + return USER_AGENT_TMPL.format(f'{random.randint(*CHROME_MAJOR_VERSION_RANGE)}.0.0.0') class HTTPHeaderDict(dict):