From 03dba2012d9bd3f402fa8c2f122afba89bbd22a4 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Fri, 6 Jun 2025 17:02:26 -0500 Subject: [PATCH] [ie/telecinco] Fix extractor (#13379) Closes #13378 Authored by: bashonly --- yt_dlp/extractor/mitele.py | 14 +------------- yt_dlp/extractor/telecinco.py | 13 ++++++++++++- 2 files changed, 13 insertions(+), 14 deletions(-) diff --git a/yt_dlp/extractor/mitele.py b/yt_dlp/extractor/mitele.py index 55fa83b51..0dded38c6 100644 --- a/yt_dlp/extractor/mitele.py +++ b/yt_dlp/extractor/mitele.py @@ -1,7 +1,5 @@ from .telecinco import TelecincoBaseIE -from ..networking.exceptions import HTTPError from ..utils import ( - ExtractorError, int_or_none, parse_iso8601, ) @@ -81,17 +79,7 @@ class MiTeleIE(TelecincoBaseIE): def _real_extract(self, url): display_id = self._match_id(url) - - try: # yt-dlp's default user-agents are too old and blocked by akamai - webpage = self._download_webpage(url, display_id, headers={ - 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; rv:136.0) Gecko/20100101 Firefox/136.0', - }) - except ExtractorError as e: - if not isinstance(e.cause, HTTPError) or e.cause.status != 403: - raise - # Retry with impersonation if hardcoded UA is insufficient to bypass akamai - webpage = self._download_webpage(url, display_id, impersonate=True) - + webpage = self._download_akamai_webpage(url, display_id) pre_player = self._search_json( r'window\.\$REACTBASE_STATE\.prePlayer_mtweb\s*=', webpage, 'Pre Player', display_id)['prePlayer'] diff --git a/yt_dlp/extractor/telecinco.py b/yt_dlp/extractor/telecinco.py index a34f2afd4..2dbe2a776 100644 --- a/yt_dlp/extractor/telecinco.py +++ b/yt_dlp/extractor/telecinco.py @@ -63,6 +63,17 @@ class TelecincoBaseIE(InfoExtractor): 'http_headers': headers, } + def _download_akamai_webpage(self, url, display_id): + try: # yt-dlp's default user-agents are too old and blocked by akamai + return self._download_webpage(url, display_id, headers={ + 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; rv:136.0) Gecko/20100101 Firefox/136.0', + }) + except ExtractorError as e: + if not isinstance(e.cause, HTTPError) or e.cause.status != 403: + raise + # Retry with impersonation if hardcoded UA is insufficient to bypass akamai + return self._download_webpage(url, display_id, impersonate=True) + class TelecincoIE(TelecincoBaseIE): IE_DESC = 'telecinco.es, cuatro.com and mediaset.es' @@ -140,7 +151,7 @@ class TelecincoIE(TelecincoBaseIE): def _real_extract(self, url): display_id = self._match_id(url) - webpage = self._download_webpage(url, display_id) + webpage = self._download_akamai_webpage(url, display_id) article = self._search_json( r'window\.\$REACTBASE_STATE\.article(?:_multisite)?\s*=', webpage, 'article', display_id)['article']