mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2025-06-17 08:54:23 +00:00
[ie/telecinco] Fix extractor (#13379)
Closes #13378. Authored by: bashonly
This commit is contained in:
parent
5d96527be8
commit
03dba2012d
@ -1,7 +1,5 @@
|
|||||||
from .telecinco import TelecincoBaseIE
|
from .telecinco import TelecincoBaseIE
|
||||||
from ..networking.exceptions import HTTPError
|
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
ExtractorError,
|
|
||||||
int_or_none,
|
int_or_none,
|
||||||
parse_iso8601,
|
parse_iso8601,
|
||||||
)
|
)
|
||||||
@ -81,17 +79,7 @@ class MiTeleIE(TelecincoBaseIE):
|
|||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
display_id = self._match_id(url)
|
display_id = self._match_id(url)
|
||||||
|
webpage = self._download_akamai_webpage(url, display_id)
|
||||||
try: # yt-dlp's default user-agents are too old and blocked by akamai
|
|
||||||
webpage = self._download_webpage(url, display_id, headers={
|
|
||||||
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; rv:136.0) Gecko/20100101 Firefox/136.0',
|
|
||||||
})
|
|
||||||
except ExtractorError as e:
|
|
||||||
if not isinstance(e.cause, HTTPError) or e.cause.status != 403:
|
|
||||||
raise
|
|
||||||
# Retry with impersonation if hardcoded UA is insufficient to bypass akamai
|
|
||||||
webpage = self._download_webpage(url, display_id, impersonate=True)
|
|
||||||
|
|
||||||
pre_player = self._search_json(
|
pre_player = self._search_json(
|
||||||
r'window\.\$REACTBASE_STATE\.prePlayer_mtweb\s*=',
|
r'window\.\$REACTBASE_STATE\.prePlayer_mtweb\s*=',
|
||||||
webpage, 'Pre Player', display_id)['prePlayer']
|
webpage, 'Pre Player', display_id)['prePlayer']
|
||||||
|
@ -63,6 +63,17 @@ class TelecincoBaseIE(InfoExtractor):
|
|||||||
'http_headers': headers,
|
'http_headers': headers,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
def _download_akamai_webpage(self, url, display_id):
    """Fetch *url*, working around akamai blocking of stale user-agents.

    The first request is sent with a hardcoded recent Firefox User-Agent,
    since yt-dlp's default user-agents are too old and get blocked by
    akamai. If that request fails with HTTP 403, the page is requested
    once more with ``impersonate=True`` instead of the custom header.

    Returns whatever ``self._download_webpage`` returns (callers treat it
    as the page's HTML). Any ``ExtractorError`` that is not caused by an
    HTTP 403 response is re-raised unchanged.
    """
    firefox_headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; rv:136.0) Gecko/20100101 Firefox/136.0',
    }
    try:
        return self._download_webpage(url, display_id, headers=firefox_headers)
    except ExtractorError as error:
        cause = error.cause
        blocked_by_akamai = isinstance(cause, HTTPError) and cause.status == 403
        if not blocked_by_akamai:
            raise
        # The hardcoded UA was not enough to get past akamai; fall back to
        # impersonation for the second attempt.
        return self._download_webpage(url, display_id, impersonate=True)
|
||||||
|
|
||||||
|
|
||||||
class TelecincoIE(TelecincoBaseIE):
|
class TelecincoIE(TelecincoBaseIE):
|
||||||
IE_DESC = 'telecinco.es, cuatro.com and mediaset.es'
|
IE_DESC = 'telecinco.es, cuatro.com and mediaset.es'
|
||||||
@ -140,7 +151,7 @@ class TelecincoIE(TelecincoBaseIE):
|
|||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
display_id = self._match_id(url)
|
display_id = self._match_id(url)
|
||||||
webpage = self._download_webpage(url, display_id)
|
webpage = self._download_akamai_webpage(url, display_id)
|
||||||
article = self._search_json(
|
article = self._search_json(
|
||||||
r'window\.\$REACTBASE_STATE\.article(?:_multisite)?\s*=',
|
r'window\.\$REACTBASE_STATE\.article(?:_multisite)?\s*=',
|
||||||
webpage, 'article', display_id)['article']
|
webpage, 'article', display_id)['article']
|
||||||
|
Loading…
x
Reference in New Issue
Block a user