1
0
mirror of https://github.com/yt-dlp/yt-dlp.git synced 2025-09-03 00:25:08 +00:00

[extractors] Use new framework for existing embeds (#4307)

`Brightcove` is difficult to migrate because its subclasses may depend
on the signature of the current functions, so it is left as-is for now.

Note: Tests have not been migrated
This commit is contained in:
pukkandan
2022-08-01 06:53:25 +05:30
parent 1e8fe57e5c
commit bfd973ece3
138 changed files with 499 additions and 1909 deletions

View File

@ -11,7 +11,7 @@ from ..utils import (
class GediDigitalIE(InfoExtractor):
_VALID_URL = r'''(?x:(?P<url>(?:https?:)//video\.
_VALID_URL = r'''(?x:(?P<base_url>(?:https?:)//video\.
(?:
(?:
(?:espresso\.)?repubblica
@ -34,6 +34,12 @@ class GediDigitalIE(InfoExtractor):
|lasentinella
)\.gelocal
)\.it(?:/[^/]+){2,4}/(?P<id>\d+))(?:$|[?&].*))'''
_EMBED_REGEX = [rf'''(?x)
(?:
data-frame-src=|
<iframe[^\n]+src=
)
(["'])(?P<url>{_VALID_URL})\1''']
_TESTS = [{
'url': 'https://video.lastampa.it/politica/il-paradosso-delle-regionali-la-lega-vince-ma-sembra-aver-perso/121559/121683',
'md5': '84658d7fb9e55a6e57ecc77b73137494',
@ -109,22 +115,9 @@ class GediDigitalIE(InfoExtractor):
urls[i] = urljoin(base_url(e), url_basename(e))
return urls
@staticmethod
def _extract_urls(webpage):
entries = [
mobj.group('eurl')
for mobj in re.finditer(r'''(?x)
(?:
data-frame-src=|
<iframe[^\n]+src=
)
(["'])(?P<eurl>%s)\1''' % GediDigitalIE._VALID_URL, webpage)]
return GediDigitalIE._sanitize_urls(entries)
@staticmethod
def _extract_url(webpage):
urls = GediDigitalIE._extract_urls(webpage)
return urls[0] if urls else None
@classmethod
def _extract_embed_urls(cls, url, webpage):
return cls._sanitize_urls(tuple(super()._extract_embed_urls(url, webpage)))
@staticmethod
def _clean_formats(formats):
@ -139,8 +132,7 @@ class GediDigitalIE(InfoExtractor):
formats[:] = clean_formats
def _real_extract(self, url):
video_id = self._match_id(url)
url = self._match_valid_url(url).group('url')
video_id, url = self._match_valid_url(url).group('id', 'base_url')
webpage = self._download_webpage(url, video_id)
title = self._html_search_meta(
['twitter:title', 'og:title'], webpage, fatal=True)