1
0
mirror of https://github.com/yt-dlp/yt-dlp.git synced 2025-09-03 08:35:32 +00:00

[extractors] Use new framework for existing embeds (#4307)

`Brightcove` is difficult to migrate because it's subclasses may depend
on the signature of the current functions. So it is left as-is for now

Note: Tests have not been migrated
This commit is contained in:
pukkandan
2022-08-01 06:53:25 +05:30
parent 1e8fe57e5c
commit bfd973ece3
138 changed files with 499 additions and 1909 deletions

View File

@ -30,7 +30,6 @@ from ..utils import (
unsmuggle_url,
urlencode_postdata,
urljoin,
unescapeHTML,
urlhandle_detect_ext,
)
@ -328,6 +327,14 @@ class VimeoIE(VimeoBaseInfoExtractor):
/?(?:[?&].*)?(?:[#].*)?$
'''
IE_NAME = 'vimeo'
_EMBED_REGEX = [
# iframe
r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//player\.vimeo\.com/video/\d+.*?)\1',
# Embedded (swf embed) Vimeo player
r'<embed[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?vimeo\.com/moogaloop\.swf.+?)\1',
# Non-standard embedded Vimeo player
r'<video[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?vimeo\.com/[0-9]+)\1',
]
_TESTS = [
{
'url': 'http://vimeo.com/56015672#at=0',
@ -729,29 +736,10 @@ class VimeoIE(VimeoBaseInfoExtractor):
# vimeo embed with check-password page protected by Referer header
]
@staticmethod
def _extract_urls(url, webpage):
urls = []
# Look for embedded (iframe) Vimeo player
for mobj in re.finditer(
r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//player\.vimeo\.com/video/\d+.*?)\1',
webpage):
urls.append(VimeoIE._smuggle_referrer(unescapeHTML(mobj.group('url')), url))
PLAIN_EMBED_RE = (
# Look for embedded (swf embed) Vimeo player
r'<embed[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?vimeo\.com/moogaloop\.swf.+?)\1',
# Look more for non-standard embedded Vimeo player
r'<video[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?vimeo\.com/[0-9]+)\1',
)
for embed_re in PLAIN_EMBED_RE:
for mobj in re.finditer(embed_re, webpage):
urls.append(mobj.group('url'))
return urls
@staticmethod
def _extract_url(url, webpage):
urls = VimeoIE._extract_urls(url, webpage)
return urls[0] if urls else None
@classmethod
def _extract_embed_urls(cls, url, webpage):
for embed_url in super()._extract_embed_urls(url, webpage):
yield cls._smuggle_referrer(embed_url, url)
def _verify_player_video_password(self, url, video_id, headers):
password = self._get_video_password()
@ -1386,12 +1374,12 @@ class VimeoLikesIE(VimeoChannelIE):
class VHXEmbedIE(VimeoBaseInfoExtractor):
IE_NAME = 'vhx:embed'
_VALID_URL = r'https?://embed\.vhx\.tv/videos/(?P<id>\d+)'
_EMBED_REGEX = [r'<iframe[^>]+src="(?P<url>https?://embed\.vhx\.tv/videos/\d+[^"]*)"']
@staticmethod
def _extract_url(url, webpage):
mobj = re.search(
r'<iframe[^>]+src="(https?://embed\.vhx\.tv/videos/\d+[^"]*)"', webpage)
return VimeoIE._smuggle_referrer(unescapeHTML(mobj.group(1)), url) if mobj else None
@classmethod
def _extract_embed_urls(cls, url, webpage):
for embed_url in super()._extract_embed_urls(url, webpage):
yield cls._smuggle_referrer(embed_url, url)
def _real_extract(self, url):
video_id = self._match_id(url)