1
0
mirror of https://github.com/yt-dlp/yt-dlp.git synced 2025-09-04 00:55:15 +00:00

[extractors] Use new framework for existing embeds (#4307)

`Brightcove` is difficult to migrate because its subclasses may depend
on the signature of the current functions, so it is left as-is for now.

Note: Tests have not been migrated
This commit is contained in:
pukkandan
2022-08-01 06:53:25 +05:30
parent 1e8fe57e5c
commit bfd973ece3
138 changed files with 499 additions and 1909 deletions

View File

@ -85,6 +85,7 @@ class VKBaseIE(InfoExtractor):
class VKIE(VKBaseIE):
IE_NAME = 'vk'
IE_DESC = 'VK'
_EMBED_REGEX = [r'<iframe[^>]+?src=(["\'])(?P<url>https?://vk\.com/video_ext\.php.+?)\1']
_VALID_URL = r'''(?x)
https?://
(?:
@ -100,6 +101,8 @@ class VKIE(VKBaseIE):
(?P<videoid>-?\d+_\d+)(?:.*\blist=(?P<list_id>([\da-f]+)|(ln-[\da-zA-Z]+)))?
)
'''
# https://help.sibnet.ru/?sibnet_video_embed
_EMBED_REGEX = [r'<iframe\b[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//video\.sibnet\.ru/shell\.php\?.*?\bvideoid=\d+.*?)\1']
_TESTS = [
{
'url': 'http://vk.com/videos-77521?z=video-77521_162222515%2Fclub77521',
@ -344,13 +347,6 @@ class VKIE(VKBaseIE):
'only_matching': True,
}]
@staticmethod
def _extract_sibnet_urls(webpage):
# https://help.sibnet.ru/?sibnet_video_embed
return [unescapeHTML(mobj.group('url')) for mobj in re.finditer(
r'<iframe\b[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//video\.sibnet\.ru/shell\.php\?.*?\bvideoid=\d+.*?)\1',
webpage)]
def _real_extract(self, url):
mobj = self._match_valid_url(url)
video_id = mobj.group('videoid')
@ -451,7 +447,7 @@ class VKIE(VKBaseIE):
m_rutube.group(1).replace('\\', ''))
return self.url_result(rutube_url)
dailymotion_urls = DailymotionIE._extract_urls(info_page)
dailymotion_urls = DailymotionIE._extract_embed_urls(url, info_page)
if dailymotion_urls:
return self.url_result(dailymotion_urls[0], DailymotionIE.ie_key())
@ -459,7 +455,7 @@ class VKIE(VKBaseIE):
if odnoklassniki_url:
return self.url_result(odnoklassniki_url, OdnoklassnikiIE.ie_key())
sibnet_urls = self._extract_sibnet_urls(info_page)
sibnet_urls = self._extract_embed_urls(url, info_page)
if sibnet_urls:
return self.url_result(sibnet_urls[0])