1
0
mirror of https://github.com/yt-dlp/yt-dlp.git synced 2025-09-03 00:25:08 +00:00

[extractors] Use new framework for existing embeds (#4307)

`Brightcove` is difficult to migrate because it's subclasses may depend
on the signature of the current functions. So it is left as-is for now

Note: Tests have not been migrated
This commit is contained in:
pukkandan
2022-08-01 06:53:25 +05:30
parent 1e8fe57e5c
commit bfd973ece3
138 changed files with 499 additions and 1909 deletions

View File

@ -3,11 +3,29 @@ from ..compat import (
compat_b64decode,
compat_urllib_parse_unquote,
)
from ..utils import int_or_none
from ..utils import classproperty, int_or_none
class MangomoloBaseIE(InfoExtractor):
_BASE_REGEX = r'https?://(?:admin\.mangomolo\.com/analytics/index\.php/customers/embed/|player\.mangomolo\.com/v1/)'
_BASE_REGEX = r'(?:https?:)?//(?:admin\.mangomolo\.com/analytics/index\.php/customers/embed/|player\.mangomolo\.com/v1/)'
_SLUG = None
@classproperty
def _VALID_URL(cls):
return f'{cls._BASE_REGEX}{cls._SLUG}'
@classproperty
def _EMBED_REGEX(cls):
return [rf'<iframe[^>]+src=(["\'])(?P<url>{cls._VALID_URL}.+?)\1']
def _extract_from_webpage(self, url, webpage):
for res in super()._extract_from_webpage(url, webpage):
yield {
**res,
'_type': 'url_transparent',
'id': self._search_regex(self._SLUG, res['url'], 'id', group='id'),
'uploader': self._search_regex(r'^(?:https?://)?([^/]*)/.*', url, 'video uploader'),
}
def _get_real_id(self, page_id):
return page_id
@ -41,14 +59,15 @@ class MangomoloBaseIE(InfoExtractor):
class MangomoloVideoIE(MangomoloBaseIE):
_TYPE = 'video'
IE_NAME = 'mangomolo:' + _TYPE
_VALID_URL = MangomoloBaseIE._BASE_REGEX + r'video\?.*?\bid=(?P<id>\d+)'
_SLUG = r'video\?.*?\bid=(?P<id>\d+)'
_IS_LIVE = False
class MangomoloLiveIE(MangomoloBaseIE):
_TYPE = 'live'
IE_NAME = 'mangomolo:' + _TYPE
_VALID_URL = MangomoloBaseIE._BASE_REGEX + r'(live|index)\?.*?\bchannelid=(?P<id>(?:[A-Za-z0-9+/=]|%2B|%2F|%3D)+)'
_SLUG = r'(?:live|index)\?.*?\bchannelid=(?P<id>(?:[A-Za-z0-9+/=]|%2B|%2F|%3D)+)'
_IS_LIVE = True
def _get_real_id(self, page_id):