1
0
mirror of https://github.com/yt-dlp/yt-dlp.git synced 2025-04-19 07:26:07 +00:00

[ie/rumble] Improve format extraction (#12838)

Closes #12837
Authored by: seproDev
This commit is contained in:
sepro 2025-04-05 20:29:57 +02:00 committed by GitHub
parent 4ebf41309d
commit 58d0c83457
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -7,7 +7,6 @@ from ..utils import (
ExtractorError, ExtractorError,
UnsupportedError, UnsupportedError,
clean_html, clean_html,
determine_ext,
extract_attributes, extract_attributes,
format_field, format_field,
get_element_by_class, get_element_by_class,
@ -36,7 +35,7 @@ class RumbleEmbedIE(InfoExtractor):
'upload_date': '20191020', 'upload_date': '20191020',
'channel_url': 'https://rumble.com/c/WMAR', 'channel_url': 'https://rumble.com/c/WMAR',
'channel': 'WMAR', 'channel': 'WMAR',
'thumbnail': 'https://sp.rmbl.ws/s8/1/5/M/z/1/5Mz1a.qR4e-small-WMAR-2-News-Latest-Headline.jpg', 'thumbnail': r're:https://.+\.jpg',
'duration': 234, 'duration': 234,
'uploader': 'WMAR', 'uploader': 'WMAR',
'live_status': 'not_live', 'live_status': 'not_live',
@ -52,7 +51,7 @@ class RumbleEmbedIE(InfoExtractor):
'upload_date': '20220217', 'upload_date': '20220217',
'channel_url': 'https://rumble.com/c/CyberTechNews', 'channel_url': 'https://rumble.com/c/CyberTechNews',
'channel': 'CTNews', 'channel': 'CTNews',
'thumbnail': 'https://sp.rmbl.ws/s8/6/7/i/9/h/7i9hd.OvCc.jpg', 'thumbnail': r're:https://.+\.jpg',
'duration': 901, 'duration': 901,
'uploader': 'CTNews', 'uploader': 'CTNews',
'live_status': 'not_live', 'live_status': 'not_live',
@ -114,6 +113,22 @@ class RumbleEmbedIE(InfoExtractor):
'live_status': 'was_live', 'live_status': 'was_live',
}, },
'params': {'skip_download': True}, 'params': {'skip_download': True},
}, {
'url': 'https://rumble.com/embed/v6pezdb',
'info_dict': {
'id': 'v6pezdb',
'ext': 'mp4',
'title': '"Es war einmal ein Mädchen" Ein filmisches Zeitzeugnis aus Leningrad 1944',
'uploader': 'RT DE',
'channel': 'RT DE',
'channel_url': 'https://rumble.com/c/RTDE',
'duration': 309,
'thumbnail': 'https://1a-1791.com/video/fww1/dc/s8/1/n/z/2/y/nz2yy.qR4e-small-Es-war-einmal-ein-Mdchen-Ei.jpg',
'timestamp': 1743703500,
'upload_date': '20250403',
'live_status': 'not_live',
},
'params': {'skip_download': True},
}, { }, {
'url': 'https://rumble.com/embed/ufe9n.v5pv5f', 'url': 'https://rumble.com/embed/ufe9n.v5pv5f',
'only_matching': True, 'only_matching': True,
@ -168,40 +183,42 @@ class RumbleEmbedIE(InfoExtractor):
live_status = None live_status = None
formats = [] formats = []
for ext, ext_info in (video.get('ua') or {}).items(): for format_type, format_info in (video.get('ua') or {}).items():
if isinstance(ext_info, dict): if isinstance(format_info, dict):
for height, video_info in ext_info.items(): for height, video_info in format_info.items():
if not traverse_obj(video_info, ('meta', 'h', {int_or_none})): if not traverse_obj(video_info, ('meta', 'h', {int_or_none})):
video_info.setdefault('meta', {})['h'] = height video_info.setdefault('meta', {})['h'] = height
ext_info = ext_info.values() format_info = format_info.values()
for video_info in ext_info: for video_info in format_info:
meta = video_info.get('meta') or {} meta = video_info.get('meta') or {}
if not video_info.get('url'): if not video_info.get('url'):
continue continue
if ext == 'hls': # With default query params returns m3u8 variants which are duplicates, without returns tar files
if format_type == 'tar':
continue
if format_type == 'hls':
if meta.get('live') is True and video.get('live') == 1: if meta.get('live') is True and video.get('live') == 1:
live_status = 'post_live' live_status = 'post_live'
formats.extend(self._extract_m3u8_formats( formats.extend(self._extract_m3u8_formats(
video_info['url'], video_id, video_info['url'], video_id,
ext='mp4', m3u8_id='hls', fatal=False, live=live_status == 'is_live')) ext='mp4', m3u8_id='hls', fatal=False, live=live_status == 'is_live'))
continue continue
timeline = ext == 'timeline' is_timeline = format_type == 'timeline'
if timeline: is_audio = format_type == 'audio'
ext = determine_ext(video_info['url'])
formats.append({ formats.append({
'ext': ext, 'acodec': 'none' if is_timeline else None,
'acodec': 'none' if timeline else None, 'vcodec': 'none' if is_audio else None,
'url': video_info['url'], 'url': video_info['url'],
'format_id': join_nonempty(ext, format_field(meta, 'h', '%sp')), 'format_id': join_nonempty(format_type, format_field(meta, 'h', '%sp')),
'format_note': 'Timeline' if timeline else None, 'format_note': 'Timeline' if is_timeline else None,
'fps': None if timeline else video.get('fps'), 'fps': None if is_timeline or is_audio else video.get('fps'),
**traverse_obj(meta, { **traverse_obj(meta, {
'tbr': 'bitrate', 'tbr': ('bitrate', {int_or_none}),
'filesize': 'size', 'filesize': ('size', {int_or_none}),
'width': 'w', 'width': ('w', {int_or_none}),
'height': 'h', 'height': ('h', {int_or_none}),
}, expected_type=lambda x: int(x) or None), }),
}) })
subtitles = { subtitles = {