1
0
mirror of https://github.com/yt-dlp/yt-dlp.git synced 2025-06-17 08:54:23 +00:00

[ie/umg:de] Rework extractor (#13373)

Authored by: doe1080
This commit is contained in:
doe1080 2025-06-04 04:20:46 +09:00 committed by GitHub
parent e1b6062f8c
commit 4e7c1ea346
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -1,98 +1,53 @@
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import clean_html
int_or_none, from ..utils.traversal import find_element, traverse_obj
parse_filesize,
parse_iso8601,
)
class UMGDeIE(InfoExtractor): class UMGDeIE(InfoExtractor):
_WORKING = False
IE_NAME = 'umg:de' IE_NAME = 'umg:de'
IE_DESC = 'Universal Music Deutschland' IE_DESC = 'Universal Music Deutschland'
_VALID_URL = r'https?://(?:www\.)?universal-music\.de/[^/]+/videos/[^/?#]+-(?P<id>\d+)' _VALID_URL = r'https?://(?:www\.)?universal-music\.de/[^/?#]+/videos/(?P<slug>[^/?#]+-(?P<id>\d+))'
_TEST = { _TESTS = [{
'url': 'https://www.universal-music.de/sido/videos/jedes-wort-ist-gold-wert-457803', 'url': 'https://www.universal-music.de/sido/videos/jedes-wort-ist-gold-wert-457803',
'md5': 'ebd90f48c80dcc82f77251eb1902634f',
'info_dict': { 'info_dict': {
'id': '457803', 'id': '457803',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Jedes Wort ist Gold wert', 'title': 'Jedes Wort ist Gold wert',
'artists': ['Sido'],
'description': 'md5:df2dbffcff1a74e0a7c9bef4b497aeec',
'display_id': 'jedes-wort-ist-gold-wert-457803',
'duration': 210.0,
'thumbnail': r're:https?://images\.universal-music\.de/img/assets/.+\.jpg',
'timestamp': 1513591800, 'timestamp': 1513591800,
'upload_date': '20171218', 'upload_date': '20171218',
'view_count': int,
}, },
} }, {
'url': 'https://www.universal-music.de/alexander-eder/videos/der-doktor-hat-gesagt-609533',
'info_dict': {
'id': '609533',
'ext': 'mp4',
'title': 'Der Doktor hat gesagt',
'artists': ['Alexander Eder'],
'display_id': 'der-doktor-hat-gesagt-609533',
'duration': 146.0,
'thumbnail': r're:https?://images\.universal-music\.de/img/assets/.+\.jpg',
'timestamp': 1742982100,
'upload_date': '20250326',
},
}]
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) display_id, video_id = self._match_valid_url(url).group('slug', 'id')
video_data = self._download_json( webpage = self._download_webpage(url, display_id)
'https://graphql.universal-music.de/',
video_id, query={
'query': '''{
universalMusic(channel:16) {
video(id:%s) {
headline
formats {
formatId
url
type
width
height
mimeType
fileSize
}
duration
createdDate
}
}
}''' % video_id})['data']['universalMusic']['video'] # noqa: UP031
title = video_data['headline']
hls_url_template = 'http://mediadelivery.universal-music-services.de/vod/mp4:autofill/storage/' + '/'.join(list(video_id)) + '/content/%s/file/playlist.m3u8'
thumbnails = []
formats = []
def add_m3u8_format(format_id):
formats.extend(self._extract_m3u8_formats(
hls_url_template % format_id, video_id, 'mp4',
'm3u8_native', m3u8_id='hls', fatal=False))
for f in video_data.get('formats', []):
f_url = f.get('url')
mime_type = f.get('mimeType')
if not f_url or mime_type == 'application/mxf':
continue
fmt = {
'url': f_url,
'width': int_or_none(f.get('width')),
'height': int_or_none(f.get('height')),
'filesize': parse_filesize(f.get('fileSize')),
}
f_type = f.get('type')
if f_type == 'Image':
thumbnails.append(fmt)
elif f_type == 'Video':
format_id = f.get('formatId')
if format_id:
fmt['format_id'] = format_id
if mime_type == 'video/mp4':
add_m3u8_format(format_id)
urlh = self._request_webpage(f_url, video_id, fatal=False)
if urlh:
first_byte = urlh.read(1)
if first_byte not in (b'F', b'\x00'):
continue
formats.append(fmt)
if not formats:
for format_id in (867, 836, 940):
add_m3u8_format(format_id)
return { return {
**self._search_json_ld(webpage, display_id),
'id': video_id, 'id': video_id,
'title': title, 'artists': traverse_obj(self._html_search_meta('umg-artist-screenname', webpage), (filter, all)),
'duration': int_or_none(video_data.get('duration')), # The JSON LD description duplicates the title
'timestamp': parse_iso8601(video_data.get('createdDate'), ' '), 'description': traverse_obj(webpage, ({find_element(cls='_3Y0Lj')}, {clean_html})),
'thumbnails': thumbnails, 'display_id': display_id,
'formats': formats, 'formats': self._extract_m3u8_formats(
'https://hls.universal-music.de/get', display_id, 'mp4', query={'id': video_id}),
} }