From d3d1ac8eb2f9e96f3d75292e0effe2b1bccece3b Mon Sep 17 00:00:00 2001 From: "Arseniy D." <110495618+AzartX47@users.noreply.github.com> Date: Tue, 19 Aug 2025 13:14:20 -1000 Subject: [PATCH] [ie/steam] Fix extractor (#14008) Closes #14000 Authored by: AzartX47 --- yt_dlp/extractor/steam.py | 91 ++++++++++++++++----------------------- 1 file changed, 37 insertions(+), 54 deletions(-) diff --git a/yt_dlp/extractor/steam.py b/yt_dlp/extractor/steam.py index b7f8ac3ae7..7d14156678 100644 --- a/yt_dlp/extractor/steam.py +++ b/yt_dlp/extractor/steam.py @@ -1,11 +1,19 @@ +import json import re from .common import InfoExtractor from ..utils import ( ExtractorError, + clean_html, extract_attributes, - get_element_by_class, str_or_none, + url_or_none, +) +from ..utils.traversal import ( + find_element, + find_elements, + traverse_obj, + trim_str, ) @@ -19,44 +27,22 @@ class SteamIE(InfoExtractor): | https?://(?:www\.)?steamcommunity\.com/sharedfiles/filedetails/\?id=(?P[0-9]+) ''' - _VIDEO_PAGE_TEMPLATE = 'http://store.steampowered.com/video/%s/' - _AGECHECK_TEMPLATE = 'http://store.steampowered.com/agecheck/video/%s/?snr=1_agecheck_agecheck__age-gate&ageDay=1&ageMonth=January&ageYear=1970' + _VIDEO_PAGE_TEMPLATE = 'https://store.steampowered.com/video/%s/' + _AGECHECK_TEMPLATE = 'https://store.steampowered.com/agecheck/video/%s/?snr=1_agecheck_agecheck__age-gate&ageDay=1&ageMonth=January&ageYear=1970' _TESTS = [{ - 'url': 'http://store.steampowered.com/video/105600/', - 'playlist': [ - { - 'md5': '695242613303ffa2a4c44c9374ddc067', - 'info_dict': { - 'id': '256785003', - 'ext': 'mp4', - 'title': 'Terraria video 256785003', - 'thumbnail': r're:^https://cdn\.[^\.]+\.steamstatic\.com', - }, - }, - { - 'md5': '6a294ee0c4b1f47f5bb76a65e31e3592', - 'info_dict': { - 'id': '2040428', - 'ext': 'mp4', - 'title': 'Terraria video 2040428', - 'thumbnail': r're:^https://cdn\.[^\.]+\.steamstatic\.com', - }, - }, - ], + 'url': 'https://store.steampowered.com/video/105600/', 'info_dict': { 'id': '105600', 'title': 'Terraria', }, - 'params': { - 'playlistend': 2, - }, + 'playlist_mincount': 3, }, { 'url': 'https://store.steampowered.com/app/271590/Grand_Theft_Auto_V/', 'info_dict': { 'id': '271590', - 'title': 'Grand Theft Auto V', + 'title': 'Grand Theft Auto V Legacy', }, - 'playlist_count': 23, + 'playlist_mincount': 26, }] def _real_extract(self, url): @@ -81,32 +67,29 @@ class SteamIE(InfoExtractor): self.report_age_confirmation() webpage = self._download_webpage(video_url, playlist_id) - videos = re.findall(r'(]+id=[\'"]highlight_movie_(\d+)[\'"][^>]+>)', webpage) + app_name = traverse_obj(webpage, ({find_element(cls='apphub_AppName')}, {clean_html})) entries = [] - playlist_title = get_element_by_class('apphub_AppName', webpage) - for movie, movie_id in videos: - if not movie: - continue - movie = extract_attributes(movie) - if not movie_id: - continue - entry = { - 'id': movie_id, - 'title': f'{playlist_title} video {movie_id}', - } + for data_prop in traverse_obj(webpage, ( + {find_elements(cls='highlight_player_item highlight_movie', html=True)}, + ..., {extract_attributes}, 'data-props', {json.loads}, {dict}, + )): formats = [] - if movie: - entry['thumbnail'] = movie.get('data-poster') - for quality in ('', '-hd'): - for ext in ('webm', 'mp4'): - video_url = movie.get(f'data-{ext}{quality}-source') - if video_url: - formats.append({ - 'format_id': ext + quality, - 'url': video_url, - }) - entry['formats'] = formats - entries.append(entry) + if hls_manifest := traverse_obj(data_prop, ('hlsManifest', {url_or_none})): + formats.extend(self._extract_m3u8_formats( + hls_manifest, playlist_id, 'mp4', m3u8_id='hls', fatal=False)) + + for dash_manifest in traverse_obj(data_prop, ('dashManifests', ..., {url_or_none})): + formats.extend(self._extract_mpd_formats( + dash_manifest, playlist_id, mpd_id='dash', fatal=False)) + + movie_id = traverse_obj(data_prop, ('id', {trim_str(start='highlight_movie_')})) + entries.append({ + 'id': movie_id, + 'title': f'{app_name} video {movie_id}', + 'formats': formats, + 'thumbnail': traverse_obj(data_prop, ('screenshot', {url_or_none})), + }) + embedded_videos = re.findall(r'(]+>)', webpage) for evideos in embedded_videos: evideos = extract_attributes(evideos).get('src') @@ -121,7 +104,7 @@ class SteamIE(InfoExtractor): if not entries: raise ExtractorError('Could not find any videos') - return self.playlist_result(entries, playlist_id, playlist_title) + return self.playlist_result(entries, playlist_id, app_name) class SteamCommunityBroadcastIE(InfoExtractor):