From d3d1ac8eb2f9e96f3d75292e0effe2b1bccece3b Mon Sep 17 00:00:00 2001
From: "Arseniy D." <110495618+AzartX47@users.noreply.github.com>
Date: Tue, 19 Aug 2025 13:14:20 -1000
Subject: [PATCH] [ie/steam] Fix extractor (#14008)

Closes #14000
Authored by: AzartX47
---
 yt_dlp/extractor/steam.py | 91 ++++++++++++++++-----------------------
 1 file changed, 37 insertions(+), 54 deletions(-)
diff --git a/yt_dlp/extractor/steam.py b/yt_dlp/extractor/steam.py
index b7f8ac3ae7..7d14156678 100644
--- a/yt_dlp/extractor/steam.py
+++ b/yt_dlp/extractor/steam.py
@@ -1,11 +1,19 @@
+import json
 import re
 
 from .common import InfoExtractor
 from ..utils import (
     ExtractorError,
+    clean_html,
     extract_attributes,
-    get_element_by_class,
     str_or_none,
+    url_or_none,
+)
+from ..utils.traversal import (
+    find_element,
+    find_elements,
+    traverse_obj,
+    trim_str,
 )
 
 
@@ -19,44 +27,22 @@ class SteamIE(InfoExtractor):
         |
         https?://(?:www\.)?steamcommunity\.com/sharedfiles/filedetails/\?id=(?P<fileID>[0-9]+)
     '''
-    _VIDEO_PAGE_TEMPLATE = 'http://store.steampowered.com/video/%s/'
-    _AGECHECK_TEMPLATE = 'http://store.steampowered.com/agecheck/video/%s/?snr=1_agecheck_agecheck__age-gate&ageDay=1&ageMonth=January&ageYear=1970'
+    _VIDEO_PAGE_TEMPLATE = 'https://store.steampowered.com/video/%s/'
+    _AGECHECK_TEMPLATE = 'https://store.steampowered.com/agecheck/video/%s/?snr=1_agecheck_agecheck__age-gate&ageDay=1&ageMonth=January&ageYear=1970'
     _TESTS = [{
-        'url': 'http://store.steampowered.com/video/105600/',
-        'playlist': [
-            {
-                'md5': '695242613303ffa2a4c44c9374ddc067',
-                'info_dict': {
-                    'id': '256785003',
-                    'ext': 'mp4',
-                    'title': 'Terraria video 256785003',
-                    'thumbnail': r're:^https://cdn\.[^\.]+\.steamstatic\.com',
-                },
-            },
-            {
-                'md5': '6a294ee0c4b1f47f5bb76a65e31e3592',
-                'info_dict': {
-                    'id': '2040428',
-                    'ext': 'mp4',
-                    'title': 'Terraria video 2040428',
-                    'thumbnail': r're:^https://cdn\.[^\.]+\.steamstatic\.com',
-                },
-            },
-        ],
+        'url': 'https://store.steampowered.com/video/105600/',
         'info_dict': {
             'id': '105600',
             'title': 'Terraria',
         },
-        'params': {
-            'playlistend': 2,
-        },
+        'playlist_mincount': 3,
     }, {
         'url': 'https://store.steampowered.com/app/271590/Grand_Theft_Auto_V/',
         'info_dict': {
             'id': '271590',
-            'title': 'Grand Theft Auto V',
+            'title': 'Grand Theft Auto V Legacy',
         },
-        'playlist_count': 23,
+        'playlist_mincount': 26,
     }]
 
     def _real_extract(self, url):
@@ -81,32 +67,29 @@ class SteamIE(InfoExtractor):
             self.report_age_confirmation()
             webpage = self._download_webpage(video_url, playlist_id)
 
-        videos = re.findall(r'(<div[^>]+id=[\'"]highlight_movie_(\d+)[\'"][^>]+>)', webpage)
+        app_name = traverse_obj(webpage, ({find_element(cls='apphub_AppName')}, {clean_html}))
         entries = []
-        playlist_title = get_element_by_class('apphub_AppName', webpage)
-        for movie, movie_id in videos:
-            if not movie:
-                continue
-            movie = extract_attributes(movie)
-            if not movie_id:
-                continue
-            entry = {
-                'id': movie_id,
-                'title': f'{playlist_title} video {movie_id}',
-            }
+        for data_prop in traverse_obj(webpage, (
+            {find_elements(cls='highlight_player_item highlight_movie', html=True)},
+            ..., {extract_attributes}, 'data-props', {json.loads}, {dict},
+        )):
             formats = []
-            if movie:
-                entry['thumbnail'] = movie.get('data-poster')
-                for quality in ('', '-hd'):
-                    for ext in ('webm', 'mp4'):
-                        video_url = movie.get(f'data-{ext}{quality}-source')
-                        if video_url:
-                            formats.append({
-                                'format_id': ext + quality,
-                                'url': video_url,
-                            })
-            entry['formats'] = formats
-            entries.append(entry)
+            if hls_manifest := traverse_obj(data_prop, ('hlsManifest', {url_or_none})):
+                formats.extend(self._extract_m3u8_formats(
+                    hls_manifest, playlist_id, 'mp4', m3u8_id='hls', fatal=False))
+
+            for dash_manifest in traverse_obj(data_prop, ('dashManifests', ..., {url_or_none})):
+                formats.extend(self._extract_mpd_formats(
+                    dash_manifest, playlist_id, mpd_id='dash', fatal=False))
+
+            movie_id = traverse_obj(data_prop, ('id', {trim_str(start='highlight_movie_')}))
+            entries.append({
+                'id': movie_id,
+                'title': f'{app_name} video {movie_id}',
+                'formats': formats,
+                'thumbnail': traverse_obj(data_prop, ('screenshot', {url_or_none})),
+            })
+
         embedded_videos = re.findall(r'(<iframe[^>]+>)', webpage)
         for evideos in embedded_videos:
             evideos = extract_attributes(evideos).get('src')
@@ -121,7 +104,7 @@ class SteamIE(InfoExtractor):
         if not entries:
             raise ExtractorError('Could not find any videos')
 
-        return self.playlist_result(entries, playlist_id, playlist_title)
+        return self.playlist_result(entries, playlist_id, app_name)
 
 
 class SteamCommunityBroadcastIE(InfoExtractor):