From 1722c55400ff30bb5aee5dd7a262f0b7e9ce2f0e Mon Sep 17 00:00:00 2001 From: doe1080 <98906116+doe1080@users.noreply.github.com> Date: Fri, 13 Jun 2025 08:25:08 +0900 Subject: [PATCH] [ie/hypergryph] Improve metadata extraction (#13415) Closes #13384 Authored by: doe1080, eason1478 Co-authored-by: eason1478 <134664337+eason1478@users.noreply.github.com> --- yt_dlp/extractor/hypergryph.py | 56 +++++++++++++++++++++++++++------- 1 file changed, 45 insertions(+), 11 deletions(-) diff --git a/yt_dlp/extractor/hypergryph.py b/yt_dlp/extractor/hypergryph.py index 1fb2e9a98..f405d14b5 100644 --- a/yt_dlp/extractor/hypergryph.py +++ b/yt_dlp/extractor/hypergryph.py @@ -1,32 +1,66 @@ from .common import InfoExtractor -from ..utils import js_to_json, traverse_obj +from ..utils import ( + ExtractorError, + clean_html, + url_or_none, +) +from ..utils.traversal import subs_list_to_dict, traverse_obj class MonsterSirenHypergryphMusicIE(InfoExtractor): + IE_NAME = 'monstersiren' + IE_DESC = '塞壬唱片' + _API_BASE = 'https://monster-siren.hypergryph.com/api' _VALID_URL = r'https?://monster-siren\.hypergryph\.com/music/(?P<id>\d+)' _TESTS = [{ 'url': 'https://monster-siren.hypergryph.com/music/514562', 'info_dict': { 'id': '514562', 'ext': 'wav', - 'artists': ['塞壬唱片-MSR'], - 'album': 'Flame Shadow', 'title': 'Flame Shadow', + 'album': 'Flame Shadow', + 'artists': ['塞壬唱片-MSR'], + 'description': 'md5:19e2acfcd1b65b41b29e8079ab948053', + 'thumbnail': r're:https?://web\.hycdn\.cn/siren/pic/.+\.jpg', + }, + }, { + 'url': 'https://monster-siren.hypergryph.com/music/514518', + 'info_dict': { + 'id': '514518', + 'ext': 'wav', + 'title': 'Heavenly Me (Instrumental)', + 'album': 'Heavenly Me', + 'artists': ['塞壬唱片-MSR', 'AIYUE blessed : 理名'], + 'description': 'md5:ce790b41c932d1ad72eb791d1d8ae598', + 'thumbnail': r're:https?://web\.hycdn\.cn/siren/pic/.+\.jpg', }, }] def _real_extract(self, url): audio_id = self._match_id(url) - webpage = self._download_webpage(url, audio_id) - json_data = 
self._search_json( - r'window\.g_initialProps\s*=', webpage, 'data', audio_id, transform_source=js_to_json) + song = self._download_json(f'{self._API_BASE}/song/{audio_id}', audio_id) + if traverse_obj(song, 'code') != 0: + msg = traverse_obj(song, ('msg', {str}, filter)) + raise ExtractorError( + msg or 'API returned an error response', expected=bool(msg)) + + album = None + if album_id := traverse_obj(song, ('data', 'albumCid', {str})): + album = self._download_json( + f'{self._API_BASE}/album/{album_id}/detail', album_id, fatal=False) return { 'id': audio_id, - 'title': traverse_obj(json_data, ('player', 'songDetail', 'name')), - 'url': traverse_obj(json_data, ('player', 'songDetail', 'sourceUrl')), - 'ext': 'wav', 'vcodec': 'none', - 'artists': traverse_obj(json_data, ('player', 'songDetail', 'artists', ...)), - 'album': traverse_obj(json_data, ('musicPlay', 'albumDetail', 'name')), + **traverse_obj(song, ('data', { + 'title': ('name', {str}), + 'artists': ('artists', ..., {str}), + 'subtitles': ({'url': 'lyricUrl'}, all, {subs_list_to_dict(lang='en')}), + 'url': ('sourceUrl', {url_or_none}), + })), + **traverse_obj(album, ('data', { + 'album': ('name', {str}), + 'description': ('intro', {clean_html}), + 'thumbnail': ('coverUrl', {url_or_none}), + })), }