mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2025-09-03 16:45:17 +00:00
[extractor] Support default implicit graph in JSON-LD (#1983)
Original PR: https://github.com/ytdl-org/youtube-dl/pull/30229 Per W3C JSON-LD v1.1 §4.9 (non-normative ref): When a JSON-LD document's top-level structure is a map that contains no other keys than @graph and optionally @context (properties that are not mapped to an IRI or a keyword are ignored), @graph is considered to express the otherwise implicit default graph. Authored by: zmousm
This commit is contained in:
@ -1451,8 +1451,13 @@ class InfoExtractor(object):
|
||||
})
|
||||
extract_interaction_statistic(e)
|
||||
|
||||
for e in json_ld:
|
||||
if '@context' in e:
|
||||
def traverse_json_ld(json_ld, at_top_level=True):
|
||||
for e in json_ld:
|
||||
if at_top_level and '@context' not in e:
|
||||
continue
|
||||
if at_top_level and set(e.keys()) == {'@context', '@graph'}:
|
||||
traverse_json_ld(variadic(e['@graph'], allowed_types=(dict,)), at_top_level=False)
|
||||
break
|
||||
item_type = e.get('@type')
|
||||
if expected_type is not None and expected_type != item_type:
|
||||
continue
|
||||
@ -1488,7 +1493,7 @@ class InfoExtractor(object):
|
||||
info.update({
|
||||
'timestamp': parse_iso8601(e.get('datePublished')),
|
||||
'title': unescapeHTML(e.get('headline')),
|
||||
'description': unescapeHTML(e.get('articleBody')),
|
||||
'description': unescapeHTML(e.get('articleBody') or e.get('description')),
|
||||
})
|
||||
elif item_type == 'VideoObject':
|
||||
extract_video_object(e)
|
||||
@ -1503,6 +1508,8 @@ class InfoExtractor(object):
|
||||
continue
|
||||
else:
|
||||
break
|
||||
traverse_json_ld(json_ld)
|
||||
|
||||
return dict((k, v) for k, v in info.items() if v is not None)
|
||||
|
||||
def _search_nextjs_data(self, webpage, video_id, **kw):
|
||||
|
Reference in New Issue
Block a user