1
0
mirror of https://github.com/yt-dlp/yt-dlp.git synced 2025-09-03 16:45:17 +00:00

[extractor] Support default implicit graph in JSON-LD (#1983)

Original PR: https://github.com/ytdl-org/youtube-dl/pull/30229

Per W3C JSON-LD v1.1 §4.9 (non-normative ref):

    When a JSON-LD document's top-level structure is a map that contains
    no other keys than @graph and optionally @context (properties that
    are not mapped to an IRI or a keyword are ignored), @graph is
    considered to express the otherwise implicit default graph.

Authored by: zmousm
This commit is contained in:
Zenon Mousmoulas
2021-12-16 22:46:30 +02:00
committed by GitHub
parent fed1309651
commit d5c3254889
2 changed files with 94 additions and 18 deletions

View File

@@ -1451,8 +1451,13 @@ class InfoExtractor(object):
})
extract_interaction_statistic(e)
for e in json_ld:
if '@context' in e:
def traverse_json_ld(json_ld, at_top_level=True):
for e in json_ld:
if at_top_level and '@context' not in e:
continue
if at_top_level and set(e.keys()) == {'@context', '@graph'}:
traverse_json_ld(variadic(e['@graph'], allowed_types=(dict,)), at_top_level=False)
break
item_type = e.get('@type')
if expected_type is not None and expected_type != item_type:
continue
@@ -1488,7 +1493,7 @@ class InfoExtractor(object):
info.update({
'timestamp': parse_iso8601(e.get('datePublished')),
'title': unescapeHTML(e.get('headline')),
'description': unescapeHTML(e.get('articleBody')),
'description': unescapeHTML(e.get('articleBody') or e.get('description')),
})
elif item_type == 'VideoObject':
extract_video_object(e)
@@ -1503,6 +1508,8 @@ class InfoExtractor(object):
continue
else:
break
traverse_json_ld(json_ld)
return dict((k, v) for k, v in info.items() if v is not None)
def _search_nextjs_data(self, webpage, video_id, **kw):