1
0
mirror of https://github.com/yt-dlp/yt-dlp.git synced 2025-09-03 16:45:17 +00:00

[extractor] Extract chapters from JSON-LD (#2031)

Authored by: iw0nderhow, pukkandan
This commit is contained in:
chris
2022-01-01 22:07:00 +01:00
committed by GitHub
parent 7592749cbe
commit f522573787
2 changed files with 77 additions and 0 deletions

View File

@ -1429,6 +1429,23 @@ class InfoExtractor(object):
continue
info[count_key] = interaction_count
def extract_chapter_information(e):
    """Collect chapter markers from the ``hasPart`` clips of JSON-LD node *e*.

    Every ``hasPart`` entry whose ``@type`` is ``Clip`` becomes a chapter
    dict whose ``title``, ``start_time`` and ``end_time`` are taken from the
    clip's ``name``, ``startOffset`` and ``endOffset``. Gaps are then filled
    from the neighbours:

    * a missing ``end_time`` falls back to the next chapter's start, or to
      ``info['duration']`` (if present) for the final chapter;
    * a missing ``start_time`` falls back to the previous chapter's end, or
      to ``0`` for the first chapter.

    If any chapter (including the last one) still has a ``None`` field after
    that, a warning is reported and no chapters are stored at all. Otherwise
    the list is written to ``info['chapters']``. Nothing is returned.

    ``info`` and ``self`` are not parameters; they are resolved from the
    enclosing scope.
    """
    # `hasPart` may be absent, explicitly null, or a single object rather
    # than a list; normalise all of those to a list before iterating.
    parts = e.get('hasPart') or []
    if isinstance(parts, dict):
        parts = [parts]

    chapters = [{
        'title': part.get('name'),
        'start_time': part.get('startOffset'),
        'end_time': part.get('endOffset'),
    } for part in parts if isinstance(part, dict) and part.get('@type') == 'Clip']
    if not chapters:
        return

    # The fallback end of each chapter is the *declared* start of the one
    # after it (captured before any backfilling); the last chapter falls
    # back to the overall duration, which may be unknown.
    fallback_ends = [c['start_time'] for c in chapters[1:]] + [info.get('duration')]

    previous_end = 0
    for idx, (chapter, fallback_end) in enumerate(zip(chapters, fallback_ends)):
        chapter['end_time'] = chapter['end_time'] or fallback_end
        chapter['start_time'] = chapter['start_time'] or previous_end
        if None in chapter.values():
            self.report_warning(f'Chapter {idx} contains broken data. Not extracting chapters')
            return
        previous_end = chapter['end_time']

    info['chapters'] = chapters
def extract_video_object(e):
assert e['@type'] == 'VideoObject'
author = e.get('author')
@ -1452,6 +1469,7 @@ class InfoExtractor(object):
'view_count': int_or_none(e.get('interactionCount')),
})
extract_interaction_statistic(e)
extract_chapter_information(e)
def traverse_json_ld(json_ld, at_top_level=True):
for e in json_ld: