[extractor] Extract chapters from JSON-LD (#2031)

Authored by: iw0nderhow, pukkandan
2025-09-03 16:45:17 +00:00 · 2022-01-01 22:07:00 +01:00
parent 7592749cbe
commit f522573787
2 changed files with 77 additions and 0 deletions
--- a/yt_dlp/extractor/common.py
+++ b/yt_dlp/extractor/common.py
@@ -1429,6 +1429,23 @@ class InfoExtractor(object):
                    continue
                info[count_key] = interaction_count

+        def extract_chapter_information(e):
+            """Fill info['chapters'] from the 'Clip' entries of e['hasPart'].
+
+            Each clip contributes its 'name', 'startOffset' and 'endOffset' as
+            'title', 'start_time' and 'end_time'. A missing start is taken from
+            the previous chapter's end (0 for the first chapter); a missing end
+            is taken from the next chapter's start (info's 'duration' for the
+            last chapter). If any chapter still has a missing field afterwards,
+            a warning is reported and no chapters are stored at all.
+            """
+            parts = e.get('hasPart') or []
+            if not isinstance(parts, (list, tuple)):
+                # 'hasPart' may hold a single object instead of a list of them
+                parts = [parts]
+            chapters = [{
+                'title': part.get('name'),
+                'start_time': part.get('startOffset'),
+                'end_time': part.get('endOffset'),
+            } for part in parts if isinstance(part, dict) and part.get('@type') == 'Clip']
+            if not chapters:
+                return
+            # zip() stops at its shortest argument, so both neighbour lists are
+            # padded with a sentinel to the same length as `chapters`. Without
+            # this the last (or only) chapter would be neither filled in nor validated.
+            previous_chapters = [{'end_time': 0}] + chapters[:-1]
+            following_chapters = chapters[1:] + [{'start_time': info.get('duration')}]
+            for idx, (last_c, current_c, next_c) in enumerate(zip(
+                    previous_chapters, chapters, following_chapters)):
+                current_c['end_time'] = current_c['end_time'] or next_c['start_time']
+                # last_c was already processed, so its end_time is final here
+                current_c['start_time'] = current_c['start_time'] or last_c['end_time']
+                if None in current_c.values():
+                    self.report_warning(f'Chapter {idx} contains broken data. Not extracting chapters')
+                    return
+            info['chapters'] = chapters
+
        def extract_video_object(e):
            assert e['@type'] == 'VideoObject'
            author = e.get('author')
@@ -1452,6 +1469,7 @@ class InfoExtractor(object):
                'view_count': int_or_none(e.get('interactionCount')),
            })
            extract_interaction_statistic(e)
+            extract_chapter_information(e)

        def traverse_json_ld(json_ld, at_top_level=True):
            for e in json_ld: