1
0
mirror of https://github.com/yt-dlp/yt-dlp.git synced 2025-09-03 16:45:17 +00:00

[extractor/rutube] Extract chapters from description (#6345)

Authored by: mushbite
This commit is contained in:
mushbite
2023-03-04 15:33:17 +02:00
committed by GitHub
parent 08ff6d59f9
commit 22ccd5420b
3 changed files with 66 additions and 42 deletions

View File

@ -3649,6 +3649,38 @@ class InfoExtractor:
or urllib.parse.unquote(os.path.splitext(url_basename(url))[0])
or default)
def _extract_chapters_helper(self, chapter_list, start_function, title_function, duration, strict=True):
if not duration:
return
chapter_list = [{
'start_time': start_function(chapter),
'title': title_function(chapter),
} for chapter in chapter_list or []]
if not strict:
chapter_list.sort(key=lambda c: c['start_time'] or 0)
chapters = [{'start_time': 0}]
for idx, chapter in enumerate(chapter_list):
if chapter['start_time'] is None:
self.report_warning(f'Incomplete chapter {idx}')
elif chapters[-1]['start_time'] <= chapter['start_time'] <= duration:
chapters.append(chapter)
elif chapter not in chapters:
self.report_warning(
f'Invalid start time ({chapter["start_time"]} < {chapters[-1]["start_time"]}) for chapter "{chapter["title"]}"')
return chapters[1:]
def _extract_chapters_from_description(self, description, duration):
duration_re = r'(?:\d+:)?\d{1,2}:\d{2}'
sep_re = r'(?m)^\s*(%s)\b\W*\s(%s)\s*$'
return self._extract_chapters_helper(
re.findall(sep_re % (duration_re, r'.+?'), description or ''),
start_function=lambda x: parse_duration(x[0]), title_function=lambda x: x[1],
duration=duration, strict=False) or self._extract_chapters_helper(
re.findall(sep_re % (r'.+?', duration_re), description or ''),
start_function=lambda x: parse_duration(x[1]), title_function=lambda x: x[0],
duration=duration, strict=False)
@staticmethod
def _availability(is_private=None, needs_premium=None, needs_subscription=None, needs_auth=None, is_unlisted=None):
all_known = all(map(