1
0
mirror of https://github.com/yt-dlp/yt-dlp.git synced 2025-09-03 00:25:08 +00:00

[webvtt, extractor/youtube] Extract auto-subs from livestream VODs

Closes #4130

Authored by: pukkandan, fstirlitz
This commit is contained in:
pukkandan
2022-06-22 03:46:54 +05:30
parent 07b47084ba
commit c646d76f67
2 changed files with 36 additions and 16 deletions

View File

@ -161,6 +161,12 @@ class Magic(HeaderBlock):
_REGEX_TSMAP_MPEGTS = re.compile(r'MPEGTS:([0-9]+)')
_REGEX_TSMAP_SEP = re.compile(r'[ \t]*,[ \t]*')
# This was removed from the spec in the 2017 revision;
# the last spec draft to describe this syntax element is
# <https://www.w3.org/TR/2015/WD-webvtt1-20151208/#webvtt-metadata-header>.
# Nevertheless, YouTube keeps serving files that contain such metadata headers, so they are still parsed here.
_REGEX_META = re.compile(r'(?:(?!-->)[^\r\n])+:(?:(?!-->)[^\r\n])+(?:\r\n|[\r\n])')
@classmethod
def __parse_tsmap(cls, parser):
parser = parser.child()
@ -200,13 +206,18 @@ class Magic(HeaderBlock):
raise ParseError(parser)
extra = m.group(1)
local, mpegts = None, None
if parser.consume(cls._REGEX_TSMAP):
local, mpegts = cls.__parse_tsmap(parser)
if not parser.consume(_REGEX_NL):
local, mpegts, meta = None, None, ''
while not parser.consume(_REGEX_NL):
if parser.consume(cls._REGEX_TSMAP):
local, mpegts = cls.__parse_tsmap(parser)
continue
m = parser.consume(cls._REGEX_META)
if m:
meta += m.group(0)
continue
raise ParseError(parser)
parser.commit()
return cls(extra=extra, mpegts=mpegts, local=local)
return cls(extra=extra, mpegts=mpegts, local=local, meta=meta)
def write_into(self, stream):
stream.write('WEBVTT')
@ -219,6 +230,8 @@ class Magic(HeaderBlock):
stream.write(',MPEGTS:')
stream.write(str(self.mpegts if self.mpegts is not None else 0))
stream.write('\n')
if self.meta:
stream.write(self.meta)
stream.write('\n')