mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2025-09-03 00:25:08 +00:00
[webvtt, extractor/youtube] Extract auto-subs from livestream VODs
Closes #4130 Authored by: pukkandan, fstirlitz
This commit is contained in:
@ -161,6 +161,12 @@ class Magic(HeaderBlock):
|
||||
_REGEX_TSMAP_MPEGTS = re.compile(r'MPEGTS:([0-9]+)')
|
||||
_REGEX_TSMAP_SEP = re.compile(r'[ \t]*,[ \t]*')
|
||||
|
||||
# This was removed from the spec in the 2017 revision;
|
||||
# the last spec draft to describe this syntax element is
|
||||
# <https://www.w3.org/TR/2015/WD-webvtt1-20151208/#webvtt-metadata-header>.
|
||||
# Nevertheless, YouTube keeps serving those
|
||||
_REGEX_META = re.compile(r'(?:(?!-->)[^\r\n])+:(?:(?!-->)[^\r\n])+(?:\r\n|[\r\n])')
|
||||
|
||||
@classmethod
|
||||
def __parse_tsmap(cls, parser):
|
||||
parser = parser.child()
|
||||
@ -200,13 +206,18 @@ class Magic(HeaderBlock):
|
||||
raise ParseError(parser)
|
||||
|
||||
extra = m.group(1)
|
||||
local, mpegts = None, None
|
||||
if parser.consume(cls._REGEX_TSMAP):
|
||||
local, mpegts = cls.__parse_tsmap(parser)
|
||||
if not parser.consume(_REGEX_NL):
|
||||
local, mpegts, meta = None, None, ''
|
||||
while not parser.consume(_REGEX_NL):
|
||||
if parser.consume(cls._REGEX_TSMAP):
|
||||
local, mpegts = cls.__parse_tsmap(parser)
|
||||
continue
|
||||
m = parser.consume(cls._REGEX_META)
|
||||
if m:
|
||||
meta += m.group(0)
|
||||
continue
|
||||
raise ParseError(parser)
|
||||
parser.commit()
|
||||
return cls(extra=extra, mpegts=mpegts, local=local)
|
||||
return cls(extra=extra, mpegts=mpegts, local=local, meta=meta)
|
||||
|
||||
def write_into(self, stream):
|
||||
stream.write('WEBVTT')
|
||||
@ -219,6 +230,8 @@ class Magic(HeaderBlock):
|
||||
stream.write(',MPEGTS:')
|
||||
stream.write(str(self.mpegts if self.mpegts is not None else 0))
|
||||
stream.write('\n')
|
||||
if self.meta:
|
||||
stream.write(self.meta)
|
||||
stream.write('\n')
|
||||
|
||||
|
||||
|
Reference in New Issue
Block a user