mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2025-09-03 08:35:32 +00:00
[ie] Add _search_nuxt_json
helper (#13386)
* Adds InfoExtractor._search_nuxt_json for webpage extraction * Adds InfoExtractor._resolve_nuxt_array for direct use with payload JSON * Adds yt_dlp.utils.jslib module for Python solutions to common JavaScript libraries * Adds devalue.parse and devalue.parse_iter to jslib utils Ref: *9e503be0f2
*f3fd2aa93d/src/parse.js
Authored by: bashonly, Grub4K Co-authored-by: Simon Sawicki <contact@grub4k.dev>
This commit is contained in:
167
yt_dlp/utils/jslib/devalue.py
Normal file
167
yt_dlp/utils/jslib/devalue.py
Normal file
@ -0,0 +1,167 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import array
|
||||
import base64
|
||||
import datetime as dt
|
||||
import math
|
||||
import re
|
||||
|
||||
from .._utils import parse_iso8601
|
||||
|
||||
TYPE_CHECKING = False
|
||||
if TYPE_CHECKING:
|
||||
import collections.abc
|
||||
import typing
|
||||
|
||||
T = typing.TypeVar('T')
|
||||
|
||||
|
||||
# Maps the typed-array / buffer tags used in devalue payloads to the
# `array.array` typecode used to decode their base64-encoded bytes.
# NOTE(review): `array.array` decodes in native byte order; this presumably
# assumes the payload was produced on a host with the same endianness — confirm.
_ARRAY_TYPE_LOOKUP = {
    'Int8Array': 'b',
    'Uint8Array': 'B',
    'Uint8ClampedArray': 'B',
    'Int16Array': 'h',
    'Uint16Array': 'H',
    'Int32Array': 'i',
    'Uint32Array': 'I',
    'Float32Array': 'f',
    'Float64Array': 'd',
    # 64-bit elements need C `long long` ('q'/'Q'): C `long` ('l'/'L') is only
    # 4 bytes wide on LLP64 platforms such as Windows
    'BigInt64Array': 'q',
    'BigUint64Array': 'Q',
    'ArrayBuffer': 'B',
}
|
||||
|
||||
|
||||
def parse_iter(parsed: typing.Any, /, *, revivers: dict[str, collections.abc.Callable[[typing.Any], typing.Any]] | None = None):
    """Resolve an already JSON-decoded devalue payload, yielding recoverable errors.

    Based on https://github.com/Rich-Harris/devalue/blob/f3fd2aa93d79f21746555671f955a897335edb1b/src/parse.js

    This is a generator. Every recoverable problem is *yielded* as an exception
    instance (it is not raised), and the affected slot is left unset or set to None.
    The resolved root value is the generator's return value (`StopIteration.value`).

    @param parsed    Either one of the negative sentinel integers (-1, -3, -4, -5, -6)
                     or a non-empty list whose element 0 is the root value and whose
                     nested values refer to each other by list index.
    @param revivers  Mapping of custom type tag -> callable. For an entry
                     `[tag, index]` the callable receives the fully resolved value
                     at `index` and its return value replaces the entry.
    @yields          Exception instances describing invalid indices, unknown type
                     tags, malformed built-in payloads and failing revivers.
    @returns         The resolved root value. `Set` becomes a list, `Map` a list of
                     `[key, value]` pairs, `null` a dict, `Date` an aware UTC datetime,
                     `RegExp` a compiled pattern (flags are ignored), `BigInt` an int
                     and typed arrays / `ArrayBuffer` a list of numbers.
    @raises ValueError  When the generator is first advanced, if `parsed` is not an
                        accepted integer or a non-empty list.
    """
    # Pre-seeded with the negative sentinel indices; doubles as the cache of
    # already resolved indices so shared and cyclic references resolve once.
    # -1 and -2 (devalue's "undefined" and "hole") have no Python equivalent
    resolved = {
        -1: None,
        -2: None,
        -3: math.nan,
        -4: math.inf,
        -5: -math.inf,
        -6: -0.0,
    }

    if isinstance(parsed, int) and not isinstance(parsed, bool):
        # -2 is only meaningful as an element inside a list, never as the root
        if parsed not in resolved or parsed == -2:
            raise ValueError('invalid integer input')
        return resolved[parsed]
    elif not isinstance(parsed, list):
        raise ValueError('expected int or list as input')
    elif not parsed:
        raise ValueError('expected a non-empty list as input')

    if revivers is None:
        revivers = {}
    # built-in tags whose decoding reads `value[1]`
    payload_tags = ('Date', 'RegExp', 'Object', 'BigInt')
    return_value = [None]
    # Explicit work stack instead of recursion. Each frame is
    # `(container, key_or_offset, source)` where `source` is either an index into
    # `parsed` or a `(tag, entry_index, inner_index, reviver)` tuple for a pending revive
    stack: list[tuple] = [(return_value, 0, 0)]

    while stack:
        target, index, source = stack.pop()
        if isinstance(source, tuple):
            name, entry, inner, reviver = source
            try:
                revived = reviver(target[index])
            except Exception as error:
                yield TypeError(f'failed to parse {inner} as {name!r}: {error}')
                revived = None
            # Cache under the index of the custom entry itself (not the inner index):
            # repeated references must reuse this result instead of reviving again,
            # and direct references to the inner index must keep the raw value
            resolved[entry] = target[index] = revived
            continue

        # anything but an integer cannot be looked up in `resolved` or `parsed`
        if not isinstance(source, int):
            yield TypeError(f'invalid index: {source!r}')
            continue

        if source in resolved:
            target[index] = resolved[source]
            continue

        # guard against Python negative indexing
        if source < 0:
            yield IndexError(f'invalid index: {source!r}')
            continue

        try:
            value = parsed[source]
        except IndexError as error:
            yield error
            continue

        if isinstance(value, list):
            if value and isinstance(value[0], str):
                tag = value[0]
                # TODO: implement zips `strict=True`
                if reviver := revivers.get(tag):
                    if len(value) < 2:
                        yield IndexError(f'{tag!r} is missing its value (index: {source})')
                        continue
                    if value[1] == source:
                        # XXX: avoid infinite loop
                        yield IndexError(f'{tag!r} cannot point to itself (index: {source})')
                        continue
                    # inverse order: resolve index, revive value
                    stack.append((target, index, (tag, source, value[1], reviver)))
                    stack.append((target, index, value[1]))
                    continue

                elif len(value) < 2 and (tag in payload_tags or tag in _ARRAY_TYPE_LOOKUP):
                    yield IndexError(f'{tag!r} is missing its value (index: {source})')
                    result = None

                elif tag == 'Date':
                    try:
                        result = dt.datetime.fromtimestamp(parse_iso8601(value[1]), tz=dt.timezone.utc)
                    except Exception:
                        yield ValueError(f'invalid date: {value[1]!r}')
                        result = None

                elif tag == 'Set':
                    result = [None] * (len(value) - 1)
                    for offset, new_source in enumerate(value[1:]):
                        stack.append((result, offset, new_source))

                elif tag == 'Map':
                    result = []
                    # consume the flat `key, value, key, value, ...` tail pairwise;
                    # an unpaired trailing element is dropped by `zip`
                    for key, new_source in zip(*(iter(value[1:]),) * 2):
                        pair = [None, None]
                        stack.append((pair, 0, key))
                        stack.append((pair, 1, new_source))
                        result.append(pair)

                elif tag == 'RegExp':
                    # XXX: use jsinterp to translate regex flags
                    #      currently ignores `value[2]`
                    try:
                        result = re.compile(value[1])
                    except (re.error, TypeError, ValueError, OverflowError, RecursionError) as error:
                        # report through the generator like every other malformed entry
                        yield error
                        result = None

                elif tag == 'Object':
                    # the payload is stored inline, it is not an index
                    result = value[1]

                elif tag == 'BigInt':
                    try:
                        result = int(value[1])
                    except (TypeError, ValueError) as error:
                        yield error
                        result = None

                elif tag == 'null':
                    result = {}
                    # here the keys are literal, only the values are indices
                    for key, new_source in zip(*(iter(value[1:]),) * 2):
                        stack.append((result, key, new_source))

                elif tag in _ARRAY_TYPE_LOOKUP:
                    try:
                        data = base64.b64decode(value[1])
                        # raises ValueError if the byte length is not a multiple of the item size
                        result = array.array(_ARRAY_TYPE_LOOKUP[tag], data).tolist()
                    except (TypeError, ValueError) as error:
                        yield error
                        result = None

                else:
                    yield TypeError(f'invalid type at {source}: {tag!r}')
                    result = None
            else:
                # plain array: every element is an index to resolve
                result = len(value) * [None]
                for offset, new_source in enumerate(value):
                    stack.append((result, offset, new_source))

        elif isinstance(value, dict):
            # plain object: keys are literal, values are indices to resolve
            result = {}
            for key, new_source in value.items():
                stack.append((result, key, new_source))

        else:
            result = value

        # Store the container before its children are popped and filled in,
        # so that cyclic references resolve to this same object
        target[index] = resolved[source] = result

    return return_value[0]
|
||||
|
||||
|
||||
def parse(parsed: typing.Any, /, *, revivers: dict[str, collections.abc.Callable[[typing.Any], typing.Any]] | None = None):
    """Strict variant of `parse_iter`: raise the first reported error.

    Advances the `parse_iter` generator once. If it finishes without yielding,
    its return value is returned. Otherwise the first yielded error is raised
    and resolution is abandoned.

    @param parsed    Passed through to `parse_iter`.
    @param revivers  Passed through to `parse_iter`.
    @returns         The value `parse_iter` returns on completion.
    @raises          The first error yielded by `parse_iter`, or whatever
                     `parse_iter` itself raises.
    """
    resolver = parse_iter(parsed, revivers=revivers)
    try:
        first_error = next(resolver)
    except StopIteration as finished:
        # generator completed without reporting anything
        return finished.value
    raise first_error
|
Reference in New Issue
Block a user