1
0
mirror of https://github.com/yt-dlp/yt-dlp.git synced 2025-07-22 19:06:38 +00:00

Allow extractors to designate formats/subtitles for impersonation (#13778)

Authored by: bashonly
This commit is contained in:
bashonly 2025-07-20 18:05:43 -05:00 committed by GitHub
parent f9dff95cb1
commit 32809eb2da
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 54 additions and 20 deletions

View File

@ -52,7 +52,7 @@ from .networking.exceptions import (
SSLError,
network_exceptions,
)
from .networking.impersonate import ImpersonateRequestHandler
from .networking.impersonate import ImpersonateRequestHandler, ImpersonateTarget
from .plugins import directories as plugin_directories, load_all_plugins
from .postprocessor import (
EmbedThumbnailPP,
@ -3231,6 +3231,16 @@ class YoutubeDL:
}
else:
params = self.params
impersonate = info.pop('impersonate', None)
# Do not override --impersonate with extractor-specified impersonation
if params.get('impersonate') is None:
available_target, requested_targets = self._parse_impersonate_targets(impersonate)
if available_target:
info['impersonate'] = available_target
elif requested_targets:
self.report_warning(self._unavailable_targets_message(requested_targets), only_once=True)
fd = get_suitable_downloader(info, params, to_stdout=(name == '-'))(self, params)
if not test:
for ph in self._progress_hooks:
@ -4183,6 +4193,31 @@ class YoutubeDL:
for rh in self._request_director.handlers.values()
if isinstance(rh, ImpersonateRequestHandler))
def _parse_impersonate_targets(self, impersonate):
    """Normalize an impersonation request and pick the first usable target.

    `impersonate` may be True or an empty string (any target is acceptable),
    a single ImpersonateTarget or target string, or several of them.
    Anything else that is falsy (e.g. None) means nothing was requested.

    Returns a 2-tuple `(available_target, requested_targets)`:
        available_target:   the first requested target accepted by
                            self._impersonate_target_available, or None
        requested_targets:  list of every requested target, with non-target
                            values converted via ImpersonateTarget.from_str
    """
    if impersonate in (True, ''):
        # No particular client was asked for; request an unconstrained target
        impersonate = ImpersonateTarget()

    requested_targets = []
    if impersonate:
        for candidate in variadic(impersonate):
            if not isinstance(candidate, ImpersonateTarget):
                candidate = ImpersonateTarget.from_str(candidate)
            requested_targets.append(candidate)

    is_available = self._impersonate_target_available
    for candidate in requested_targets:
        if is_available(candidate):
            return candidate, requested_targets
    return None, requested_targets
@staticmethod
def _unavailable_targets_message(requested_targets, note=None, is_error=False):
    """Compose the message used when no requested impersonate target is available.

    requested_targets:  iterable of targets; each one is rendered with str()
                        and those that render to an empty string are omitted
    note:               leading clause of the message; if falsy, a default
                        clause about the extractor requesting impersonation
                        for this download is used
    is_error:           selects the wording that introduces the documentation
                        link ("See" instead of "If you encounter errors, then see")

    Returns the assembled message as a string.
    """
    if not note:
        note = 'The extractor specified to use impersonation for this download'

    # Targets whose string form is empty carry no name worth listing
    target_names = [name for name in map(str, requested_targets) if name]
    specific_targets = ', '.join(target_names)

    if specific_targets:
        problem = f'none of these impersonate targets are available: {specific_targets}'
    else:
        problem = 'no impersonate target is available'

    lead_in = 'See' if is_error else 'If you encounter errors, then see'

    return (
        f'{note}, but {problem}. {lead_in}'
        ' https://github.com/yt-dlp/yt-dlp#impersonation '
        'for information on installing the required dependencies')
def urlopen(self, req):
""" Start an HTTP download """
if isinstance(req, str):

View File

@ -99,7 +99,7 @@ def _get_suitable_downloader(info_dict, protocol, params, default):
if external_downloader is None:
if info_dict['to_stdout'] and FFmpegFD.can_merge_formats(info_dict, params):
return FFmpegFD
elif external_downloader.lower() != 'native':
elif external_downloader.lower() != 'native' and info_dict.get('impersonate') is None:
ed = get_external_downloader(external_downloader)
if ed.can_download(info_dict, external_downloader):
return ed

View File

@ -27,6 +27,9 @@ class HttpFD(FileDownloader):
def real_download(self, filename, info_dict):
url = info_dict['url']
request_data = info_dict.get('request_data', None)
request_extensions = {}
if info_dict.get('impersonate') is not None:
request_extensions['impersonate'] = info_dict['impersonate']
class DownloadContext(dict):
__getattr__ = dict.get
@ -109,7 +112,7 @@ class HttpFD(FileDownloader):
if try_call(lambda: range_end >= ctx.content_len):
range_end = ctx.content_len - 1
request = Request(url, request_data, headers)
request = Request(url, request_data, headers, extensions=request_extensions)
has_range = range_start is not None
if has_range:
request.headers['Range'] = f'bytes={int(range_start)}-{int_or_none(range_end) or ""}'

View File

@ -38,7 +38,6 @@ from ..networking.exceptions import (
TransportError,
network_exceptions,
)
from ..networking.impersonate import ImpersonateTarget
from ..utils import (
IDENTITY,
JSON_LD_RE,
@ -259,6 +258,11 @@ class InfoExtractor:
* key The key (as hex) used to decrypt fragments.
If `key` is given, any key URI will be ignored
* iv The IV (as hex) used to decrypt fragments
* impersonate Impersonate target(s). Can be any of the following entities:
* an instance of yt_dlp.networking.impersonate.ImpersonateTarget
* a string in the format of CLIENT[:OS]
* a list or a tuple of CLIENT[:OS] strings or ImpersonateTarget instances
* a boolean value; True means any impersonate target is sufficient
* downloader_options A dictionary of downloader options
(For internal use only)
* http_chunk_size Chunk size for HTTP downloads
@ -336,6 +340,7 @@ class InfoExtractor:
* "name": Name or description of the subtitles
* "http_headers": A dictionary of additional HTTP headers
to add to the request.
* "impersonate": Impersonate target(s); same as the "formats" field
"ext" will be calculated from URL if missing
automatic_captions: Like 'subtitles'; contains automatically generated
captions instead of normal subtitles
@ -884,26 +889,17 @@ class InfoExtractor:
extensions = {}
if impersonate in (True, ''):
impersonate = ImpersonateTarget()
requested_targets = [
t if isinstance(t, ImpersonateTarget) else ImpersonateTarget.from_str(t)
for t in variadic(impersonate)
] if impersonate else []
available_target = next(filter(self._downloader._impersonate_target_available, requested_targets), None)
available_target, requested_targets = self._downloader._parse_impersonate_targets(impersonate)
if available_target:
extensions['impersonate'] = available_target
elif requested_targets:
message = 'The extractor is attempting impersonation, but '
message += (
'no impersonate target is available' if not str(impersonate)
else f'none of these impersonate targets are available: "{", ".join(map(str, requested_targets))}"')
info_msg = ('see https://github.com/yt-dlp/yt-dlp#impersonation '
'for information on installing the required dependencies')
msg = 'The extractor is attempting impersonation'
if require_impersonation:
raise ExtractorError(f'{message}; {info_msg}', expected=True)
self.report_warning(f'{message}; if you encounter errors, then {info_msg}', only_once=True)
raise ExtractorError(
self._downloader._unavailable_targets_message(requested_targets, note=msg, is_error=True),
expected=True)
self.report_warning(
self._downloader._unavailable_targets_message(requested_targets, note=msg), only_once=True)
try:
return self._downloader.urlopen(self._create_request(url_or_request, data, headers, query, extensions))