1
0
mirror of https://github.com/yt-dlp/yt-dlp.git synced 2025-09-03 00:25:08 +00:00

[networking] Remove dot segments during URL normalization (#7662)

This implements RFC3986 5.2.4 remove_dot_segments during the URL normalization process.

Closes #3355, #6526

Authored by: coletdjnz
This commit is contained in:
coletdjnz
2023-07-29 10:40:20 +12:00
committed by GitHub
parent a15fcd299e
commit 4bf912282a
8 changed files with 104 additions and 36 deletions

View File

@ -2464,23 +2464,6 @@ def lowercase_escape(s):
s)
def escape_rfc3986(s):
"""Escape non-ASCII characters as suggested by RFC 3986"""
return urllib.parse.quote(s, b"%/;:@&=+$,!~*'()?#[]")
def escape_url(url):
"""Escape URL as suggested by RFC 3986"""
url_parsed = urllib.parse.urlparse(url)
return url_parsed._replace(
netloc=url_parsed.netloc.encode('idna').decode('ascii'),
path=escape_rfc3986(url_parsed.path),
params=escape_rfc3986(url_parsed.params),
query=escape_rfc3986(url_parsed.query),
fragment=escape_rfc3986(url_parsed.fragment)
).geturl()
def parse_qs(url, **kwargs):
return urllib.parse.parse_qs(urllib.parse.urlparse(url).query, **kwargs)