mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2025-05-07 22:39:45 +00:00
[ie/bitchute] Fix extractor (#13081)
Closes #13080 Authored by: bashonly
This commit is contained in:
parent
17cf9088d0
commit
1d0f6539c4
@ -1,30 +1,32 @@
|
|||||||
import functools
|
import functools
|
||||||
|
import json
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..networking import HEADRequest
|
from ..networking import HEADRequest
|
||||||
|
from ..networking.exceptions import HTTPError
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
OnDemandPagedList,
|
OnDemandPagedList,
|
||||||
clean_html,
|
clean_html,
|
||||||
extract_attributes,
|
determine_ext,
|
||||||
|
format_field,
|
||||||
get_element_by_class,
|
get_element_by_class,
|
||||||
get_element_by_id,
|
|
||||||
get_element_html_by_class,
|
|
||||||
get_elements_html_by_class,
|
get_elements_html_by_class,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
orderedSet,
|
orderedSet,
|
||||||
parse_count,
|
parse_count,
|
||||||
parse_duration,
|
parse_duration,
|
||||||
traverse_obj,
|
parse_iso8601,
|
||||||
unified_strdate,
|
url_or_none,
|
||||||
urlencode_postdata,
|
urlencode_postdata,
|
||||||
urljoin,
|
urljoin,
|
||||||
)
|
)
|
||||||
|
from ..utils.traversal import traverse_obj
|
||||||
|
|
||||||
|
|
||||||
class BitChuteIE(InfoExtractor):
|
class BitChuteIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:(?:www|old)\.)?bitchute\.com/(?:video|embed|torrent/[^/]+)/(?P<id>[^/?#&]+)'
|
_VALID_URL = r'https?://(?:(?:www|old)\.)?bitchute\.com/(?:video|embed|torrent/[^/?#]+)/(?P<id>[^/?#&]+)'
|
||||||
_EMBED_REGEX = [rf'<(?:script|iframe)[^>]+\bsrc=(["\'])(?P<url>{_VALID_URL})']
|
_EMBED_REGEX = [rf'<(?:script|iframe)[^>]+\bsrc=(["\'])(?P<url>{_VALID_URL})']
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://www.bitchute.com/video/UGlrF9o9b-Q/',
|
'url': 'https://www.bitchute.com/video/UGlrF9o9b-Q/',
|
||||||
@ -34,12 +36,17 @@ class BitChuteIE(InfoExtractor):
|
|||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'This is the first video on #BitChute !',
|
'title': 'This is the first video on #BitChute !',
|
||||||
'description': 'md5:a0337e7b1fe39e32336974af8173a034',
|
'description': 'md5:a0337e7b1fe39e32336974af8173a034',
|
||||||
'thumbnail': r're:^https?://.*\.jpg$',
|
'thumbnail': r're:https?://.+/.+\.jpg$',
|
||||||
'uploader': 'BitChute',
|
'uploader': 'BitChute',
|
||||||
'upload_date': '20170103',
|
'upload_date': '20170103',
|
||||||
'uploader_url': 'https://www.bitchute.com/profile/I5NgtHZn9vPj/',
|
'uploader_url': 'https://www.bitchute.com/profile/I5NgtHZn9vPj/',
|
||||||
'channel': 'BitChute',
|
'channel': 'BitChute',
|
||||||
'channel_url': 'https://www.bitchute.com/channel/bitchute/',
|
'channel_url': 'https://www.bitchute.com/channel/bitchute/',
|
||||||
|
'uploader_id': 'I5NgtHZn9vPj',
|
||||||
|
'channel_id': '1VBwRfyNcKdX',
|
||||||
|
'view_count': int,
|
||||||
|
'duration': 16.0,
|
||||||
|
'timestamp': 1483425443,
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
# test case: video with different channel and uploader
|
# test case: video with different channel and uploader
|
||||||
@ -49,13 +56,18 @@ class BitChuteIE(InfoExtractor):
|
|||||||
'id': 'Yti_j9A-UZ4',
|
'id': 'Yti_j9A-UZ4',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Israel at War | Full Measure',
|
'title': 'Israel at War | Full Measure',
|
||||||
'description': 'md5:38cf7bc6f42da1a877835539111c69ef',
|
'description': 'md5:e60198b89971966d6030d22b3268f08f',
|
||||||
'thumbnail': r're:^https?://.*\.jpg$',
|
'thumbnail': r're:https?://.+/.+\.jpg$',
|
||||||
'uploader': 'sharylattkisson',
|
'uploader': 'sharylattkisson',
|
||||||
'upload_date': '20231106',
|
'upload_date': '20231106',
|
||||||
'uploader_url': 'https://www.bitchute.com/profile/9K0kUWA9zmd9/',
|
'uploader_url': 'https://www.bitchute.com/profile/9K0kUWA9zmd9/',
|
||||||
'channel': 'Full Measure with Sharyl Attkisson',
|
'channel': 'Full Measure with Sharyl Attkisson',
|
||||||
'channel_url': 'https://www.bitchute.com/channel/sharylattkisson/',
|
'channel_url': 'https://www.bitchute.com/channel/sharylattkisson/',
|
||||||
|
'uploader_id': '9K0kUWA9zmd9',
|
||||||
|
'channel_id': 'NpdxoCRv3ZLb',
|
||||||
|
'view_count': int,
|
||||||
|
'duration': 554.0,
|
||||||
|
'timestamp': 1699296106,
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
# video not downloadable in browser, but we can recover it
|
# video not downloadable in browser, but we can recover it
|
||||||
@ -66,25 +78,21 @@ class BitChuteIE(InfoExtractor):
|
|||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'filesize': 71537926,
|
'filesize': 71537926,
|
||||||
'title': 'STYXHEXENHAMMER666 - Election Fraud, Clinton 2020, EU Armies, and Gun Control',
|
'title': 'STYXHEXENHAMMER666 - Election Fraud, Clinton 2020, EU Armies, and Gun Control',
|
||||||
'description': 'md5:228ee93bd840a24938f536aeac9cf749',
|
'description': 'md5:2029c7c212ccd4b040f52bb2d036ef4e',
|
||||||
'thumbnail': r're:^https?://.*\.jpg$',
|
'thumbnail': r're:https?://.+/.+\.jpg$',
|
||||||
'uploader': 'BitChute',
|
'uploader': 'BitChute',
|
||||||
'upload_date': '20181113',
|
'upload_date': '20181113',
|
||||||
'uploader_url': 'https://www.bitchute.com/profile/I5NgtHZn9vPj/',
|
'uploader_url': 'https://www.bitchute.com/profile/I5NgtHZn9vPj/',
|
||||||
'channel': 'BitChute',
|
'channel': 'BitChute',
|
||||||
'channel_url': 'https://www.bitchute.com/channel/bitchute/',
|
'channel_url': 'https://www.bitchute.com/channel/bitchute/',
|
||||||
|
'uploader_id': 'I5NgtHZn9vPj',
|
||||||
|
'channel_id': '1VBwRfyNcKdX',
|
||||||
|
'view_count': int,
|
||||||
|
'duration': 1701.0,
|
||||||
|
'tags': ['bitchute'],
|
||||||
|
'timestamp': 1542130287,
|
||||||
},
|
},
|
||||||
'params': {'check_formats': None},
|
'params': {'check_formats': None},
|
||||||
}, {
|
|
||||||
# restricted video
|
|
||||||
'url': 'https://www.bitchute.com/video/WEnQU7XGcTdl/',
|
|
||||||
'info_dict': {
|
|
||||||
'id': 'WEnQU7XGcTdl',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': 'Impartial Truth - Ein Letzter Appell an die Vernunft',
|
|
||||||
},
|
|
||||||
'params': {'skip_download': True},
|
|
||||||
'skip': 'Georestricted in DE',
|
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://www.bitchute.com/embed/lbb5G1hjPhw/',
|
'url': 'https://www.bitchute.com/embed/lbb5G1hjPhw/',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
@ -96,11 +104,8 @@ class BitChuteIE(InfoExtractor):
|
|||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
_GEO_BYPASS = False
|
_GEO_BYPASS = False
|
||||||
|
_UPLOADER_URL_TMPL = 'https://www.bitchute.com/profile/%s/'
|
||||||
_HEADERS = {
|
_CHANNEL_URL_TMPL = 'https://www.bitchute.com/channel/%s/'
|
||||||
'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.57 Safari/537.36',
|
|
||||||
'Referer': 'https://www.bitchute.com/',
|
|
||||||
}
|
|
||||||
|
|
||||||
def _check_format(self, video_url, video_id):
|
def _check_format(self, video_url, video_id):
|
||||||
urls = orderedSet(
|
urls = orderedSet(
|
||||||
@ -112,7 +117,7 @@ class BitChuteIE(InfoExtractor):
|
|||||||
for url in urls:
|
for url in urls:
|
||||||
try:
|
try:
|
||||||
response = self._request_webpage(
|
response = self._request_webpage(
|
||||||
HEADRequest(url), video_id=video_id, note=f'Checking {url}', headers=self._HEADERS)
|
HEADRequest(url), video_id=video_id, note=f'Checking {url}')
|
||||||
except ExtractorError as e:
|
except ExtractorError as e:
|
||||||
self.to_screen(f'{video_id}: URL is invalid, skipping: {e.cause}')
|
self.to_screen(f'{video_id}: URL is invalid, skipping: {e.cause}')
|
||||||
continue
|
continue
|
||||||
@ -121,54 +126,79 @@ class BitChuteIE(InfoExtractor):
|
|||||||
'filesize': int_or_none(response.headers.get('Content-Length')),
|
'filesize': int_or_none(response.headers.get('Content-Length')),
|
||||||
}
|
}
|
||||||
|
|
||||||
def _raise_if_restricted(self, webpage):
|
def _call_api(self, endpoint, data, display_id, fatal=True):
|
||||||
page_title = clean_html(get_element_by_class('page-title', webpage)) or ''
|
note = endpoint.rpartition('/')[2]
|
||||||
if re.fullmatch(r'(?:Channel|Video) Restricted', page_title):
|
try:
|
||||||
reason = clean_html(get_element_by_id('page-detail', webpage)) or page_title
|
return self._download_json(
|
||||||
self.raise_geo_restricted(reason)
|
f'https://api.bitchute.com/api/beta/{endpoint}', display_id,
|
||||||
|
f'Downloading {note} API JSON', f'Unable to download {note} API JSON',
|
||||||
@staticmethod
|
data=json.dumps(data).encode(),
|
||||||
def _make_url(html):
|
headers={
|
||||||
path = extract_attributes(get_element_html_by_class('spa', html) or '').get('href')
|
'Accept': 'application/json',
|
||||||
return urljoin('https://www.bitchute.com', path)
|
'Content-Type': 'application/json',
|
||||||
|
})
|
||||||
|
except ExtractorError as e:
|
||||||
|
if isinstance(e.cause, HTTPError) and e.cause.status == 403:
|
||||||
|
errors = '. '.join(traverse_obj(e.cause.response.read().decode(), (
|
||||||
|
{json.loads}, 'errors', lambda _, v: v['context'] == 'reason', 'message', {str})))
|
||||||
|
if errors and 'location' in errors:
|
||||||
|
# Can always be fatal since the video/media call will reach this code first
|
||||||
|
self.raise_geo_restricted(errors)
|
||||||
|
if fatal:
|
||||||
|
raise
|
||||||
|
self.report_warning(e.msg)
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
webpage = self._download_webpage(
|
data = {'video_id': video_id}
|
||||||
f'https://old.bitchute.com/video/{video_id}', video_id, headers=self._HEADERS)
|
media_url = self._call_api('video/media', data, video_id)['media_url']
|
||||||
|
|
||||||
self._raise_if_restricted(webpage)
|
|
||||||
publish_date = clean_html(get_element_by_class('video-publish-date', webpage))
|
|
||||||
entries = self._parse_html5_media_entries(url, webpage, video_id)
|
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
for format_ in traverse_obj(entries, (0, 'formats', ...)):
|
if determine_ext(media_url) == 'm3u8':
|
||||||
|
formats.extend(
|
||||||
|
self._extract_m3u8_formats(media_url, video_id, 'mp4', m3u8_id='hls', live=True))
|
||||||
|
else:
|
||||||
if self.get_param('check_formats') is not False:
|
if self.get_param('check_formats') is not False:
|
||||||
format_.update(self._check_format(format_.pop('url'), video_id) or {})
|
if fmt := self._check_format(media_url, video_id):
|
||||||
if 'url' not in format_:
|
formats.append(fmt)
|
||||||
continue
|
else:
|
||||||
formats.append(format_)
|
formats.append({'url': media_url})
|
||||||
|
|
||||||
if not formats:
|
if not formats:
|
||||||
self.raise_no_formats(
|
self.raise_no_formats(
|
||||||
'Video is unavailable. Please make sure this video is playable in the browser '
|
'Video is unavailable. Please make sure this video is playable in the browser '
|
||||||
'before reporting this issue.', expected=True, video_id=video_id)
|
'before reporting this issue.', expected=True, video_id=video_id)
|
||||||
|
|
||||||
details = get_element_by_class('details', webpage) or ''
|
video = self._call_api('video', data, video_id, fatal=False)
|
||||||
uploader_html = get_element_html_by_class('creator', details) or ''
|
channel = None
|
||||||
channel_html = get_element_html_by_class('name', details) or ''
|
if channel_id := traverse_obj(video, ('channel', 'channel_id', {str})):
|
||||||
|
channel = self._call_api('channel', {'channel_id': channel_id}, video_id, fatal=False)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
|
**traverse_obj(video, {
|
||||||
|
'title': ('video_name', {str}),
|
||||||
|
'description': ('description', {str}),
|
||||||
|
'thumbnail': ('thumbnail_url', {url_or_none}),
|
||||||
|
'channel': ('channel', 'channel_name', {str}),
|
||||||
|
'channel_id': ('channel', 'channel_id', {str}),
|
||||||
|
'channel_url': ('channel', 'channel_url', {urljoin('https://www.bitchute.com/')}),
|
||||||
|
'uploader_id': ('profile_id', {str}),
|
||||||
|
'uploader_url': ('profile_id', {format_field(template=self._UPLOADER_URL_TMPL)}, filter),
|
||||||
|
'timestamp': ('date_published', {parse_iso8601}),
|
||||||
|
'duration': ('duration', {parse_duration}),
|
||||||
|
'tags': ('hashtags', ..., {str}, filter, all, filter),
|
||||||
|
'view_count': ('view_count', {int_or_none}),
|
||||||
|
'is_live': ('state_id', {lambda x: x == 'live'}),
|
||||||
|
}),
|
||||||
|
**traverse_obj(channel, {
|
||||||
|
'channel': ('channel_name', {str}),
|
||||||
|
'channel_id': ('channel_id', {str}),
|
||||||
|
'channel_url': ('url_slug', {format_field(template=self._CHANNEL_URL_TMPL)}, filter),
|
||||||
|
'uploader': ('profile_name', {str}),
|
||||||
|
'uploader_id': ('profile_id', {str}),
|
||||||
|
'uploader_url': ('profile_id', {format_field(template=self._UPLOADER_URL_TMPL)}, filter),
|
||||||
|
}),
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': self._html_extract_title(webpage) or self._og_search_title(webpage),
|
|
||||||
'description': self._og_search_description(webpage, default=None),
|
|
||||||
'thumbnail': self._og_search_thumbnail(webpage),
|
|
||||||
'uploader': clean_html(uploader_html),
|
|
||||||
'uploader_url': self._make_url(uploader_html),
|
|
||||||
'channel': clean_html(channel_html),
|
|
||||||
'channel_url': self._make_url(channel_html),
|
|
||||||
'upload_date': unified_strdate(self._search_regex(
|
|
||||||
r'at \d+:\d+ UTC on (.+?)\.', publish_date, 'upload date', fatal=False)),
|
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -190,7 +220,7 @@ class BitChuteChannelIE(InfoExtractor):
|
|||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'This is the first video on #BitChute !',
|
'title': 'This is the first video on #BitChute !',
|
||||||
'description': 'md5:a0337e7b1fe39e32336974af8173a034',
|
'description': 'md5:a0337e7b1fe39e32336974af8173a034',
|
||||||
'thumbnail': r're:^https?://.*\.jpg$',
|
'thumbnail': r're:https?://.+/.+\.jpg$',
|
||||||
'uploader': 'BitChute',
|
'uploader': 'BitChute',
|
||||||
'upload_date': '20170103',
|
'upload_date': '20170103',
|
||||||
'uploader_url': 'https://www.bitchute.com/profile/I5NgtHZn9vPj/',
|
'uploader_url': 'https://www.bitchute.com/profile/I5NgtHZn9vPj/',
|
||||||
@ -198,6 +228,9 @@ class BitChuteChannelIE(InfoExtractor):
|
|||||||
'channel_url': 'https://www.bitchute.com/channel/bitchute/',
|
'channel_url': 'https://www.bitchute.com/channel/bitchute/',
|
||||||
'duration': 16,
|
'duration': 16,
|
||||||
'view_count': int,
|
'view_count': int,
|
||||||
|
'uploader_id': 'I5NgtHZn9vPj',
|
||||||
|
'channel_id': '1VBwRfyNcKdX',
|
||||||
|
'timestamp': 1483425443,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
],
|
],
|
||||||
@ -213,6 +246,7 @@ class BitChuteChannelIE(InfoExtractor):
|
|||||||
'title': 'Bruce MacDonald and "The Light of Darkness"',
|
'title': 'Bruce MacDonald and "The Light of Darkness"',
|
||||||
'description': 'md5:747724ef404eebdfc04277714f81863e',
|
'description': 'md5:747724ef404eebdfc04277714f81863e',
|
||||||
},
|
},
|
||||||
|
'skip': '404 Not Found',
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://old.bitchute.com/playlist/wV9Imujxasw9/',
|
'url': 'https://old.bitchute.com/playlist/wV9Imujxasw9/',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
Loading…
x
Reference in New Issue
Block a user