1
0
mirror of https://github.com/yt-dlp/yt-dlp.git synced 2025-05-07 22:39:45 +00:00

[ie/bitchute] Fix extractor (#13081)

Closes #13080
Authored by: bashonly
This commit is contained in:
bashonly 2025-05-03 14:31:33 -05:00 committed by GitHub
parent 17cf9088d0
commit 1d0f6539c4
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -1,30 +1,32 @@
import functools import functools
import json
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..networking import HEADRequest from ..networking import HEADRequest
from ..networking.exceptions import HTTPError
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
OnDemandPagedList, OnDemandPagedList,
clean_html, clean_html,
extract_attributes, determine_ext,
format_field,
get_element_by_class, get_element_by_class,
get_element_by_id,
get_element_html_by_class,
get_elements_html_by_class, get_elements_html_by_class,
int_or_none, int_or_none,
orderedSet, orderedSet,
parse_count, parse_count,
parse_duration, parse_duration,
traverse_obj, parse_iso8601,
unified_strdate, url_or_none,
urlencode_postdata, urlencode_postdata,
urljoin, urljoin,
) )
from ..utils.traversal import traverse_obj
class BitChuteIE(InfoExtractor): class BitChuteIE(InfoExtractor):
_VALID_URL = r'https?://(?:(?:www|old)\.)?bitchute\.com/(?:video|embed|torrent/[^/]+)/(?P<id>[^/?#&]+)' _VALID_URL = r'https?://(?:(?:www|old)\.)?bitchute\.com/(?:video|embed|torrent/[^/?#]+)/(?P<id>[^/?#&]+)'
_EMBED_REGEX = [rf'<(?:script|iframe)[^>]+\bsrc=(["\'])(?P<url>{_VALID_URL})'] _EMBED_REGEX = [rf'<(?:script|iframe)[^>]+\bsrc=(["\'])(?P<url>{_VALID_URL})']
_TESTS = [{ _TESTS = [{
'url': 'https://www.bitchute.com/video/UGlrF9o9b-Q/', 'url': 'https://www.bitchute.com/video/UGlrF9o9b-Q/',
@@ -34,12 +36,17 @@ class BitChuteIE(InfoExtractor):
'ext': 'mp4', 'ext': 'mp4',
'title': 'This is the first video on #BitChute !', 'title': 'This is the first video on #BitChute !',
'description': 'md5:a0337e7b1fe39e32336974af8173a034', 'description': 'md5:a0337e7b1fe39e32336974af8173a034',
'thumbnail': r're:^https?://.*\.jpg$', 'thumbnail': r're:https?://.+/.+\.jpg$',
'uploader': 'BitChute', 'uploader': 'BitChute',
'upload_date': '20170103', 'upload_date': '20170103',
'uploader_url': 'https://www.bitchute.com/profile/I5NgtHZn9vPj/', 'uploader_url': 'https://www.bitchute.com/profile/I5NgtHZn9vPj/',
'channel': 'BitChute', 'channel': 'BitChute',
'channel_url': 'https://www.bitchute.com/channel/bitchute/', 'channel_url': 'https://www.bitchute.com/channel/bitchute/',
'uploader_id': 'I5NgtHZn9vPj',
'channel_id': '1VBwRfyNcKdX',
'view_count': int,
'duration': 16.0,
'timestamp': 1483425443,
}, },
}, { }, {
# test case: video with different channel and uploader # test case: video with different channel and uploader
@@ -49,13 +56,18 @@ class BitChuteIE(InfoExtractor):
'id': 'Yti_j9A-UZ4', 'id': 'Yti_j9A-UZ4',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Israel at War | Full Measure', 'title': 'Israel at War | Full Measure',
'description': 'md5:38cf7bc6f42da1a877835539111c69ef', 'description': 'md5:e60198b89971966d6030d22b3268f08f',
'thumbnail': r're:^https?://.*\.jpg$', 'thumbnail': r're:https?://.+/.+\.jpg$',
'uploader': 'sharylattkisson', 'uploader': 'sharylattkisson',
'upload_date': '20231106', 'upload_date': '20231106',
'uploader_url': 'https://www.bitchute.com/profile/9K0kUWA9zmd9/', 'uploader_url': 'https://www.bitchute.com/profile/9K0kUWA9zmd9/',
'channel': 'Full Measure with Sharyl Attkisson', 'channel': 'Full Measure with Sharyl Attkisson',
'channel_url': 'https://www.bitchute.com/channel/sharylattkisson/', 'channel_url': 'https://www.bitchute.com/channel/sharylattkisson/',
'uploader_id': '9K0kUWA9zmd9',
'channel_id': 'NpdxoCRv3ZLb',
'view_count': int,
'duration': 554.0,
'timestamp': 1699296106,
}, },
}, { }, {
# video not downloadable in browser, but we can recover it # video not downloadable in browser, but we can recover it
@@ -66,25 +78,21 @@ class BitChuteIE(InfoExtractor):
'ext': 'mp4', 'ext': 'mp4',
'filesize': 71537926, 'filesize': 71537926,
'title': 'STYXHEXENHAMMER666 - Election Fraud, Clinton 2020, EU Armies, and Gun Control', 'title': 'STYXHEXENHAMMER666 - Election Fraud, Clinton 2020, EU Armies, and Gun Control',
'description': 'md5:228ee93bd840a24938f536aeac9cf749', 'description': 'md5:2029c7c212ccd4b040f52bb2d036ef4e',
'thumbnail': r're:^https?://.*\.jpg$', 'thumbnail': r're:https?://.+/.+\.jpg$',
'uploader': 'BitChute', 'uploader': 'BitChute',
'upload_date': '20181113', 'upload_date': '20181113',
'uploader_url': 'https://www.bitchute.com/profile/I5NgtHZn9vPj/', 'uploader_url': 'https://www.bitchute.com/profile/I5NgtHZn9vPj/',
'channel': 'BitChute', 'channel': 'BitChute',
'channel_url': 'https://www.bitchute.com/channel/bitchute/', 'channel_url': 'https://www.bitchute.com/channel/bitchute/',
'uploader_id': 'I5NgtHZn9vPj',
'channel_id': '1VBwRfyNcKdX',
'view_count': int,
'duration': 1701.0,
'tags': ['bitchute'],
'timestamp': 1542130287,
}, },
'params': {'check_formats': None}, 'params': {'check_formats': None},
}, {
# restricted video
'url': 'https://www.bitchute.com/video/WEnQU7XGcTdl/',
'info_dict': {
'id': 'WEnQU7XGcTdl',
'ext': 'mp4',
'title': 'Impartial Truth - Ein Letzter Appell an die Vernunft',
},
'params': {'skip_download': True},
'skip': 'Georestricted in DE',
}, { }, {
'url': 'https://www.bitchute.com/embed/lbb5G1hjPhw/', 'url': 'https://www.bitchute.com/embed/lbb5G1hjPhw/',
'only_matching': True, 'only_matching': True,
@@ -96,11 +104,8 @@ class BitChuteIE(InfoExtractor):
'only_matching': True, 'only_matching': True,
}] }]
_GEO_BYPASS = False _GEO_BYPASS = False
_UPLOADER_URL_TMPL = 'https://www.bitchute.com/profile/%s/'
_HEADERS = { _CHANNEL_URL_TMPL = 'https://www.bitchute.com/channel/%s/'
'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.57 Safari/537.36',
'Referer': 'https://www.bitchute.com/',
}
def _check_format(self, video_url, video_id): def _check_format(self, video_url, video_id):
urls = orderedSet( urls = orderedSet(
@@ -112,7 +117,7 @@ class BitChuteIE(InfoExtractor):
for url in urls: for url in urls:
try: try:
response = self._request_webpage( response = self._request_webpage(
HEADRequest(url), video_id=video_id, note=f'Checking {url}', headers=self._HEADERS) HEADRequest(url), video_id=video_id, note=f'Checking {url}')
except ExtractorError as e: except ExtractorError as e:
self.to_screen(f'{video_id}: URL is invalid, skipping: {e.cause}') self.to_screen(f'{video_id}: URL is invalid, skipping: {e.cause}')
continue continue
@@ -121,54 +126,79 @@ class BitChuteIE(InfoExtractor):
'filesize': int_or_none(response.headers.get('Content-Length')), 'filesize': int_or_none(response.headers.get('Content-Length')),
} }
def _raise_if_restricted(self, webpage): def _call_api(self, endpoint, data, display_id, fatal=True):
page_title = clean_html(get_element_by_class('page-title', webpage)) or '' note = endpoint.rpartition('/')[2]
if re.fullmatch(r'(?:Channel|Video) Restricted', page_title): try:
reason = clean_html(get_element_by_id('page-detail', webpage)) or page_title return self._download_json(
self.raise_geo_restricted(reason) f'https://api.bitchute.com/api/beta/{endpoint}', display_id,
f'Downloading {note} API JSON', f'Unable to download {note} API JSON',
@staticmethod data=json.dumps(data).encode(),
def _make_url(html): headers={
path = extract_attributes(get_element_html_by_class('spa', html) or '').get('href') 'Accept': 'application/json',
return urljoin('https://www.bitchute.com', path) 'Content-Type': 'application/json',
})
except ExtractorError as e:
if isinstance(e.cause, HTTPError) and e.cause.status == 403:
errors = '. '.join(traverse_obj(e.cause.response.read().decode(), (
{json.loads}, 'errors', lambda _, v: v['context'] == 'reason', 'message', {str})))
if errors and 'location' in errors:
# Can always be fatal since the video/media call will reach this code first
self.raise_geo_restricted(errors)
if fatal:
raise
self.report_warning(e.msg)
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
webpage = self._download_webpage( data = {'video_id': video_id}
f'https://old.bitchute.com/video/{video_id}', video_id, headers=self._HEADERS) media_url = self._call_api('video/media', data, video_id)['media_url']
self._raise_if_restricted(webpage)
publish_date = clean_html(get_element_by_class('video-publish-date', webpage))
entries = self._parse_html5_media_entries(url, webpage, video_id)
formats = [] formats = []
for format_ in traverse_obj(entries, (0, 'formats', ...)): if determine_ext(media_url) == 'm3u8':
formats.extend(
self._extract_m3u8_formats(media_url, video_id, 'mp4', m3u8_id='hls', live=True))
else:
if self.get_param('check_formats') is not False: if self.get_param('check_formats') is not False:
format_.update(self._check_format(format_.pop('url'), video_id) or {}) if fmt := self._check_format(media_url, video_id):
if 'url' not in format_: formats.append(fmt)
continue else:
formats.append(format_) formats.append({'url': media_url})
if not formats: if not formats:
self.raise_no_formats( self.raise_no_formats(
'Video is unavailable. Please make sure this video is playable in the browser ' 'Video is unavailable. Please make sure this video is playable in the browser '
'before reporting this issue.', expected=True, video_id=video_id) 'before reporting this issue.', expected=True, video_id=video_id)
details = get_element_by_class('details', webpage) or '' video = self._call_api('video', data, video_id, fatal=False)
uploader_html = get_element_html_by_class('creator', details) or '' channel = None
channel_html = get_element_html_by_class('name', details) or '' if channel_id := traverse_obj(video, ('channel', 'channel_id', {str})):
channel = self._call_api('channel', {'channel_id': channel_id}, video_id, fatal=False)
return { return {
**traverse_obj(video, {
'title': ('video_name', {str}),
'description': ('description', {str}),
'thumbnail': ('thumbnail_url', {url_or_none}),
'channel': ('channel', 'channel_name', {str}),
'channel_id': ('channel', 'channel_id', {str}),
'channel_url': ('channel', 'channel_url', {urljoin('https://www.bitchute.com/')}),
'uploader_id': ('profile_id', {str}),
'uploader_url': ('profile_id', {format_field(template=self._UPLOADER_URL_TMPL)}, filter),
'timestamp': ('date_published', {parse_iso8601}),
'duration': ('duration', {parse_duration}),
'tags': ('hashtags', ..., {str}, filter, all, filter),
'view_count': ('view_count', {int_or_none}),
'is_live': ('state_id', {lambda x: x == 'live'}),
}),
**traverse_obj(channel, {
'channel': ('channel_name', {str}),
'channel_id': ('channel_id', {str}),
'channel_url': ('url_slug', {format_field(template=self._CHANNEL_URL_TMPL)}, filter),
'uploader': ('profile_name', {str}),
'uploader_id': ('profile_id', {str}),
'uploader_url': ('profile_id', {format_field(template=self._UPLOADER_URL_TMPL)}, filter),
}),
'id': video_id, 'id': video_id,
'title': self._html_extract_title(webpage) or self._og_search_title(webpage),
'description': self._og_search_description(webpage, default=None),
'thumbnail': self._og_search_thumbnail(webpage),
'uploader': clean_html(uploader_html),
'uploader_url': self._make_url(uploader_html),
'channel': clean_html(channel_html),
'channel_url': self._make_url(channel_html),
'upload_date': unified_strdate(self._search_regex(
r'at \d+:\d+ UTC on (.+?)\.', publish_date, 'upload date', fatal=False)),
'formats': formats, 'formats': formats,
} }
@@ -190,7 +220,7 @@ class BitChuteChannelIE(InfoExtractor):
'ext': 'mp4', 'ext': 'mp4',
'title': 'This is the first video on #BitChute !', 'title': 'This is the first video on #BitChute !',
'description': 'md5:a0337e7b1fe39e32336974af8173a034', 'description': 'md5:a0337e7b1fe39e32336974af8173a034',
'thumbnail': r're:^https?://.*\.jpg$', 'thumbnail': r're:https?://.+/.+\.jpg$',
'uploader': 'BitChute', 'uploader': 'BitChute',
'upload_date': '20170103', 'upload_date': '20170103',
'uploader_url': 'https://www.bitchute.com/profile/I5NgtHZn9vPj/', 'uploader_url': 'https://www.bitchute.com/profile/I5NgtHZn9vPj/',
@@ -198,6 +228,9 @@ class BitChuteChannelIE(InfoExtractor):
'channel_url': 'https://www.bitchute.com/channel/bitchute/', 'channel_url': 'https://www.bitchute.com/channel/bitchute/',
'duration': 16, 'duration': 16,
'view_count': int, 'view_count': int,
'uploader_id': 'I5NgtHZn9vPj',
'channel_id': '1VBwRfyNcKdX',
'timestamp': 1483425443,
}, },
}, },
], ],
@@ -213,6 +246,7 @@ class BitChuteChannelIE(InfoExtractor):
'title': 'Bruce MacDonald and "The Light of Darkness"', 'title': 'Bruce MacDonald and "The Light of Darkness"',
'description': 'md5:747724ef404eebdfc04277714f81863e', 'description': 'md5:747724ef404eebdfc04277714f81863e',
}, },
'skip': '404 Not Found',
}, { }, {
'url': 'https://old.bitchute.com/playlist/wV9Imujxasw9/', 'url': 'https://old.bitchute.com/playlist/wV9Imujxasw9/',
'only_matching': True, 'only_matching': True,