mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2025-09-03 08:35:32 +00:00
Native SponsorBlock implementation and related improvements (#360)
SponsorBlock options: * The fetched sponsor sections are written to infojson * `--sponsorblock-remove` removes specified chapters from file * `--sponsorblock-mark` marks the specified sponsor sections as chapters * `--sponsorblock-chapter-title` to specify sponsor chapter template * `--sponsorblock-api` to use a different API Related improvements: * Split `--embed-chapters` from `--embed-metadata` * Add `--remove-chapters` to remove arbitrary chapters * Add `--force-keyframes-at-cuts` for more accurate cuts when removing and splitting chapters Deprecates all `--sponskrub` options Authored by: nihil-admirari, pukkandan
This commit is contained in:
96
yt_dlp/postprocessor/sponsorblock.py
Normal file
96
yt_dlp/postprocessor/sponsorblock.py
Normal file
@ -0,0 +1,96 @@
|
||||
import json
|
||||
import re
|
||||
from hashlib import sha256
|
||||
|
||||
from .ffmpeg import FFmpegPostProcessor
|
||||
from ..compat import compat_urllib_parse_urlencode, compat_HTTPError
|
||||
from ..utils import PostProcessingError, sanitized_Request
|
||||
|
||||
|
||||
class SponsorBlockPP(FFmpegPostProcessor):
|
||||
|
||||
EXTRACTORS = {
|
||||
'Youtube': 'YouTube',
|
||||
}
|
||||
CATEGORIES = {
|
||||
'sponsor': 'Sponsor',
|
||||
'intro': 'Intermission/Intro Animation',
|
||||
'outro': 'Endcards/Credits',
|
||||
'selfpromo': 'Unpaid/Self Promotion',
|
||||
'interaction': 'Interaction Reminder',
|
||||
'preview': 'Preview/Recap',
|
||||
'music_offtopic': 'Non-Music Section'
|
||||
}
|
||||
|
||||
def __init__(self, downloader, categories=None, api='https://sponsor.ajay.app'):
|
||||
FFmpegPostProcessor.__init__(self, downloader)
|
||||
self._categories = tuple(categories or self.CATEGORIES.keys())
|
||||
self._API_URL = api if re.match('^https?://', api) else 'https://' + api
|
||||
|
||||
def run(self, info):
|
||||
extractor = info['extractor_key']
|
||||
if extractor not in self.EXTRACTORS:
|
||||
self.to_screen(f'SponsorBlock is not supported for {extractor}')
|
||||
return [], info
|
||||
|
||||
info['sponsorblock_chapters'] = self._get_sponsor_chapters(info, info['duration'])
|
||||
return [], info
|
||||
|
||||
def _get_sponsor_chapters(self, info, duration):
|
||||
segments = self._get_sponsor_segments(info['id'], self.EXTRACTORS[info['extractor_key']])
|
||||
|
||||
def duration_filter(s):
|
||||
start_end = s['segment']
|
||||
# Ignore milliseconds difference at the start.
|
||||
if start_end[0] <= 1:
|
||||
start_end[0] = 0
|
||||
# Ignore milliseconds difference at the end.
|
||||
# Never allow the segment to exceed the video.
|
||||
if duration and duration - start_end[1] <= 1:
|
||||
start_end[1] = duration
|
||||
# SponsorBlock duration may be absent or it may deviate from the real one.
|
||||
return s['videoDuration'] == 0 or not duration or abs(duration - s['videoDuration']) <= 1
|
||||
|
||||
duration_match = [s for s in segments if duration_filter(s)]
|
||||
if len(duration_match) != len(segments):
|
||||
self.report_warning('Some SponsorBlock segments are from a video of different duration, maybe from an old version of this video')
|
||||
|
||||
def to_chapter(s):
|
||||
(start, end), cat = s['segment'], s['category']
|
||||
return {
|
||||
'start_time': start,
|
||||
'end_time': end,
|
||||
'category': cat,
|
||||
'title': self.CATEGORIES[cat],
|
||||
'_categories': [(cat, start, end)]
|
||||
}
|
||||
|
||||
sponsor_chapters = [to_chapter(s) for s in duration_match]
|
||||
if not sponsor_chapters:
|
||||
self.to_screen('No segments were found in the SponsorBlock database')
|
||||
else:
|
||||
self.to_screen(f'Found {len(sponsor_chapters)} segments in the SponsorBlock database')
|
||||
return sponsor_chapters
|
||||
|
||||
def _get_sponsor_segments(self, video_id, service):
|
||||
hash = sha256(video_id.encode('ascii')).hexdigest()
|
||||
# SponsorBlock API recommends using first 4 hash characters.
|
||||
url = f'{self._API_URL}/api/skipSegments/{hash[:4]}?' + compat_urllib_parse_urlencode({
|
||||
'service': service,
|
||||
'categories': json.dumps(self._categories),
|
||||
})
|
||||
for d in self._get_json(url):
|
||||
if d['videoID'] == video_id:
|
||||
return d['segments']
|
||||
return []
|
||||
|
||||
def _get_json(self, url):
|
||||
self.write_debug(f'SponsorBlock query: {url}')
|
||||
try:
|
||||
rsp = self._downloader.urlopen(sanitized_Request(url))
|
||||
except compat_HTTPError as e:
|
||||
if e.code == 404:
|
||||
return []
|
||||
raise PostProcessingError(f'Error communicating with SponsorBlock API - {e}')
|
||||
|
||||
return json.loads(rsp.read().decode(rsp.info().get_param('charset') or 'utf-8'))
|
Reference in New Issue
Block a user