1
0
mirror of https://github.com/yt-dlp/yt-dlp.git synced 2025-07-28 05:35:43 +00:00

[ie/Parlview] Rework extractor (#13788)

Closes #13787
Authored by: barryvan
This commit is contained in:
Barry van Oudtshoorn 2025-07-25 12:00:31 +08:00 committed by GitHub
parent 0adeb1e54b
commit 485de69dbf
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -1,63 +1,63 @@
import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import parse_duration, parse_iso8601, url_or_none
int_or_none, from ..utils.traversal import traverse_obj
try_get,
unified_timestamp,
)
class ParlviewIE(InfoExtractor): class ParlviewIE(InfoExtractor):
_WORKING = False _VALID_URL = r'https?://(?:www\.)?aph\.gov\.au/News_and_Events/Watch_Read_Listen/ParlView/video/(?P<id>[^/?#]+)'
_VALID_URL = r'https?://(?:www\.)?parlview\.aph\.gov\.au/(?:[^/]+)?\bvideoID=(?P<id>\d{6})'
_TESTS = [{ _TESTS = [{
'url': 'https://parlview.aph.gov.au/mediaPlayer.php?videoID=542661', 'url': 'https://www.aph.gov.au/News_and_Events/Watch_Read_Listen/ParlView/video/3406614',
'info_dict': { 'info_dict': {
'id': '542661', 'id': '3406614',
'ext': 'mp4', 'ext': 'mp4',
'title': "Australia's Family Law System [Part 2]", 'title': 'Senate Chamber',
'duration': 5799, 'description': 'Official Recording of Senate Proceedings from the Australian Parliament',
'description': 'md5:7099883b391619dbae435891ca871a62', 'thumbnail': 'https://aphbroadcasting-prod.z01.azurefd.net/vod-storage/vod-logos/SenateParlview06.jpg',
'timestamp': 1621430700, 'upload_date': '20250325',
'upload_date': '20210519', 'duration': 17999,
'uploader': 'Joint Committee', 'timestamp': 1742939400,
}, },
'params': { 'params': {
'skip_download': True, 'skip_download': True,
}, },
}, { }, {
'url': 'https://parlview.aph.gov.au/mediaPlayer.php?videoID=539936', 'url': 'https://www.aph.gov.au/News_and_Events/Watch_Read_Listen/ParlView/video/SV1394.dv',
'only_matching': True, 'info_dict': {
'id': 'SV1394.dv',
'ext': 'mp4',
'title': 'Senate Select Committee on Uranium Mining and Milling [Part 1]',
'description': 'Official Recording of Senate Committee Proceedings from the Australian Parliament',
'thumbnail': 'https://aphbroadcasting-prod.z01.azurefd.net/vod-storage/vod-logos/CommitteeThumbnail06.jpg',
'upload_date': '19960822',
'duration': 14765,
'timestamp': 840754200,
},
'params': {
'skip_download': True,
},
}] }]
_API_URL = 'https://parlview.aph.gov.au/api_v3/1/playback/getUniversalPlayerConfig?videoID=%s&format=json'
_MEDIA_INFO_URL = 'https://parlview.aph.gov.au/ajaxPlayer.php?videoID=%s&tabNum=4&action=loadTab'
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id) video_details = self._download_json(
media = self._download_json(self._API_URL % video_id, video_id).get('media') f'https://vodapi.aph.gov.au/api/search/parlview/{video_id}', video_id)['videoDetails']
timestamp = try_get(media, lambda x: x['timeMap']['source']['timecode_offsets'][0], str) or '/'
stream = try_get(media, lambda x: x['renditions'][0], dict) formats, subtitles = self._extract_m3u8_formats_and_subtitles(
if not stream: video_details['files']['file']['url'], video_id, 'mp4')
self.raise_no_formats('No streams were detected')
elif stream.get('streamType') != 'VOD':
self.raise_no_formats('Unknown type of stream was detected: "{}"'.format(str(stream.get('streamType'))))
formats = self._extract_m3u8_formats(stream['url'], video_id, 'mp4', 'm3u8_native')
media_info = self._download_webpage( DURATION_RE = re.compile(r'(?P<duration>\d+:\d+:\d+):\d+')
self._MEDIA_INFO_URL % video_id, video_id, note='Downloading media info', fatal=False)
return { return {
'id': video_id, 'id': video_id,
'url': url,
'title': self._html_search_regex(r'<h2>([^<]+)<', webpage, 'title', fatal=False),
'formats': formats, 'formats': formats,
'duration': int_or_none(media.get('duration')), 'subtitles': subtitles,
'timestamp': unified_timestamp(timestamp.split('/', 1)[1].replace('_', ' ')), **traverse_obj(video_details, {
'description': self._html_search_regex( 'title': (('parlViewTitle', 'title'), {str}, any),
r'<div[^>]+class="descripti?on"[^>]*>[^>]+<strong>[^>]+>[^>]+>([^<]+)', 'description': ('parlViewDescription', {str}),
webpage, 'description', fatal=False), 'duration': ('files', 'file', 'duration', {DURATION_RE.fullmatch}, 'duration', {parse_duration}),
'uploader': self._html_search_regex( 'timestamp': ('recordingFrom', {parse_iso8601}),
r'<td>[^>]+>Channel:[^>]+>([^<]+)', media_info, 'channel', fatal=False), 'thumbnail': ('thumbUrl', {url_or_none}),
'thumbnail': media.get('staticImage'), }),
} }