diff --git a/yt_dlp/extractor/parlview.py b/yt_dlp/extractor/parlview.py index b93b5edacd..9c7efc58f4 100644 --- a/yt_dlp/extractor/parlview.py +++ b/yt_dlp/extractor/parlview.py @@ -1,63 +1,63 @@ +import re + from .common import InfoExtractor -from ..utils import ( - int_or_none, - try_get, - unified_timestamp, -) +from ..utils import parse_duration, parse_iso8601, url_or_none +from ..utils.traversal import traverse_obj class ParlviewIE(InfoExtractor): - _WORKING = False - _VALID_URL = r'https?://(?:www\.)?parlview\.aph\.gov\.au/(?:[^/]+)?\bvideoID=(?P\d{6})' + _VALID_URL = r'https?://(?:www\.)?aph\.gov\.au/News_and_Events/Watch_Read_Listen/ParlView/video/(?P[^/?#]+)' _TESTS = [{ - 'url': 'https://parlview.aph.gov.au/mediaPlayer.php?videoID=542661', + 'url': 'https://www.aph.gov.au/News_and_Events/Watch_Read_Listen/ParlView/video/3406614', 'info_dict': { - 'id': '542661', + 'id': '3406614', 'ext': 'mp4', - 'title': "Australia's Family Law System [Part 2]", - 'duration': 5799, - 'description': 'md5:7099883b391619dbae435891ca871a62', - 'timestamp': 1621430700, - 'upload_date': '20210519', - 'uploader': 'Joint Committee', + 'title': 'Senate Chamber', + 'description': 'Official Recording of Senate Proceedings from the Australian Parliament', + 'thumbnail': 'https://aphbroadcasting-prod.z01.azurefd.net/vod-storage/vod-logos/SenateParlview06.jpg', + 'upload_date': '20250325', + 'duration': 17999, + 'timestamp': 1742939400, }, 'params': { 'skip_download': True, }, }, { - 'url': 'https://parlview.aph.gov.au/mediaPlayer.php?videoID=539936', - 'only_matching': True, + 'url': 'https://www.aph.gov.au/News_and_Events/Watch_Read_Listen/ParlView/video/SV1394.dv', + 'info_dict': { + 'id': 'SV1394.dv', + 'ext': 'mp4', + 'title': 'Senate Select Committee on Uranium Mining and Milling [Part 1]', + 'description': 'Official Recording of Senate Committee Proceedings from the Australian Parliament', + 'thumbnail': 'https://aphbroadcasting-prod.z01.azurefd.net/vod-storage/vod-logos/CommitteeThumbnail06.jpg', + 'upload_date': '19960822', + 'duration': 14765, + 'timestamp': 840754200, + }, + 'params': { + 'skip_download': True, + }, }] - _API_URL = 'https://parlview.aph.gov.au/api_v3/1/playback/getUniversalPlayerConfig?videoID=%s&format=json' - _MEDIA_INFO_URL = 'https://parlview.aph.gov.au/ajaxPlayer.php?videoID=%s&tabNum=4&action=loadTab' def _real_extract(self, url): video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - media = self._download_json(self._API_URL % video_id, video_id).get('media') - timestamp = try_get(media, lambda x: x['timeMap']['source']['timecode_offsets'][0], str) or '/' + video_details = self._download_json( + f'https://vodapi.aph.gov.au/api/search/parlview/{video_id}', video_id)['videoDetails'] - stream = try_get(media, lambda x: x['renditions'][0], dict) - if not stream: - self.raise_no_formats('No streams were detected') - elif stream.get('streamType') != 'VOD': - self.raise_no_formats('Unknown type of stream was detected: "{}"'.format(str(stream.get('streamType')))) - formats = self._extract_m3u8_formats(stream['url'], video_id, 'mp4', 'm3u8_native') + formats, subtitles = self._extract_m3u8_formats_and_subtitles( + video_details['files']['file']['url'], video_id, 'mp4') - media_info = self._download_webpage( - self._MEDIA_INFO_URL % video_id, video_id, note='Downloading media info', fatal=False) + DURATION_RE = re.compile(r'(?P\d+:\d+:\d+):\d+') return { 'id': video_id, - 'url': url, - 'title': self._html_search_regex(r'

([^<]+)<', webpage, 'title', fatal=False), 'formats': formats, - 'duration': int_or_none(media.get('duration')), - 'timestamp': unified_timestamp(timestamp.split('/', 1)[1].replace('_', ' ')), - 'description': self._html_search_regex( - r']+class="descripti?on"[^>]*>[^>]+[^>]+>[^>]+>([^<]+)', - webpage, 'description', fatal=False), - 'uploader': self._html_search_regex( - r'[^>]+>Channel:[^>]+>([^<]+)', media_info, 'channel', fatal=False), - 'thumbnail': media.get('staticImage'), + 'subtitles': subtitles, + **traverse_obj(video_details, { + 'title': (('parlViewTitle', 'title'), {str}, any), + 'description': ('parlViewDescription', {str}), + 'duration': ('files', 'file', 'duration', {DURATION_RE.fullmatch}, 'duration', {parse_duration}), + 'timestamp': ('recordingFrom', {parse_iso8601}), + 'thumbnail': ('thumbUrl', {url_or_none}), + }), }