diff --git a/youtube_dl/extractor/adultswim.py b/youtube_dl/extractor/adultswim.py index acc4ce38dc..88c96a9506 100644 --- a/youtube_dl/extractor/adultswim.py +++ b/youtube_dl/extractor/adultswim.py @@ -7,6 +7,7 @@ from ..utils import ( int_or_none, strip_or_none, + url_or_none, ) @@ -98,7 +99,7 @@ def _real_extract(self, url): if not video_id: entries = [] for episode in video_data.get('archiveEpisodes', []): - episode_url = episode.get('url') + episode_url = url_or_none(episode.get('url')) if not episode_url: continue entries.append(self.url_result( diff --git a/youtube_dl/extractor/afreecatv.py b/youtube_dl/extractor/afreecatv.py index 4b3d971365..6275e5209e 100644 --- a/youtube_dl/extractor/afreecatv.py +++ b/youtube_dl/extractor/afreecatv.py @@ -9,6 +9,7 @@ determine_ext, ExtractorError, int_or_none, + url_or_none, urlencode_postdata, xpath_text, ) @@ -304,7 +305,7 @@ def _real_extract(self, url): file_elements = video_element.findall(compat_xpath('./file')) one = len(file_elements) == 1 for file_num, file_element in enumerate(file_elements, start=1): - file_url = file_element.text + file_url = url_or_none(file_element.text) if not file_url: continue key = file_element.get('key', '') diff --git a/youtube_dl/extractor/amp.py b/youtube_dl/extractor/amp.py index fde1a8ff74..7ff098cfa0 100644 --- a/youtube_dl/extractor/amp.py +++ b/youtube_dl/extractor/amp.py @@ -3,11 +3,12 @@ from .common import InfoExtractor from ..utils import ( - int_or_none, - parse_iso8601, - mimetype2ext, determine_ext, ExtractorError, + int_or_none, + mimetype2ext, + parse_iso8601, + url_or_none, ) @@ -35,7 +36,7 @@ def get_media_node(name, default=None): media_thumbnail = [media_thumbnail] for thumbnail_data in media_thumbnail: thumbnail = thumbnail_data.get('@attributes', {}) - thumbnail_url = thumbnail.get('url') + thumbnail_url = url_or_none(thumbnail.get('url')) if not thumbnail_url: continue thumbnails.append({ @@ -51,7 +52,7 @@ def get_media_node(name, default=None): media_subtitle = [media_subtitle] for subtitle_data in media_subtitle: subtitle = subtitle_data.get('@attributes', {}) - subtitle_href = subtitle.get('href') + subtitle_href = url_or_none(subtitle.get('href')) if not subtitle_href: continue subtitles.setdefault(subtitle.get('lang') or 'en', []).append({ @@ -65,7 +66,7 @@ def get_media_node(name, default=None): media_content = [media_content] for media_data in media_content: media = media_data.get('@attributes', {}) - media_url = media.get('url') + media_url = url_or_none(media.get('url')) if not media_url: continue ext = mimetype2ext(media.get('type')) or determine_ext(media_url) @@ -79,7 +80,7 @@ def get_media_node(name, default=None): else: formats.append({ 'format_id': media_data.get('media-category', {}).get('@attributes', {}).get('label'), - 'url': media['url'], + 'url': media_url, 'tbr': int_or_none(media.get('bitrate')), 'filesize': int_or_none(media.get('fileSize')), 'ext': ext, diff --git a/youtube_dl/extractor/animeondemand.py b/youtube_dl/extractor/animeondemand.py index 1fe5d5e56e..00ce684d1c 100644 --- a/youtube_dl/extractor/animeondemand.py +++ b/youtube_dl/extractor/animeondemand.py @@ -8,6 +8,7 @@ determine_ext, extract_attributes, ExtractorError, + url_or_none, urlencode_postdata, urljoin, ) @@ -165,7 +166,7 @@ def extract_info(html, video_id, num=None): }, fatal=False) if not playlist: continue - stream_url = playlist.get('streamurl') + stream_url = url_or_none(playlist.get('streamurl')) if stream_url: rtmp = re.search( r'^(?Prtmpe?://(?P[^/]+)/(?P.+/))(?Pmp[34]:.+)', diff --git a/youtube_dl/extractor/aol.py b/youtube_dl/extractor/aol.py index b50f454ee0..cb92791931 100644 --- a/youtube_dl/extractor/aol.py +++ b/youtube_dl/extractor/aol.py @@ -7,6 +7,7 @@ from ..utils import ( ExtractorError, int_or_none, + url_or_none, ) @@ -77,7 +78,7 @@ def _real_extract(self, url): formats.extend(self._extract_m3u8_formats( m3u8_url, video_id, 'mp4', m3u8_id='hls', fatal=False)) for rendition in video_data.get('renditions', []): - video_url = rendition.get('url') + video_url = url_or_none(rendition.get('url')) if not video_url: continue ext = rendition.get('format') diff --git a/youtube_dl/extractor/apa.py b/youtube_dl/extractor/apa.py index a30a935aa8..98ccdaa4a9 100644 --- a/youtube_dl/extractor/apa.py +++ b/youtube_dl/extractor/apa.py @@ -4,10 +4,10 @@ import re from .common import InfoExtractor -from ..compat import compat_str from ..utils import ( determine_ext, js_to_json, + url_or_none, ) @@ -68,8 +68,8 @@ def _real_extract(self, url): for source in sources: if not isinstance(source, dict): continue - source_url = source.get('file') - if not source_url or not isinstance(source_url, compat_str): + source_url = url_or_none(source.get('file')) + if not source_url: continue ext = determine_ext(source_url) if ext == 'm3u8': diff --git a/youtube_dl/extractor/aparat.py b/youtube_dl/extractor/aparat.py index e394cb6614..6eb8bbb6e9 100644 --- a/youtube_dl/extractor/aparat.py +++ b/youtube_dl/extractor/aparat.py @@ -5,6 +5,7 @@ from ..utils import ( int_or_none, mimetype2ext, + url_or_none, ) @@ -43,7 +44,7 @@ def _real_extract(self, url): formats = [] for item in file_list[0]: - file_url = item.get('file') + file_url = url_or_none(item.get('file')) if not file_url: continue ext = mimetype2ext(item.get('type')) diff --git a/youtube_dl/extractor/ard.py b/youtube_dl/extractor/ard.py index 86951d975d..23f574d367 100644 --- a/youtube_dl/extractor/ard.py +++ b/youtube_dl/extractor/ard.py @@ -5,7 +5,6 @@ from .common import InfoExtractor from .generic import GenericIE -from ..compat import compat_str from ..utils import ( determine_ext, ExtractorError, @@ -15,6 +14,7 @@ unified_strdate, xpath_text, update_url_query, + url_or_none, ) from ..compat import compat_etree_fromstring @@ -100,7 +100,7 @@ def _extract_formats(self, media_info, video_id): quality = stream.get('_quality') server = stream.get('_server') for stream_url in stream_urls: - if not isinstance(stream_url, compat_str) or '//' not in stream_url: + if not url_or_none(stream_url): continue ext = determine_ext(stream_url) if quality != 'auto' and ext in ('f4m', 'm3u8'): diff --git a/youtube_dl/extractor/bandcamp.py b/youtube_dl/extractor/bandcamp.py index be41bd5a22..b8514734d5 100644 --- a/youtube_dl/extractor/bandcamp.py +++ b/youtube_dl/extractor/bandcamp.py @@ -19,6 +19,7 @@ unescapeHTML, update_url_query, unified_strdate, + url_or_none, ) @@ -131,8 +132,8 @@ def _real_extract(self, url): fatal=False) if not stat: continue - retry_url = stat.get('retry_url') - if not isinstance(retry_url, compat_str): + retry_url = url_or_none(stat.get('retry_url')) + if not retry_url: continue formats.append({ 'url': self._proto_relative_url(retry_url, 'http:'), @@ -306,7 +307,7 @@ def _real_extract(self, url): formats = [] for format_id, format_url in show['audio_stream'].items(): - if not isinstance(format_url, compat_str): + if not url_or_none(format_url): continue for known_ext in KNOWN_EXTENSIONS: if known_ext in format_id: diff --git a/youtube_dl/extractor/breakcom.py b/youtube_dl/extractor/breakcom.py index 70d16767f1..68c7cf2bba 100644 --- a/youtube_dl/extractor/breakcom.py +++ b/youtube_dl/extractor/breakcom.py @@ -4,8 +4,10 @@ from .common import InfoExtractor from .youtube import YoutubeIE -from ..compat import compat_str -from ..utils import int_or_none +from ..utils import ( + int_or_none, + url_or_none, +) class BreakIE(InfoExtractor): @@ -55,8 +57,8 @@ def _real_extract(self, url): formats = [] for video in content: - video_url = video.get('url') - if not video_url or not isinstance(video_url, compat_str): + video_url = url_or_none(video.get('url')) + if not video_url: continue bitrate = int_or_none(self._search_regex( r'(\d+)_kbps', video_url, 'tbr', default=None)) diff --git a/youtube_dl/extractor/cammodels.py b/youtube_dl/extractor/cammodels.py index ee0165dbaa..79350817f5 100644 --- a/youtube_dl/extractor/cammodels.py +++ b/youtube_dl/extractor/cammodels.py @@ -2,10 +2,10 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..compat import compat_str from ..utils import ( ExtractorError, int_or_none, + url_or_none, ) @@ -56,8 +56,8 @@ def _real_extract(self, url): for media in encodings: if not isinstance(media, dict): continue - media_url = media.get('location') - if not media_url or not isinstance(media_url, compat_str): + media_url = url_or_none(media.get('location')) + if not media_url: continue format_id_list = [format_id] diff --git a/youtube_dl/extractor/ccma.py b/youtube_dl/extractor/ccma.py index 07f5206c12..544647f92b 100644 --- a/youtube_dl/extractor/ccma.py +++ b/youtube_dl/extractor/ccma.py @@ -4,13 +4,13 @@ import re from .common import InfoExtractor -from ..compat import compat_str from ..utils import ( clean_html, int_or_none, parse_duration, parse_iso8601, parse_resolution, + url_or_none, ) @@ -53,8 +53,8 @@ def _real_extract(self, url): media_url = media['media']['url'] if isinstance(media_url, list): for format_ in media_url: - format_url = format_.get('file') - if not format_url or not isinstance(format_url, compat_str): + format_url = url_or_none(format_.get('file')) + if not format_url: continue label = format_.get('label') f = parse_resolution(label) diff --git a/youtube_dl/extractor/crackle.py b/youtube_dl/extractor/crackle.py index f4a6164550..8dd9d66872 100644 --- a/youtube_dl/extractor/crackle.py +++ b/youtube_dl/extractor/crackle.py @@ -4,16 +4,14 @@ import re from .common import InfoExtractor -from ..compat import ( - compat_str, - compat_HTTPError, -) +from ..compat import compat_HTTPError from ..utils import ( determine_ext, float_or_none, int_or_none, parse_age_limit, parse_duration, + url_or_none, ExtractorError ) @@ -86,8 +84,8 @@ def _real_extract(self, url): for e in media['MediaURLs']: if e.get('UseDRM') is True: continue - format_url = e.get('Path') - if not format_url or not isinstance(format_url, compat_str): + format_url = url_or_none(e.get('Path')) + if not format_url: continue ext = determine_ext(format_url) if ext == 'm3u8': @@ -124,8 +122,8 @@ def _real_extract(self, url): for cc_file in cc_files: if not isinstance(cc_file, dict): continue - cc_url = cc_file.get('Path') - if not cc_url or not isinstance(cc_url, compat_str): + cc_url = url_or_none(cc_file.get('Path')) + if not cc_url: continue lang = cc_file.get('Locale') or 'en' subtitles.setdefault(lang, []).append({'url': cc_url}) diff --git a/youtube_dl/extractor/dctp.py b/youtube_dl/extractor/dctp.py index dc0c41b8a5..769a219dff 100644 --- a/youtube_dl/extractor/dctp.py +++ b/youtube_dl/extractor/dctp.py @@ -7,6 +7,7 @@ float_or_none, int_or_none, unified_timestamp, + url_or_none, ) @@ -69,7 +70,7 @@ def _real_extract(self, url): endpoint = next( server['endpoint'] for server in servers - if isinstance(server.get('endpoint'), compat_str) and + if url_or_none(server.get('endpoint')) and 'cloudfront' in server['endpoint']) else: endpoint = 'rtmpe://s2pqqn4u96e4j8.cloudfront.net/cfx/st/' @@ -92,8 +93,8 @@ def _real_extract(self, url): for image in images: if not isinstance(image, dict): continue - image_url = image.get('url') - if not image_url or not isinstance(image_url, compat_str): + image_url = url_or_none(image.get('url')) + if not image_url: continue thumbnails.append({ 'url': image_url, diff --git a/youtube_dl/extractor/discoverygo.py b/youtube_dl/extractor/discoverygo.py index 3368c4c075..9e7b14a7d5 100644 --- a/youtube_dl/extractor/discoverygo.py +++ b/youtube_dl/extractor/discoverygo.py @@ -3,7 +3,6 @@ import re from .common import InfoExtractor -from ..compat import compat_str from ..utils import ( determine_ext, extract_attributes, @@ -12,6 +11,7 @@ parse_age_limit, remove_end, unescapeHTML, + url_or_none, ) @@ -69,9 +69,8 @@ def _extract_video_info(self, video, stream, display_id): captions = stream.get('captions') if isinstance(captions, list): for caption in captions: - subtitle_url = caption.get('fileUrl') - if (not subtitle_url or not isinstance(subtitle_url, compat_str) or - not subtitle_url.startswith('http')): + subtitle_url = url_or_none(caption.get('fileUrl')) + if not subtitle_url or not subtitle_url.startswith('http'): continue lang = caption.get('fileLang', 'en') ext = determine_ext(subtitle_url) diff --git a/youtube_dl/extractor/dramafever.py b/youtube_dl/extractor/dramafever.py index ab32ba4ff3..db1de699fa 100644 --- a/youtube_dl/extractor/dramafever.py +++ b/youtube_dl/extractor/dramafever.py @@ -7,7 +7,6 @@ from .common import InfoExtractor from ..compat import ( compat_HTTPError, - compat_str, compat_urlparse, ) from ..utils import ( @@ -17,6 +16,7 @@ parse_age_limit, parse_duration, unified_timestamp, + url_or_none, ) @@ -139,8 +139,8 @@ def _get_subtitles(self, video_id): for sub in subs: if not isinstance(sub, dict): continue - sub_url = sub.get('url') - if not sub_url or not isinstance(sub_url, compat_str): + sub_url = url_or_none(sub.get('url')) + if not sub_url: continue subtitles.setdefault( sub.get('code') or sub.get('language') or 'en', []).append({ @@ -163,8 +163,8 @@ def _real_extract(self, url): for format_id, format_dict in download_assets.items(): if not isinstance(format_dict, dict): continue - format_url = format_dict.get('url') - if not format_url or not isinstance(format_url, compat_str): + format_url = url_or_none(format_dict.get('url')) + if not format_url: continue formats.append({ 'url': format_url, diff --git a/youtube_dl/extractor/eagleplatform.py b/youtube_dl/extractor/eagleplatform.py index 42789278e3..36fef07b72 100644 --- a/youtube_dl/extractor/eagleplatform.py +++ b/youtube_dl/extractor/eagleplatform.py @@ -4,14 +4,12 @@ import re from .common import InfoExtractor -from ..compat import ( - compat_HTTPError, - compat_str, -) +from ..compat import compat_HTTPError from ..utils import ( ExtractorError, int_or_none, unsmuggle_url, + url_or_none, ) @@ -177,7 +175,7 @@ def _real_extract(self, url): video_id, 'Downloading mp4 JSON', fatal=False) if mp4_data: for format_id, format_url in mp4_data.get('data', {}).items(): - if not isinstance(format_url, compat_str): + if not url_or_none(format_url): continue height = int_or_none(format_id) if height is not None and m3u8_formats_dict.get(height): diff --git a/youtube_dl/extractor/egghead.py b/youtube_dl/extractor/egghead.py index edabaafe68..df11dc206d 100644 --- a/youtube_dl/extractor/egghead.py +++ b/youtube_dl/extractor/egghead.py @@ -8,6 +8,7 @@ int_or_none, try_get, unified_timestamp, + url_or_none, ) @@ -34,8 +35,8 @@ def _real_extract(self, url): entries = [] for lesson in lessons: - lesson_url = lesson.get('http_url') - if not lesson_url or not isinstance(lesson_url, compat_str): + lesson_url = url_or_none(lesson.get('http_url')) + if not lesson_url: continue lesson_id = lesson.get('id') if lesson_id: @@ -95,7 +96,8 @@ def _real_extract(self, url): formats = [] for _, format_url in lesson['media_urls'].items(): - if not format_url or not isinstance(format_url, compat_str): + format_url = url_or_none(format_url) + if not format_url: continue ext = determine_ext(format_url) if ext == 'm3u8': diff --git a/youtube_dl/extractor/eporner.py b/youtube_dl/extractor/eporner.py index 81f2e2ee1c..6d03d70958 100644 --- a/youtube_dl/extractor/eporner.py +++ b/youtube_dl/extractor/eporner.py @@ -11,6 +11,7 @@ int_or_none, parse_duration, str_to_int, + url_or_none, ) @@ -82,8 +83,8 @@ def calc_hash(s): for format_id, format_dict in formats_dict.items(): if not isinstance(format_dict, dict): continue - src = format_dict.get('src') - if not isinstance(src, compat_str) or not src.startswith('http'): + src = url_or_none(format_dict.get('src')) + if not src or not src.startswith('http'): continue if kind == 'hls': formats.extend(self._extract_m3u8_formats( diff --git a/youtube_dl/extractor/firsttv.py b/youtube_dl/extractor/firsttv.py index 4803a22c81..28617d83c7 100644 --- a/youtube_dl/extractor/firsttv.py +++ b/youtube_dl/extractor/firsttv.py @@ -10,6 +10,7 @@ int_or_none, qualities, unified_strdate, + url_or_none, ) @@ -88,8 +89,8 @@ def _real_extract(self, url): formats = [] path = None for f in item.get('mbr', []): - src = f.get('src') - if not src or not isinstance(src, compat_str): + src = url_or_none(f.get('src')) + if not src: continue tbr = int_or_none(self._search_regex( r'_(\d{3,})\.mp4', src, 'tbr', default=None)) diff --git a/youtube_dl/extractor/francetv.py b/youtube_dl/extractor/francetv.py index 6fc6b0da07..2ffe83a78c 100644 --- a/youtube_dl/extractor/francetv.py +++ b/youtube_dl/extractor/francetv.py @@ -16,6 +16,7 @@ int_or_none, parse_duration, try_get, + url_or_none, ) from .dailymotion import DailymotionIE @@ -115,14 +116,13 @@ def _extract_video(self, video_id, catalogue=None): def sign(manifest_url, manifest_id): for host in ('hdfauthftv-a.akamaihd.net', 'hdfauth.francetv.fr'): - signed_url = self._download_webpage( + signed_url = url_or_none(self._download_webpage( 'https://%s/esi/TA' % host, video_id, 'Downloading signed %s manifest URL' % manifest_id, fatal=False, query={ 'url': manifest_url, - }) - if (signed_url and isinstance(signed_url, compat_str) and - re.search(r'^(?:https?:)?//', signed_url)): + })) + if signed_url: return signed_url return manifest_url diff --git a/youtube_dl/extractor/frontendmasters.py b/youtube_dl/extractor/frontendmasters.py index 770db46d0d..cb57ba007f 100644 --- a/youtube_dl/extractor/frontendmasters.py +++ b/youtube_dl/extractor/frontendmasters.py @@ -11,6 +11,7 @@ from ..utils import ( ExtractorError, parse_duration, + url_or_none, urlencode_postdata, ) @@ -80,7 +81,7 @@ def _extract_chapters(course): chapters = [] lesson_elements = course.get('lessonElements') if isinstance(lesson_elements, list): - chapters = [e for e in lesson_elements if isinstance(e, compat_str)] + chapters = [url_or_none(e) for e in lesson_elements if url_or_none(e)] return chapters @staticmethod diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index aa04905ed8..e5a8ffbe8d 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -32,6 +32,7 @@ unified_strdate, unsmuggle_url, UnsupportedError, + url_or_none, xpath_text, ) from .commonprotocols import RtmpIE @@ -3130,8 +3131,8 @@ def _real_extract(self, url): sources = [sources] formats = [] for source in sources: - src = source.get('src') - if not src or not isinstance(src, compat_str): + src = url_or_none(source.get('src')) + if not src: continue src = compat_urlparse.urljoin(url, src) src_type = source.get('type') diff --git a/youtube_dl/extractor/hidive.py b/youtube_dl/extractor/hidive.py index 39fabe8a55..f26f802656 100644 --- a/youtube_dl/extractor/hidive.py +++ b/youtube_dl/extractor/hidive.py @@ -8,6 +8,7 @@ from ..utils import ( ExtractorError, int_or_none, + url_or_none, urlencode_postdata, ) @@ -80,8 +81,8 @@ def _real_extract(self, url): bitrates = rendition.get('bitrates') if not isinstance(bitrates, dict): continue - m3u8_url = bitrates.get('hls') - if not isinstance(m3u8_url, compat_str): + m3u8_url = url_or_none(bitrates.get('hls')) + if not m3u8_url: continue formats.extend(self._extract_m3u8_formats( m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native', @@ -93,9 +94,8 @@ def _real_extract(self, url): if not isinstance(cc_file, list) or len(cc_file) < 3: continue cc_lang = cc_file[0] - cc_url = cc_file[2] - if not isinstance(cc_lang, compat_str) or not isinstance( - cc_url, compat_str): + cc_url = url_or_none(cc_file[2]) + if not isinstance(cc_lang, compat_str) or not cc_url: continue subtitles.setdefault(cc_lang, []).append({ 'url': cc_url, diff --git a/youtube_dl/extractor/imdb.py b/youtube_dl/extractor/imdb.py index 4bafa54a21..fba01ef49e 100644 --- a/youtube_dl/extractor/imdb.py +++ b/youtube_dl/extractor/imdb.py @@ -3,12 +3,12 @@ import re from .common import InfoExtractor -from ..compat import compat_str from ..utils import ( determine_ext, mimetype2ext, parse_duration, qualities, + url_or_none, ) @@ -61,8 +61,8 @@ def _real_extract(self, url): for encoding in video_metadata.get('encodings', []): if not encoding or not isinstance(encoding, dict): continue - video_url = encoding.get('videoUrl') - if not video_url or not isinstance(video_url, compat_str): + video_url = url_or_none(encoding.get('videoUrl')) + if not video_url: continue ext = determine_ext(video_url, mimetype2ext(encoding.get('mimeType'))) if ext == 'm3u8': diff --git a/youtube_dl/extractor/instagram.py b/youtube_dl/extractor/instagram.py index 0c13f54ee0..7e0e838f05 100644 --- a/youtube_dl/extractor/instagram.py +++ b/youtube_dl/extractor/instagram.py @@ -17,6 +17,7 @@ lowercase_escape, std_headers, try_get, + url_or_none, ) @@ -170,7 +171,7 @@ def get_count(key, kind): node = try_get(edge, lambda x: x['node'], dict) if not node: continue - node_video_url = try_get(node, lambda x: x['video_url'], compat_str) + node_video_url = url_or_none(node.get('video_url')) if not node_video_url: continue entries.append({ diff --git a/youtube_dl/extractor/itv.py b/youtube_dl/extractor/itv.py index d05a7b68dc..de65b6bb45 100644 --- a/youtube_dl/extractor/itv.py +++ b/youtube_dl/extractor/itv.py @@ -20,6 +20,7 @@ merge_dicts, parse_duration, smuggle_url, + url_or_none, xpath_with_ns, xpath_element, xpath_text, @@ -250,8 +251,8 @@ def extract_subtitle(sub_url): for sub in subs: if not isinstance(sub, dict): continue - href = sub.get('Href') - if isinstance(href, compat_str): + href = url_or_none(sub.get('Href')) + if href: extract_subtitle(href) if not info.get('duration'): info['duration'] = parse_duration(video_data.get('Duration')) diff --git a/youtube_dl/extractor/keezmovies.py b/youtube_dl/extractor/keezmovies.py index d4e6f7ac17..c3eb74c174 100644 --- a/youtube_dl/extractor/keezmovies.py +++ b/youtube_dl/extractor/keezmovies.py @@ -4,16 +4,14 @@ from .common import InfoExtractor from ..aes import aes_decrypt_text -from ..compat import ( - compat_str, - compat_urllib_parse_unquote, -) +from ..compat import compat_urllib_parse_unquote from ..utils import ( determine_ext, ExtractorError, int_or_none, str_to_int, strip_or_none, + url_or_none, ) @@ -55,7 +53,8 @@ def _extract_info(self, url, fatal=True): encrypted = False def extract_format(format_url, height=None): - if not isinstance(format_url, compat_str) or not format_url.startswith(('http', '//')): + format_url = url_or_none(format_url) + if not format_url or not format_url.startswith(('http', '//')): return if format_url in format_urls: return diff --git a/youtube_dl/extractor/konserthusetplay.py b/youtube_dl/extractor/konserthusetplay.py index c11cbcf475..dd42bb2f27 100644 --- a/youtube_dl/extractor/konserthusetplay.py +++ b/youtube_dl/extractor/konserthusetplay.py @@ -2,11 +2,11 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..compat import compat_str from ..utils import ( determine_ext, float_or_none, int_or_none, + url_or_none, ) @@ -109,7 +109,8 @@ def _real_extract(self, url): captions = source.get('captionsAvailableLanguages') if isinstance(captions, dict): for lang, subtitle_url in captions.items(): - if lang != 'none' and isinstance(subtitle_url, compat_str): + subtitle_url = url_or_none(subtitle_url) + if lang != 'none' and subtitle_url: subtitles.setdefault(lang, []).append({'url': subtitle_url}) return { diff --git a/youtube_dl/extractor/mediasite.py b/youtube_dl/extractor/mediasite.py index 0e2645c550..84876b8838 100644 --- a/youtube_dl/extractor/mediasite.py +++ b/youtube_dl/extractor/mediasite.py @@ -15,6 +15,7 @@ mimetype2ext, unescapeHTML, unsmuggle_url, + url_or_none, urljoin, ) @@ -156,8 +157,8 @@ def _real_extract(self, url): stream_formats = [] for unum, VideoUrl in enumerate(video_urls): - video_url = VideoUrl.get('Location') - if not video_url or not isinstance(video_url, compat_str): + video_url = url_or_none(VideoUrl.get('Location')) + if not video_url: continue # XXX: if Stream.get('CanChangeScheme', False), switch scheme to HTTP/HTTPS diff --git a/youtube_dl/extractor/peertube.py b/youtube_dl/extractor/peertube.py index d9849a2baf..e03c3d1d3d 100644 --- a/youtube_dl/extractor/peertube.py +++ b/youtube_dl/extractor/peertube.py @@ -10,6 +10,7 @@ parse_resolution, try_get, unified_timestamp, + url_or_none, urljoin, ) @@ -200,8 +201,8 @@ def _real_extract(self, url): for file_ in video['files']: if not isinstance(file_, dict): continue - file_url = file_.get('fileUrl') - if not file_url or not isinstance(file_url, compat_str): + file_url = url_or_none(file_.get('fileUrl')) + if not file_url: continue file_size = int_or_none(file_.get('size')) format_id = try_get( diff --git a/youtube_dl/extractor/redtube.py b/youtube_dl/extractor/redtube.py index 879bcf81d8..10311a81a8 100644 --- a/youtube_dl/extractor/redtube.py +++ b/youtube_dl/extractor/redtube.py @@ -3,12 +3,12 @@ import re from .common import InfoExtractor -from ..compat import compat_str from ..utils import ( ExtractorError, int_or_none, str_to_int, unified_strdate, + url_or_none, ) @@ -71,8 +71,8 @@ def _real_extract(self, url): video_id, fatal=False) if medias and isinstance(medias, list): for media in medias: - format_url = media.get('videoUrl') - if not format_url or not isinstance(format_url, compat_str): + format_url = url_or_none(media.get('videoUrl')) + if not format_url: continue format_id = media.get('quality') formats.append({ diff --git a/youtube_dl/extractor/rentv.py b/youtube_dl/extractor/rentv.py index 8bcf87126b..7c8909d953 100644 --- a/youtube_dl/extractor/rentv.py +++ b/youtube_dl/extractor/rentv.py @@ -6,6 +6,7 @@ from ..utils import ( determine_ext, int_or_none, + url_or_none, ) @@ -37,8 +38,8 @@ def _real_extract(self, url): title = config['title'] formats = [] for video in config['src']: - src = video.get('src') - if not src or not isinstance(src, compat_str): + src = url_or_none(video.get('src')) + if not src: continue ext = determine_ext(src) if ext == 'm3u8': diff --git a/youtube_dl/extractor/rutube.py b/youtube_dl/extractor/rutube.py index 89d89b65a8..261bcbb83e 100644 --- a/youtube_dl/extractor/rutube.py +++ b/youtube_dl/extractor/rutube.py @@ -16,6 +16,7 @@ int_or_none, try_get, unified_timestamp, + url_or_none, ) @@ -176,8 +177,8 @@ def _entries(self, playlist_id, *args, **kwargs): break for result in results: - video_url = result.get('video_url') - if not video_url or not isinstance(video_url, compat_str): + video_url = url_or_none(result.get('video_url')) + if not video_url: continue entry = self._extract_video(result, require_title=False) entry.update({ diff --git a/youtube_dl/extractor/turner.py b/youtube_dl/extractor/turner.py index 2b7b0d6e1b..4a6cbfbb88 100644 --- a/youtube_dl/extractor/turner.py +++ b/youtube_dl/extractor/turner.py @@ -15,6 +15,7 @@ update_url_query, ExtractorError, strip_or_none, + url_or_none, ) @@ -154,8 +155,8 @@ def _extract_cvp_info(self, data_src, video_id, path_data={}, ap_data={}): subtitles = {} for source in video_data.findall('closedCaptions/source'): for track in source.findall('track'): - track_url = track.get('url') - if not isinstance(track_url, compat_str) or track_url.endswith('/big'): + track_url = url_or_none(track.get('url')) + if not track_url or track_url.endswith('/big'): continue lang = track.get('lang') or track.get('label') or 'en' subtitles.setdefault(lang, []).append({ diff --git a/youtube_dl/extractor/tvnet.py b/youtube_dl/extractor/tvnet.py index 2b2630b916..4222ff9ee2 100644 --- a/youtube_dl/extractor/tvnet.py +++ b/youtube_dl/extractor/tvnet.py @@ -4,10 +4,10 @@ import re from .common import InfoExtractor -from ..compat import compat_str from ..utils import ( int_or_none, unescapeHTML, + url_or_none, ) @@ -106,9 +106,8 @@ def _real_extract(self, url): for stream in self._download_json(data_file, video_id): if not isinstance(stream, dict): continue - stream_url = stream.get('url') - if (stream_url in stream_urls or not stream_url or - not isinstance(stream_url, compat_str)): + stream_url = url_or_none(stream.get('url')) + if stream_url in stream_urls or not stream_url: continue stream_urls.add(stream_url) formats.extend(self._extract_m3u8_formats( diff --git a/youtube_dl/extractor/tvplay.py b/youtube_dl/extractor/tvplay.py index e09b5f804d..d3adab4574 100644 --- a/youtube_dl/extractor/tvplay.py +++ b/youtube_dl/extractor/tvplay.py @@ -19,6 +19,7 @@ try_get, unsmuggle_url, update_url_query, + url_or_none, ) @@ -255,7 +256,8 @@ def _real_extract(self, url): quality = qualities(['hls', 'medium', 'high']) formats = [] for format_id, video_url in streams.get('streams', {}).items(): - if not video_url or not isinstance(video_url, compat_str): + video_url = url_or_none(video_url) + if not video_url: continue ext = determine_ext(video_url) if ext == 'f4m': diff --git a/youtube_dl/extractor/twitch.py b/youtube_dl/extractor/twitch.py index e01f113310..89ee44224b 100644 --- a/youtube_dl/extractor/twitch.py +++ b/youtube_dl/extractor/twitch.py @@ -27,6 +27,7 @@ unified_timestamp, update_url_query, urlencode_postdata, + url_or_none, urljoin, ) @@ -663,8 +664,8 @@ def _real_extract(self, url): for option in status['quality_options']: if not isinstance(option, dict): continue - source = option.get('source') - if not source or not isinstance(source, compat_str): + source = url_or_none(option.get('source')) + if not source: continue formats.append({ 'url': source, diff --git a/youtube_dl/extractor/udemy.py b/youtube_dl/extractor/udemy.py index a7196997ec..79c45f80e0 100644 --- a/youtube_dl/extractor/udemy.py +++ b/youtube_dl/extractor/udemy.py @@ -20,6 +20,7 @@ sanitized_Request, try_get, unescapeHTML, + url_or_none, urlencode_postdata, ) @@ -265,8 +266,8 @@ def extract_formats(source_list): if not isinstance(source_list, list): return for source in source_list: - video_url = source.get('file') or source.get('src') - if not video_url or not isinstance(video_url, compat_str): + video_url = url_or_none(source.get('file') or source.get('src')) + if not video_url: continue if source.get('type') == 'application/x-mpegURL' or determine_ext(video_url) == 'm3u8': formats.extend(self._extract_m3u8_formats( @@ -293,8 +294,8 @@ def extract_subtitles(track_list): continue if track.get('kind') != 'captions': continue - src = track.get('src') - if not src or not isinstance(src, compat_str): + src = url_or_none(track.get('src')) + if not src: continue lang = track.get('language') or track.get( 'srclang') or track.get('label') @@ -314,8 +315,8 @@ def extract_subtitles(track_list): for cc in captions: if not isinstance(cc, dict): continue - cc_url = cc.get('url') - if not cc_url or not isinstance(cc_url, compat_str): + cc_url = url_or_none(cc.get('url')) + if not cc_url: continue lang = try_get(cc, lambda x: x['locale']['locale'], compat_str) sub_dict = (automatic_captions if cc.get('source') == 'auto' diff --git a/youtube_dl/extractor/vidme.py b/youtube_dl/extractor/vidme.py index 59adb2377e..174e69cd6b 100644 --- a/youtube_dl/extractor/vidme.py +++ b/youtube_dl/extractor/vidme.py @@ -3,15 +3,13 @@ import itertools from .common import InfoExtractor -from ..compat import ( - compat_HTTPError, - compat_str, -) +from ..compat import compat_HTTPError from ..utils import ( ExtractorError, int_or_none, float_or_none, parse_iso8601, + url_or_none, ) @@ -166,8 +164,8 @@ def _real_extract(self, url): formats = [] for f in video.get('formats', []): - format_url = f.get('uri') - if not format_url or not isinstance(format_url, compat_str): + format_url = url_or_none(f.get('uri')) + if not format_url: continue format_type = f.get('type') if format_type == 'dash': diff --git a/youtube_dl/extractor/vk.py b/youtube_dl/extractor/vk.py index 29002b35fc..48b5987c2e 100644 --- a/youtube_dl/extractor/vk.py +++ b/youtube_dl/extractor/vk.py @@ -20,6 +20,7 @@ str_to_int, unescapeHTML, unified_timestamp, + url_or_none, urlencode_postdata, ) from .dailymotion import DailymotionIE @@ -423,7 +424,8 @@ def _real_extract(self, url): formats = [] for format_id, format_url in data.items(): - if not isinstance(format_url, compat_str) or not format_url.startswith(('http', '//', 'rtmp')): + format_url = url_or_none(format_url) + if not format_url or not format_url.startswith(('http', '//', 'rtmp')): continue if (format_id.startswith(('url', 'cache')) or format_id in ('extra_data', 'live_mp4', 'postlive_mp4')): diff --git a/youtube_dl/extractor/xhamster.py b/youtube_dl/extractor/xhamster.py index d1bc992fd9..68a48034ea 100644 --- a/youtube_dl/extractor/xhamster.py +++ b/youtube_dl/extractor/xhamster.py @@ -13,6 +13,7 @@ parse_duration, try_get, unified_strdate, + url_or_none, ) @@ -137,7 +138,8 @@ def get_height(s): else: format_url = format_item filesize = None - if not isinstance(format_url, compat_str): + format_url = url_or_none(format_url) + if not format_url: continue formats.append({ 'format_id': '%s-%s' % (format_id, quality), @@ -198,7 +200,8 @@ def get_height(s): default='{}'), video_id, fatal=False) for format_id, format_url in sources.items(): - if not isinstance(format_url, compat_str): + format_url = url_or_none(format_url) + if not format_url: continue if format_url in format_urls: continue diff --git a/youtube_dl/extractor/yapfiles.py b/youtube_dl/extractor/yapfiles.py index 7fafbf5969..cfb368de94 100644 --- a/youtube_dl/extractor/yapfiles.py +++ b/youtube_dl/extractor/yapfiles.py @@ -4,12 +4,12 @@ import re from .common import InfoExtractor -from ..compat import compat_str from ..utils import ( ExtractorError, int_or_none, qualities, unescapeHTML, + url_or_none, ) @@ -80,9 +80,9 @@ def _real_extract(self, url): formats = [] for format_id in QUALITIES: is_hd = format_id == 'hd' - format_url = playlist.get( - 'file%s' % ('_hd' if is_hd else '')) - if not format_url or not isinstance(format_url, compat_str): + format_url = url_or_none(playlist.get( + 'file%s' % ('_hd' if is_hd else ''))) + if not format_url: continue formats.append({ 'url': format_url, diff --git a/youtube_dl/extractor/youjizz.py b/youtube_dl/extractor/youjizz.py index f33fabe194..dff69fcb7a 100644 --- a/youtube_dl/extractor/youjizz.py +++ b/youtube_dl/extractor/youjizz.py @@ -3,11 +3,11 @@ import re from .common import InfoExtractor -from ..compat import compat_str from ..utils import ( determine_ext, int_or_none, parse_duration, + url_or_none, ) @@ -50,8 +50,8 @@ def _real_extract(self, url): for encoding in encodings: if not isinstance(encoding, dict): continue - format_url = encoding.get('filename') - if not isinstance(format_url, compat_str): + format_url = url_or_none(encoding.get('filename')) + if not format_url: continue if determine_ext(format_url) == 'm3u8': formats.extend(self._extract_m3u8_formats( diff --git a/youtube_dl/extractor/youporn.py b/youtube_dl/extractor/youporn.py index 547adefeba..ea0bce784c 100644 --- a/youtube_dl/extractor/youporn.py +++ b/youtube_dl/extractor/youporn.py @@ -3,13 +3,13 @@ import re from .common import InfoExtractor -from ..compat import compat_str from ..utils import ( int_or_none, sanitized_Request, str_to_int, unescapeHTML, unified_strdate, + url_or_none, ) from ..aes import aes_decrypt_text @@ -88,8 +88,8 @@ def _real_extract(self, url): for definition in definitions: if not isinstance(definition, dict): continue - video_url = definition.get('videoUrl') - if isinstance(video_url, compat_str) and video_url: + video_url = url_or_none(definition.get('videoUrl')) + if video_url: links.append(video_url) # Fallback #1, this also contains extra low quality 180p format diff --git a/youtube_dl/extractor/zattoo.py b/youtube_dl/extractor/zattoo.py index b5a3a07168..fb167c1985 100644 --- a/youtube_dl/extractor/zattoo.py +++ b/youtube_dl/extractor/zattoo.py @@ -13,6 +13,7 @@ ExtractorError, int_or_none, try_get, + url_or_none, urlencode_postdata, ) @@ -150,8 +151,8 @@ def _extract_formats(self, cid, video_id, record_id=None, is_live=False): for watch in watch_urls: if not isinstance(watch, dict): continue - watch_url = watch.get('url') - if not watch_url or not isinstance(watch_url, compat_str): + watch_url = url_or_none(watch.get('url')) + if not watch_url: continue format_id_list = [stream_type] maxrate = watch.get('maxrate') diff --git a/youtube_dl/extractor/zdf.py b/youtube_dl/extractor/zdf.py index bb9020c918..afa3f6c47f 100644 --- a/youtube_dl/extractor/zdf.py +++ b/youtube_dl/extractor/zdf.py @@ -15,6 +15,7 @@ try_get, unified_timestamp, update_url_query, + url_or_none, urljoin, ) @@ -67,8 +68,8 @@ class ZDFIE(ZDFBaseIE): def _extract_subtitles(src): subtitles = {} for caption in try_get(src, lambda x: x['captions'], list) or []: - subtitle_url = caption.get('uri') - if subtitle_url and isinstance(subtitle_url, compat_str): + subtitle_url = url_or_none(caption.get('uri')) + if subtitle_url: lang = caption.get('language', 'deu') subtitles.setdefault(lang, []).append({ 'url': subtitle_url, @@ -76,8 +77,8 @@ def _extract_subtitles(src): return subtitles def _extract_format(self, video_id, formats, format_urls, meta): - format_url = meta.get('url') - if not format_url or not isinstance(format_url, compat_str): + format_url = url_or_none(meta.get('url')) + if not format_url: return if format_url in format_urls: return @@ -152,7 +153,8 @@ def _extract_entry(self, url, player, content, video_id): content, lambda x: x['teaserImageRef']['layouts'], dict) if layouts: for layout_key, layout_url in layouts.items(): - if not isinstance(layout_url, compat_str): + layout_url = url_or_none(layout_url) + if not layout_url: continue thumbnail = { 'url': layout_url,