[core] _VALID_URLS implementation

- implemented _VALID_URLS extractor property
- improvement suggested by @Grub4k
This commit is contained in:
nixxo 2022-12-15 16:46:37 +01:00
parent c733555106
commit db96683cf1
No known key found for this signature in database
GPG Key ID: E0DE62EF9A9BFAB2
2 changed files with 10 additions and 2 deletions

View File

@@ -14,7 +14,7 @@
NO_ATTR = object()
STATIC_CLASS_PROPERTIES = [
'IE_NAME', '_ENABLED', '_VALID_URL', # Used for URL matching
'IE_NAME', '_ENABLED', '_VALID_URL', '_VALID_URLS', # Used for URL matching
'_WORKING', 'IE_DESC', '_NETRC_MACHINE', 'SEARCH_KEY', # Used for --extractor-descriptions
'age_limit', # Used for --age-limit (evaluated)
'_RETURN_TYPE', # Accessed in CLI only with instance (evaluated)

View File

@@ -447,7 +447,7 @@ class InfoExtractor:
Subclasses of this should also be added to the list of extractors and
should define a _VALID_URL regexp and, re-define the _real_extract() and
should define a _VALID_URL regexp (or a _VALID_URLS list of regexps) and re-define the _real_extract() and
(optionally) _real_initialize() methods.
Subclasses may also override suitable() if necessary, but ensure the function
@@ -508,6 +508,7 @@ class InfoExtractor:
IE_DESC = None
SEARCH_KEY = None
_VALID_URL = None
_VALID_URLS = []
_EMBED_REGEX = []
def _login_hint(self, method=NO_DEFAULT, netrc=None):
@@ -534,6 +535,13 @@ def __init__(self, downloader=None):
def _match_valid_url(cls, url):
if cls._VALID_URL is False:
return None
if cls._VALID_URLS:
if '_VALID_URLS_RE' not in cls.__dict__:
cls._VALID_URLS_RE = tuple(map(re.compile, cls._VALID_URLS))
return next(filter(None, (
valid_url_re.match(url) for valid_url_re in cls._VALID_URLS_RE)), None)
# This does not use has/getattr intentionally - we want to know whether
# we have cached the regexp for *this* class, whereas getattr would also
# match the superclass