mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2024-11-27 06:10:12 +01:00
Add the missing age_limit tags; add a devscript that performs a superficial check for porn sites whose test cases lack the age_limit tag
This commit is contained in:
parent
82f0ac657c
commit
750e9833b8
39
devscripts/check-porn.py
Normal file
39
devscripts/check-porn.py
Normal file
|
@ -0,0 +1,39 @@
|
||||||
|
#!/usr/bin/env python

"""
This script employs a VERY basic heuristic ('porn' in webpage.lower()) to check
whether the test case of some porn site is missing its 'age_limit' tag.

For every test case the test URL is downloaded and the heuristic is compared
with the 'age_limit' declared in the test's 'info_dict':

 * the page mentions 'porn' but the test does not expect age_limit == 18:
   "Potential missing age_limit check" is reported;
 * the page does not mention 'porn' but the test expects age_limit == 18:
   "Potential false negative" is reported;
 * otherwise a progress dot is printed.

Test cases whose page cannot be fetched are reported as "Fail" and skipped.
"""

# Allow direct execution
import os
import sys
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

from test.helper import get_testcases
from youtube_dl.utils import compat_urllib_request

for test in get_testcases():
    try:
        response = compat_urllib_request.urlopen(test['url'], timeout=10)
        try:
            webpage = response.read()
        finally:
            # Always release the connection, even if read() fails.
            response.close()
    except Exception:
        # Best effort: any download problem (or a test without a URL) is only
        # reported.  Catching Exception rather than using a bare except keeps
        # Ctrl-C (KeyboardInterrupt) able to stop the run.
        print('\nFail: {0}'.format(test['name']))
        continue

    # Undecodable bytes are replaced; only the substring check matters here.
    looks_adult = 'porn' in webpage.decode('utf8', 'replace').lower()
    # True only when the test explicitly expects an age limit of 18.
    tagged_adult = test.get('info_dict', {}).get('age_limit') == 18

    if looks_adult and not tagged_adult:
        print('\nPotential missing age_limit check: {0}'.format(test['name']))
    elif tagged_adult and not looks_adult:
        print('\nPotential false negative: {0}'.format(test['name']))
    else:
        sys.stdout.write('.')
    sys.stdout.flush()

# Terminate the line of progress dots.  A bare print() would output "()" when
# run with Python 2, so write the newline explicitly.
sys.stdout.write('\n')
|
|
@ -6,7 +6,6 @@
|
||||||
compat_urllib_parse_urlparse,
|
compat_urllib_parse_urlparse,
|
||||||
compat_urllib_request,
|
compat_urllib_request,
|
||||||
compat_urllib_parse,
|
compat_urllib_parse,
|
||||||
unescapeHTML,
|
|
||||||
)
|
)
|
||||||
from ..aes import (
|
from ..aes import (
|
||||||
aes_decrypt_text
|
aes_decrypt_text
|
||||||
|
@ -20,6 +19,7 @@ class KeezMoviesIE(InfoExtractor):
|
||||||
u'md5': u'6e297b7e789329923fcf83abb67c9289',
|
u'md5': u'6e297b7e789329923fcf83abb67c9289',
|
||||||
u'info_dict': {
|
u'info_dict': {
|
||||||
u"title": u"Petite Asian Lady Mai Playing In Bathtub",
|
u"title": u"Petite Asian Lady Mai Playing In Bathtub",
|
||||||
|
u"age_limit": 18,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -48,6 +48,8 @@ def _real_extract(self, url):
|
||||||
format = path.split('/')[4].split('_')[:2]
|
format = path.split('/')[4].split('_')[:2]
|
||||||
format = "-".join( format )
|
format = "-".join( format )
|
||||||
|
|
||||||
|
age_limit = self._rta_search(webpage)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': video_title,
|
'title': video_title,
|
||||||
|
@ -55,4 +57,5 @@ def _real_extract(self, url):
|
||||||
'ext': extension,
|
'ext': extension,
|
||||||
'format': format,
|
'format': format,
|
||||||
'format_id': format,
|
'format_id': format,
|
||||||
|
'age_limit': age_limit,
|
||||||
}
|
}
|
||||||
|
|
|
@ -21,6 +21,7 @@ class PornHubIE(InfoExtractor):
|
||||||
u'info_dict': {
|
u'info_dict': {
|
||||||
u"uploader": u"BABES-COM",
|
u"uploader": u"BABES-COM",
|
||||||
u"title": u"Seductive Indian beauty strips down and fingers her pink pussy",
|
u"title": u"Seductive Indian beauty strips down and fingers her pink pussy",
|
||||||
|
u"age_limit": 18
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -64,4 +65,5 @@ def _real_extract(self, url):
|
||||||
'title': video_title,
|
'title': video_title,
|
||||||
'thumbnail': thumbnail,
|
'thumbnail': thumbnail,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
|
'age_limit': 18,
|
||||||
}
|
}
|
||||||
|
|
|
@ -16,7 +16,8 @@ class PornotubeIE(InfoExtractor):
|
||||||
u'md5': u'374dd6dcedd24234453b295209aa69b6',
|
u'md5': u'374dd6dcedd24234453b295209aa69b6',
|
||||||
u'info_dict': {
|
u'info_dict': {
|
||||||
u"upload_date": u"20090708",
|
u"upload_date": u"20090708",
|
||||||
u"title": u"Marilyn-Monroe-Bathing"
|
u"title": u"Marilyn-Monroe-Bathing",
|
||||||
|
u"age_limit": 18
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -22,6 +22,7 @@ class SpankwireIE(InfoExtractor):
|
||||||
u"uploader": u"oreusz",
|
u"uploader": u"oreusz",
|
||||||
u"title": u"Buckcherry`s X Rated Music Video Crazy Bitch",
|
u"title": u"Buckcherry`s X Rated Music Video Crazy Bitch",
|
||||||
u"description": u"Crazy Bitch X rated music video.",
|
u"description": u"Crazy Bitch X rated music video.",
|
||||||
|
u"age_limit": 18,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -60,6 +61,8 @@ def _real_extract(self, url):
|
||||||
})
|
})
|
||||||
formats.sort(key=lambda format: list(map(lambda s: s.zfill(6), format['format'].split('-'))))
|
formats.sort(key=lambda format: list(map(lambda s: s.zfill(6), format['format'].split('-'))))
|
||||||
|
|
||||||
|
age_limit = self._rta_search(webpage)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'uploader': video_uploader,
|
'uploader': video_uploader,
|
||||||
|
@ -67,4 +70,5 @@ def _real_extract(self, url):
|
||||||
'thumbnail': thumbnail,
|
'thumbnail': thumbnail,
|
||||||
'description': description,
|
'description': description,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
|
'age_limit': age_limit,
|
||||||
}
|
}
|
||||||
|
|
|
@ -22,6 +22,7 @@ class Tube8IE(InfoExtractor):
|
||||||
u"description": u"hot teen Kasia grinding",
|
u"description": u"hot teen Kasia grinding",
|
||||||
u"uploader": u"unknown",
|
u"uploader": u"unknown",
|
||||||
u"title": u"Kasia music video",
|
u"title": u"Kasia music video",
|
||||||
|
u"age_limit": 18,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -60,4 +61,5 @@ def _real_extract(self, url):
|
||||||
'ext': extension,
|
'ext': extension,
|
||||||
'format': format,
|
'format': format,
|
||||||
'format_id': format,
|
'format_id': format,
|
||||||
|
'age_limit': 18,
|
||||||
}
|
}
|
||||||
|
|
|
@ -13,7 +13,8 @@ class YouJizzIE(InfoExtractor):
|
||||||
u'file': u'2189178.flv',
|
u'file': u'2189178.flv',
|
||||||
u'md5': u'07e15fa469ba384c7693fd246905547c',
|
u'md5': u'07e15fa469ba384c7693fd246905547c',
|
||||||
u'info_dict': {
|
u'info_dict': {
|
||||||
u"title": u"Zeichentrick 1"
|
u"title": u"Zeichentrick 1",
|
||||||
|
u"age_limit": 18,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -25,6 +26,8 @@ def _real_extract(self, url):
|
||||||
# Get webpage content
|
# Get webpage content
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
|
age_limit = self._rta_search(webpage)
|
||||||
|
|
||||||
# Get the video title
|
# Get the video title
|
||||||
video_title = self._html_search_regex(r'<title>(?P<title>.*)</title>',
|
video_title = self._html_search_regex(r'<title>(?P<title>.*)</title>',
|
||||||
webpage, u'title').strip()
|
webpage, u'title').strip()
|
||||||
|
@ -60,6 +63,7 @@ def _real_extract(self, url):
|
||||||
'title': video_title,
|
'title': video_title,
|
||||||
'ext': 'flv',
|
'ext': 'flv',
|
||||||
'format': 'flv',
|
'format': 'flv',
|
||||||
'player_url': embed_page_url}
|
'player_url': embed_page_url,
|
||||||
|
'age_limit': age_limit}
|
||||||
|
|
||||||
return [info]
|
return [info]
|
||||||
|
|
Loading…
Reference in New Issue
Block a user