From 750e9833b83c6e17a4efa8d5dac5b3cd848f4603 Mon Sep 17 00:00:00 2001
From: Filippo Valsorda
Date: Mon, 28 Oct 2013 01:50:17 -0400
Subject: [PATCH] Add the missing age_limit tags; added a devscript to do a superficial check for porn sites without the age_limit tag in the test

---
Reviewer note: this copy of the patch had been collapsed onto three physical
lines. Line breaks were restored so that every hunk matches the line counts in
its "@@" header; the leading indentation of context lines was reconstructed
from ordinary Python nesting and should be verified against the target tree
before applying (or apply with `git apply --ignore-whitespace`).

 devscripts/check-porn.py           | 39 ++++++++++++++++++++++++++++++
 youtube_dl/extractor/keezmovies.py |  5 +++-
 youtube_dl/extractor/pornhub.py    |  2 ++
 youtube_dl/extractor/pornotube.py  |  3 ++-
 youtube_dl/extractor/spankwire.py  |  4 +++
 youtube_dl/extractor/tube8.py      |  2 ++
 youtube_dl/extractor/youjizz.py    |  8 ++++--
 7 files changed, 59 insertions(+), 4 deletions(-)
 create mode 100644 devscripts/check-porn.py

diff --git a/devscripts/check-porn.py b/devscripts/check-porn.py
new file mode 100644
index 0000000000..63401fe18a
--- /dev/null
+++ b/devscripts/check-porn.py
@@ -0,0 +1,39 @@
+#!/usr/bin/env python
+
+"""
+This script employs a VERY basic heuristic ('porn' in webpage.lower()) to check
+if we are not 'age_limit' tagging some porn site
+"""
+
+# Allow direct execution
+import os
+import sys
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from test.helper import get_testcases
+from youtube_dl.utils import compat_urllib_request
+
+for test in get_testcases():
+    try:
+        webpage = compat_urllib_request.urlopen(test['url'], timeout=10).read()
+    except:
+        print('\nFail: {0}'.format(test['name']))
+        continue
+
+    webpage = webpage.decode('utf8', 'replace')
+
+    if 'porn' in webpage.lower() and ('info_dict' not in test
+                                      or 'age_limit' not in test['info_dict']
+                                      or test['info_dict']['age_limit'] != 18):
+        print('\nPotential missing age_limit check: {0}'.format(test['name']))
+
+    elif 'porn' not in webpage.lower() and ('info_dict' in test and
+                                            'age_limit' in test['info_dict'] and
+                                            test['info_dict']['age_limit'] == 18):
+        print('\nPotential false negative: {0}'.format(test['name']))
+
+    else:
+        sys.stdout.write('.')
+        sys.stdout.flush()
+
+print()
diff --git a/youtube_dl/extractor/keezmovies.py b/youtube_dl/extractor/keezmovies.py
index 23d5209d99..5e05900da2 100644
--- a/youtube_dl/extractor/keezmovies.py
+++ b/youtube_dl/extractor/keezmovies.py
@@ -6,7 +6,6 @@
     compat_urllib_parse_urlparse,
     compat_urllib_request,
     compat_urllib_parse,
-    unescapeHTML,
 )
 from ..aes import (
     aes_decrypt_text
@@ -20,6 +19,7 @@ class KeezMoviesIE(InfoExtractor):
         u'md5': u'6e297b7e789329923fcf83abb67c9289',
         u'info_dict': {
             u"title": u"Petite Asian Lady Mai Playing In Bathtub",
+            u"age_limit": 18,
         }
     }
 
@@ -48,6 +48,8 @@ def _real_extract(self, url):
         format = path.split('/')[4].split('_')[:2]
         format = "-".join( format )
 
+        age_limit = self._rta_search(webpage)
+
         return {
             'id': video_id,
             'title': video_title,
@@ -55,4 +57,5 @@ def _real_extract(self, url):
             'ext': extension,
             'format': format,
             'format_id': format,
+            'age_limit': age_limit,
         }
diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py
index 3dbd2ab699..5e2454f1b7 100644
--- a/youtube_dl/extractor/pornhub.py
+++ b/youtube_dl/extractor/pornhub.py
@@ -21,6 +21,7 @@ class PornHubIE(InfoExtractor):
         u'info_dict': {
             u"uploader": u"BABES-COM",
             u"title": u"Seductive Indian beauty strips down and fingers her pink pussy",
+            u"age_limit": 18
         }
     }
 
@@ -64,4 +65,5 @@ def _real_extract(self, url):
             'title': video_title,
             'thumbnail': thumbnail,
             'formats': formats,
+            'age_limit': 18,
         }
diff --git a/youtube_dl/extractor/pornotube.py b/youtube_dl/extractor/pornotube.py
index 5d770ec285..35dc5a9ffa 100644
--- a/youtube_dl/extractor/pornotube.py
+++ b/youtube_dl/extractor/pornotube.py
@@ -16,7 +16,8 @@ class PornotubeIE(InfoExtractor):
         u'md5': u'374dd6dcedd24234453b295209aa69b6',
         u'info_dict': {
             u"upload_date": u"20090708",
-            u"title": u"Marilyn-Monroe-Bathing"
+            u"title": u"Marilyn-Monroe-Bathing",
+            u"age_limit": 18
         }
     }
 
diff --git a/youtube_dl/extractor/spankwire.py b/youtube_dl/extractor/spankwire.py
index f0d5009c71..32df0a7fb5 100644
--- a/youtube_dl/extractor/spankwire.py
+++ b/youtube_dl/extractor/spankwire.py
@@ -22,6 +22,7 @@ class SpankwireIE(InfoExtractor):
             u"uploader": u"oreusz",
             u"title": u"Buckcherry`s X Rated Music Video Crazy Bitch",
             u"description": u"Crazy Bitch X rated music video.",
+            u"age_limit": 18,
         }
     }
 
@@ -60,6 +61,8 @@ def _real_extract(self, url):
             })
         formats.sort(key=lambda format: list(map(lambda s: s.zfill(6), format['format'].split('-'))))
 
+        age_limit = self._rta_search(webpage)
+
         return {
             'id': video_id,
             'uploader': video_uploader,
@@ -67,4 +70,5 @@ def _real_extract(self, url):
             'thumbnail': thumbnail,
             'description': description,
             'formats': formats,
+            'age_limit': age_limit,
         }
diff --git a/youtube_dl/extractor/tube8.py b/youtube_dl/extractor/tube8.py
index ebc8c1f4f1..aea9d9a240 100644
--- a/youtube_dl/extractor/tube8.py
+++ b/youtube_dl/extractor/tube8.py
@@ -22,6 +22,7 @@ class Tube8IE(InfoExtractor):
             u"description": u"hot teen Kasia grinding",
             u"uploader": u"unknown",
             u"title": u"Kasia music video",
+            u"age_limit": 18,
         }
     }
 
@@ -60,4 +61,5 @@ def _real_extract(self, url):
             'ext': extension,
             'format': format,
             'format_id': format,
+            'age_limit': 18,
         }
diff --git a/youtube_dl/extractor/youjizz.py b/youtube_dl/extractor/youjizz.py
index 1265639e82..1fcc518acd 100644
--- a/youtube_dl/extractor/youjizz.py
+++ b/youtube_dl/extractor/youjizz.py
@@ -13,7 +13,8 @@ class YouJizzIE(InfoExtractor):
         u'file': u'2189178.flv',
         u'md5': u'07e15fa469ba384c7693fd246905547c',
         u'info_dict': {
-            u"title": u"Zeichentrick 1"
+            u"title": u"Zeichentrick 1",
+            u"age_limit": 18,
         }
     }
 
@@ -25,6 +26,8 @@ def _real_extract(self, url):
         # Get webpage content
         webpage = self._download_webpage(url, video_id)
 
+        age_limit = self._rta_search(webpage)
+
         # Get the video title
         video_title = self._html_search_regex(r'(?P<title>.*)',
             webpage, u'title').strip()
@@ -60,6 +63,7 @@ def _real_extract(self, url):
                 'title': video_title,
                 'ext': 'flv',
                 'format': 'flv',
-                'player_url': embed_page_url}
+                'player_url': embed_page_url,
+                'age_limit': age_limit}
 
         return [info]