mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2024-11-27 06:10:12 +01:00
[parsing] fix: don't push unmatched void tags onto queue
This commit is contained in:
parent
176a156c65
commit
8451074b50
|
@ -186,6 +186,9 @@ def test_get_element_text_and_html_by_tag(self):
|
|||
self.assertIsNone(get_element_text_and_html_by_tag('article', html))
|
||||
|
||||
def test_get_elements_text_and_html_by_tag(self):
|
||||
class StrictParser(MatchingElementParser):
|
||||
STRICT = True
|
||||
|
||||
test_string = '''
|
||||
<img src="a.png">
|
||||
<img src="b.png" />
|
||||
|
@ -194,6 +197,10 @@ def test_get_elements_text_and_html_by_tag(self):
|
|||
items = get_elements_text_and_html_by_tag('img', test_string)
|
||||
self.assertListEqual(items, [('', '<img src="a.png">'), ('', '<img src="b.png" />')])
|
||||
|
||||
self.assertEqual(
|
||||
StrictParser.get_element_text_and_html_by_tag('use', '<use><img></use>'),
|
||||
('<img>', '<use><img></use>'))
|
||||
|
||||
def test_get_element_text_and_html_by_tag_malformed(self):
|
||||
inner_text = 'inner text'
|
||||
malnested_elements = f'<malnested_a><malnested_b>{inner_text}</malnested_a></malnested_b>'
|
||||
|
|
|
@ -185,17 +185,19 @@ def handle_starttag(self, tag, attrs):
|
|||
tag_text = HTMLTagParser.ANY_TAG_REGEX.match(self.rawdata[self._offset:]).group()
|
||||
|
||||
tag_obj = tag
|
||||
tag_is_open = not (tag_text.endswith('/>') or tag in self.VOID_TAGS)
|
||||
if self.predicate(tag, attrs):
|
||||
tag_obj = self.Tag(tag, string=self.rawdata, attrs=attrs)
|
||||
tag_obj.openrange(self._offset, len(tag_text))
|
||||
if tag_text.endswith('/>') or tag in self.VOID_TAGS:
|
||||
if tag_is_open:
|
||||
nesting = []
|
||||
self._nestedtags[-1].append(nesting)
|
||||
self._nestedtags.append(nesting)
|
||||
else:
|
||||
self._nestedtags[-1].append(tag_obj)
|
||||
self.callback(tag_obj)
|
||||
return
|
||||
nesting = []
|
||||
self._nestedtags[-1].append(nesting)
|
||||
self._nestedtags.append(nesting)
|
||||
self.tagstack.appendleft(tag_obj)
|
||||
if tag_is_open:
|
||||
self.tagstack.appendleft(tag_obj)
|
||||
|
||||
handle_startendtag = handle_starttag
|
||||
|
||||
|
|
Loading…
Reference in New Issue
Block a user