forked from mikeizbicki/html_validator
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathHTML_Validator.py
More file actions
74 lines (67 loc) · 2.13 KB
/
HTML_Validator.py
File metadata and controls
74 lines (67 loc) · 2.13 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
#!/bin/python3
def validate_html(html):
    '''
    Perform a limited form of html validation by checking that every
    opening tag is closed by a matching closing tag in the correct
    (properly nested) order.

    The tags are obtained from `_extract_tags` and processed with the
    balanced-parentheses stack algorithm: opening tags are pushed, and
    each closing tag must match the tag on top of the stack.

    Only the tag *name* is compared, so attributes on an opening tag do
    not prevent it from matching its closing tag.  Self-closing tags
    (ending in '/>') need no closing tag and are skipped.  Tag names are
    compared case-sensitively.

    Returns True when the tags are balanced (including input with no
    tags at all) and False otherwise.

    >>> validate_html('<strong>example</strong>')
    True
    >>> validate_html('<strong>example')
    False
    >>> validate_html('<a></b></a>')
    False
    >>> validate_html('<a href="http://example.com">link</a>')
    True
    >>> validate_html('plain text')
    True
    >>> validate_html('<')
    False
    '''
    tags = _extract_tags(html)
    if not tags:
        # Nothing tag-like was extracted.  Plain text (or the empty string)
        # has no unclosed tags, but a dangling angle bracket means a tag
        # was started or ended without its counterpart.
        return '<' not in html and '>' not in html

    stack = []  # names of the tags that are currently open, innermost last
    for tag in tags:
        if not tag.startswith('<'):
            # A fragment without an opening bracket (e.g. a stray '>')
            # is not a tag at all.
            return False

        if tag.startswith('</'):
            # Closing tag: it must match the innermost open tag.
            parts = tag[2:-1].split()
            name = parts[0] if parts else ''
            if not stack or stack[-1] != name:
                return False
            stack.pop()
        elif tag.endswith('/>'):
            # Self-closing tag such as '<br/>': opens and closes at once.
            continue
        else:
            # Opening tag: keep only the name, dropping any attributes,
            # so that '<a href="...">' is matched by '</a>'.
            parts = tag[1:-1].split()
            stack.append(parts[0] if parts else '')

    # Any tag still on the stack was opened but never closed.
    return not stack
def _extract_tags(html):
'''
This is a helper function for `validate_html`.
By convention in Python, helper functions that are not meant to be
used directly by the user are prefixed with an underscore.
This function returns a list of all the html tags
contained in the input string,
stripping out all text not contained within angle brackets.
>>> _extract_tags('Python <strong>rocks</strong>!')
['<strong>', '</strong>']
'''
taglist = []
word = ''
openbracket = False
for i in range(len(html)):
if html[i] == '<':
openbracket = True
word = word + html[i]
elif html[i] == '>':
openbracket = False
word = word + html[i]
taglist.append(word)
word = ''
else:
if openbracket is True:
word = word + html[i]
return taglist