-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathsimple_html.py
More file actions
43 lines (35 loc) · 989 Bytes
/
simple_html.py
File metadata and controls
43 lines (35 loc) · 989 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
from bs4 import BeautifulSoup
# In-memory HTML fixture parsed by this script: one <h1>, two <p> elements
# (only the first carries class "subtitle"), and a <ul> with four <li> items.
SIMPLE_HTML = '''<html>
<head></head>
<body>
<h1>This is title</h1>
<p class="subtitle">Hi my name is Vasu.</p>
<p>Here's another p without a class</p>
<ul>
<li>Vasu</li>
<li>Kunj</li>
<li>Jay</li>
<li>Parth</li>
</ul>
</body>
</html>'''
# Parse the fixture once, at module load, using the 'html.parser' builder;
# every find_* function in this file queries this shared soup object.
simple_soup = BeautifulSoup(SIMPLE_HTML, 'html.parser')
# Inline one-liner form of the <h1> lookup, kept for reference:
# print(simple_soup.find('h1').string)
def find_title():
    """Print the ``.string`` of the first <h1> tag found in ``simple_soup``."""
    print(simple_soup.find('h1').string)
def find_li():
    """Print a list with the ``.string`` of every <li> tag in ``simple_soup``."""
    print([item.string for item in simple_soup.find_all('li')])
def find_subtitle():
    """Print the ``.string`` of the first <p> whose class is ``subtitle``."""
    subtitle_tag = simple_soup.find('p', attrs={'class': 'subtitle'})
    print(subtitle_tag.string)
def find_other_paragraph():
    """Print the ``.string`` of the first <p> that lacks the ``subtitle`` class.

    All <p> tags are collected first and then filtered, so an ``IndexError``
    is raised when no paragraph without the ``subtitle`` class exists.
    """
    unclassed = []
    for tag in simple_soup.find_all('p'):
        # A tag with no class attribute falls back to an empty list here.
        css_classes = tag.attrs.get('class', [])
        if 'subtitle' not in css_classes:
            unclassed.append(tag)
    print(unclassed[0].string)
# Run each demo lookup in order; every function prints its own result.
for _demo in (find_title, find_li, find_subtitle, find_other_paragraph):
    _demo()