-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathUntitled.py
More file actions
33 lines (29 loc) · 859 Bytes
/
Untitled.py
File metadata and controls
33 lines (29 loc) · 859 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
from urllib.request import urlopen
from bs4 import BeautifulSoup
import re
# Download the article and parse it while the HTTP response is still open.
# The ``with`` block closes the connection as soon as parsing has finished,
# instead of leaving the response object open for the rest of the script.
with urlopen("http://en.wikipedia.org/wiki/Kevin_Bacon") as html:
    bsObj = BeautifulSoup(html, "html.parser")

# --- Disabled experiments ---------------------------------------------------
# These snippets are kept as ordinary comments rather than bare triple-quoted
# string literals, so the interpreter never has to tokenize them (the image
# regex contains "\." and "\/" sequences, which are invalid escapes in a
# non-raw string literal).
# NOTE(review): the nesting of these snippets is reconstructed; confirm the
# intended indentation before re-enabling any of them.
#
# Spans with class "green" -- findAll arguments are (tag name, attributes):
# nameList = bsObj.findAll("span", {"class": "green"})
# for name in nameList:
#     print(name)
# print(len(nameList))
#
# Elements matching both id='title' and class 'text':
# alltext = bsObj.findAll(id='title', class_='text')
# for t in alltext:
#     print(t)
#
# Text of the sibling that precedes the parent of a specific image:
# print(bsObj.find('img', {'src': '../img/gifts/img1.jpg'})
#       .parent.previous_sibling.get_text())
#
# Images -- print the src of every <img> whose src matches the pattern:
# images = bsObj.findAll('img', {'src': re.compile(r'\.\.\/img\/gifts/img.*\.jpg')})
# for image in images:
#     print(image['src'])
###links
# Print the href of every <a> inside <div id="bodyContent"> whose href starts
# with "/wiki/" and contains no ":" anywhere after that prefix.
# NOTE(review): the ":" exclusion presumably skips namespaced pages such as
# "File:" or "Category:" links -- confirm that this is the intent.
article_href = re.compile(r'^(/wiki/)((?!:).)*$')

body_content = bsObj.find('div', {'id': 'bodyContent'})
if body_content is None:
    # find() returns None when nothing matches; fail with a clear message
    # instead of an AttributeError on None further down.
    raise RuntimeError("no <div id='bodyContent'> element found in the page")

# The href= filter only matches tags that actually carry an href attribute,
# so no separate "'href' in link.attrs" check is needed inside the loop.
for link in body_content.find_all('a', href=article_href):
    print(link.attrs['href'])