blob: 84f4d82401f251b4ec5db067c0a65802bbf71064 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
|
import requests, bs4, re
# Print the first "body text" link of a Wikipedia article: the first anchor
# inside the content area that has a title, points to a site-relative URL,
# is not a section-edit link, does not sit directly inside a div/table cell,
# and carries no CSS class.
#
# NOTE: to start from a random article instead, request
# "https://en.wikipedia.org/wiki/Special:Random".
# A timeout keeps the script from hanging forever on a stalled connection.
res = requests.get("https://en.wikipedia.org/wiki/Linux", timeout=10)
# Fail loudly on an HTTP error instead of silently parsing an error page.
res.raise_for_status()
soup = bs4.BeautifulSoup(res.text, "html.parser")

# Every anchor with a title attribute inside the article content container.
element = soup.select("#mw-content-text a[title]")
for i in element:
    # An anchor may have a title but no href; treat that as "not a match"
    # rather than crashing with KeyError.
    href = i.get("href", "")
    if "Edit section" in i["title"] or not href.startswith("/"):
        continue
    # Compare the parent's tag name. A substring test on the parent's
    # serialized HTML would reject any paragraph containing e.g. "the".
    if i.parent.name in ("div", "th", "td"):
        continue
    # Only anchors without a class attribute are wanted.
    if i.has_attr("class"):
        continue
    print(i)
    break
|