summaryrefslogtreecommitdiff
path: root/Scripts/wiki2P.py
blob: 84f4d82401f251b4ec5db067c0a65802bbf71064 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
import requests, bs4, re

# Page the script starts from. Swap in the commented URL to begin from a
# random article instead of a fixed one.
# START_URL = "https://en.wikipedia.org/wiki/Special:Random"
START_URL = "https://en.wikipedia.org/wiki/Linux"

# Seconds to wait for the server; requests waits indefinitely by default.
REQUEST_TIMEOUT = 10

# Anchors whose direct parent is one of these tags are skipped.
SKIPPED_PARENT_TAGS = frozenset({"div", "th", "td"})


def is_body_link(title, href, parent_tag, has_class):
    """Return True when an anchor qualifies as the link the script looks for.

    Args:
        title: Value of the anchor's ``title`` attribute.
        href: Value of the anchor's ``href`` attribute, or None if absent.
        parent_tag: Tag name of the anchor's direct parent, or None.
        has_class: Whether the anchor carries a ``class`` attribute.

    Returns:
        True if the href starts with "/", the title does not contain
        "Edit section", the parent tag is not in SKIPPED_PARENT_TAGS and
        the anchor has no ``class`` attribute; False otherwise.
    """
    # An anchor without an href cannot be followed, so it never qualifies.
    if href is None or not href.startswith("/"):
        return False
    if "Edit section" in title:
        return False
    # Compare the parent's tag name rather than searching its rendered HTML:
    # a substring test for "th" would also match ordinary words in the text.
    if parent_tag in SKIPPED_PARENT_TAGS:
        return False
    return not has_class


def find_first_body_link(html):
    """Return the first qualifying anchor in *html*, or None if there is none.

    Only anchors that have a ``title`` attribute and sit under the element
    with id ``mw-content-text`` are considered, in document order.
    """
    soup = bs4.BeautifulSoup(html, "html.parser")
    for anchor in soup.select("#mw-content-text a[title]"):
        parent = anchor.parent
        parent_tag = parent.name if parent is not None else None
        if is_body_link(
            anchor["title"],
            anchor.get("href"),
            parent_tag,
            anchor.has_attr("class"),
        ):
            return anchor
    return None


def main():
    """Fetch START_URL and print the first qualifying anchor, if any.

    Raises:
        requests.RequestException: if the request fails, times out or the
            server answers with an error status.
    """
    res = requests.get(START_URL, timeout=REQUEST_TIMEOUT)
    # Fail loudly on an error response instead of parsing an error page.
    res.raise_for_status()
    link = find_first_body_link(res.text)
    if link is not None:
        print(link)


if __name__ == "__main__":
    main()