summary refs log tree commit diff
path: root/Scripts/wiki2P.py
diff options
context:
space:
mode:
Diffstat (limited to 'Scripts/wiki2P.py')
-rw-r--r-- Scripts/wiki2P.py 47
1 files changed, 34 insertions, 13 deletions
diff --git a/Scripts/wiki2P.py b/Scripts/wiki2P.py
index 84f4d82..316c7e9 100644
--- a/Scripts/wiki2P.py
+++ b/Scripts/wiki2P.py
@@ -1,25 +1,46 @@
import requests, bs4, re
-# res = requests.get("https://en.wikipedia.org/wiki/Special:Random")
-res = requests.get("https://en.wikipedia.org/wiki/Linux")
-soup = bs4.BeautifulSoup(res.text, "html.parser")
+def calculate(site):
-element = soup.select("#mw-content-text a[title]")
+    """Return the first non-File/Help "/wiki/" href in the page's first paragraph.
+
+    site is a path such as "/wiki/Linux". The paragraph is the first <p> inside
+    div.mw-content-ltr; "" is returned when no such paragraph or link exists.
+    """
+    print("Downloading wikipedia site: " + site)
-pattern = re.compile("^\/.*")
+    res = requests.get("https://en.wikipedia.org" + site)
+    print("Download completed analysing")
-for i in element:
- if "Edit section" not in i["title"] and pattern.match(i["href"]):
- if "div" not in str(i.parent) and "th" not in str(i.parent) and "td" not in str(i.parent):
- try:
- i["class"]
- except KeyError:
- print(i)
- break
+
+    soup = bs4.BeautifulSoup(res.text, "html.parser")
+    content = soup.find("div", {"class": "mw-content-ltr"})
+    element = content.find("p") if content is not None else None
+    if element is None:
+        return ""  # no content div or no <p>: nothing to follow
+
+    # Raw string: "\/" is an invalid escape sequence in a normal literal.
+    pattern = re.compile(r"/wiki/(?!File|Help).*")
+
+    # href=True skips anchors without an href, which would raise KeyError.
+    for i in element.find_all("a", href=True):
+        if pattern.match(i["href"]):
+            return i["href"]
+
+    return ""
+
+
+
+
+siter = "/wiki/Linux"
+# Compare by value ('is not' tests identity); stop too when no link was found ("").
+while siter and siter.lower() != "/wiki/philosophy":
+    input(siter)  # show the current page and wait for Enter before the next hop
+    siter = calculate(siter)