summary | refs | log | tree | commit | diff
path: root/Scripts/wiki2P.py
diff options
context:
space:
mode:
author jbjjbjjbj <julianteule@gmail.com> 2017-01-13 20:02:20 +0100
committer jbjjbjjbj <julianteule@gmail.com> 2017-01-13 20:02:20 +0100
commit ed3f8a5ba0dbdd9c0d3363568db4e47546513161 (patch)
tree f08b6f119ae54eac979a3be360023a1489ecfeaa /Scripts/wiki2P.py
parent 70b65b88ac2119600b68e1a75e3053459d171764 (diff)
parent 8d5f747e4f69c1093306a3124c42ca17392dc17c (diff)
Merge branch 'master' of https://github.com/jbjjbjjbj/newDotFiles
Diffstat (limited to 'Scripts/wiki2P.py')
-rw-r--r-- Scripts/wiki2P.py 27
1 file changed, 18 insertions, 9 deletions
diff --git a/Scripts/wiki2P.py b/Scripts/wiki2P.py
index f0ded01..84f4d82 100644
--- a/Scripts/wiki2P.py
+++ b/Scripts/wiki2P.py
@@ -1,16 +1,25 @@
-import requests, bs4
+import requests, bs4, re
-#res = requests.get("https://en.wikipedia.org/wiki/Special:Random")
-res = requests.get("https://en.wikipedia.org/wiki/Study")
+# res = requests.get("https://en.wikipedia.org/wiki/Special:Random")
+res = requests.get("https://en.wikipedia.org/wiki/Linux")
-soup = bs4.BeautifulSoup(res.text)
+soup = bs4.BeautifulSoup(res.text, "html.parser")
-element = soup.select("p > a")
+element = soup.select("#mw-content-text a[title]")
+
+
+pattern = re.compile("^\/.*")
+
+
+for i in element:
+ if "Edit section" not in i["title"] and pattern.match(i["href"]):
+ if "div" not in str(i.parent) and "th" not in str(i.parent) and "td" not in str(i.parent):
+ try:
+ i["class"]
+ except KeyError:
+ print(i)
+ break
-print(element[0])
-#while( soup.select(".firstHeading")[0] != "Philosophy"):
-#
-# print(soup.select(".firstHeading")[0].text)