From 8d5f747e4f69c1093306a3124c42ca17392dc17c Mon Sep 17 00:00:00 2001
From: jbjjbjjbj <julianteule@gmail.com>
Date: Wed, 21 Dec 2016 22:12:47 +0100
Subject: Wiki2P

---
 Scripts/wiki2P.py | 27 ++++++++++++++++++---------
 1 file changed, 18 insertions(+), 9 deletions(-)

(limited to 'Scripts/wiki2P.py')

diff --git a/Scripts/wiki2P.py b/Scripts/wiki2P.py
index f0ded01..84f4d82 100644
--- a/Scripts/wiki2P.py
+++ b/Scripts/wiki2P.py
@@ -1,16 +1,25 @@
-import requests, bs4
+import requests, bs4, re
 
-#res = requests.get("https://en.wikipedia.org/wiki/Special:Random")
-res = requests.get("https://en.wikipedia.org/wiki/Study")
+# Alternative start page (random article): https://en.wikipedia.org/wiki/Special:Random
+res = requests.get("https://en.wikipedia.org/wiki/Linux", timeout=10)
 
 
-soup = bs4.BeautifulSoup(res.text)
+soup = bs4.BeautifulSoup(res.text, "html.parser")
 
 
-element = soup.select("p > a")
+# Candidate links: anchors with a title attribute inside the article body.
+element = soup.select("#mw-content-text a[title]")
+
+# Site-relative hrefs only; raw string because "\/" is an invalid escape.
+pattern = re.compile(r"^/.*")
+
+# Print the first relative link (anchors lacking href are skipped) that is
+# not an edit link, has no class and whose parent is not div/th/td; then stop.
+for i in element:
+	if "Edit section" not in i["title"] and pattern.match(i.get("href", "")):
+		# Compare the parent's tag name: a substring test on its HTML also hits "the".
+		if i.parent.name not in ("div", "th", "td") and not i.has_attr("class"):
+			print(i)
+			break
 
-print(element[0])
 
-#while( soup.select(".firstHeading")[0] != "Philosophy"):
-#	
-#	print(soup.select(".firstHeading")[0].text)
-- 
cgit v1.2.3