diff options
author | jbjjbjjbj <julianteule@gmail.com> | 2017-01-13 20:02:20 +0100 |
---|---|---|
committer | jbjjbjjbj <julianteule@gmail.com> | 2017-01-13 20:02:20 +0100 |
commit | ed3f8a5ba0dbdd9c0d3363568db4e47546513161 (patch) | |
tree | f08b6f119ae54eac979a3be360023a1489ecfeaa /Scripts | |
parent | 70b65b88ac2119600b68e1a75e3053459d171764 (diff) | |
parent | 8d5f747e4f69c1093306a3124c42ca17392dc17c (diff) |
Merge branch 'master' of https://github.com/jbjjbjjbj/newDotFiles
Diffstat (limited to 'Scripts')
-rw-r--r-- | Scripts/wiki2P.py | 27 |
1 files changed, 18 insertions, 9 deletions
```diff
diff --git a/Scripts/wiki2P.py b/Scripts/wiki2P.py
index f0ded01..84f4d82 100644
--- a/Scripts/wiki2P.py
+++ b/Scripts/wiki2P.py
@@ -1,16 +1,25 @@
-import requests, bs4
+import requests, bs4, re
 
-#res = requests.get("https://en.wikipedia.org/wiki/Special:Random")
-res = requests.get("https://en.wikipedia.org/wiki/Study")
+# res = requests.get("https://en.wikipedia.org/wiki/Special:Random")
+res = requests.get("https://en.wikipedia.org/wiki/Linux")
 
-soup = bs4.BeautifulSoup(res.text)
+soup = bs4.BeautifulSoup(res.text, "html.parser")
 
-element = soup.select("p > a")
+element = soup.select("#mw-content-text a[title]")
+
+
+pattern = re.compile("^\/.*")
+
+
+for i in element:
+    if "Edit section" not in i["title"] and pattern.match(i["href"]):
+        if "div" not in str(i.parent) and "th" not in str(i.parent) and "td" not in str(i.parent):
+            try:
+                i["class"]
+            except KeyError:
+                print(i)
+                break
 
-print(element[0])
 
-#while( soup.select(".firstHeading")[0] != "Philosophy"):
-#
-# print(soup.select(".firstHeading")[0].text)
 
 
```