diff options
-rw-r--r-- | Scripts/wiki2P.py | 27 |
1 files changed, 18 insertions, 9 deletions
diff --git a/Scripts/wiki2P.py b/Scripts/wiki2P.py index f0ded01..84f4d82 100644 --- a/Scripts/wiki2P.py +++ b/Scripts/wiki2P.py @@ -1,16 +1,25 @@ -import requests, bs4 +import requests, bs4, re -#res = requests.get("https://en.wikipedia.org/wiki/Special:Random") -res = requests.get("https://en.wikipedia.org/wiki/Study") +# res = requests.get("https://en.wikipedia.org/wiki/Special:Random") +res = requests.get("https://en.wikipedia.org/wiki/Linux") -soup = bs4.BeautifulSoup(res.text) +soup = bs4.BeautifulSoup(res.text, "html.parser") -element = soup.select("p > a") +element = soup.select("#mw-content-text a[title]") + + +pattern = re.compile("^\/.*") + + +for i in element: + if "Edit section" not in i["title"] and pattern.match(i["href"]): + if "div" not in str(i.parent) and "th" not in str(i.parent) and "td" not in str(i.parent): + try: + i["class"] + except KeyError: + print(i) + break -print(element[0]) -#while( soup.select(".firstHeading")[0] != "Philosophy"): -# -# print(soup.select(".firstHeading")[0].text) |