"""Search Google for the command-line arguments and open the first results.

Usage:
    python crawl_google_results.py <search terms...>

The script fetches the Google results page for the given terms, stores a copy
of the raw response in ``project1a.html`` and opens up to five of the matched
links in the default web browser.
"""

import sys
import webbrowser

import requests
from bs4 import BeautifulSoup  # type: ignore
from fake_useragent import UserAgent  # type: ignore

if __name__ == "__main__":
    print("Googling.....")
    query = " ".join(sys.argv[1:])
    # Let requests build the query string so that characters such as "&", "#"
    # or "+" in the search terms are percent-encoded instead of corrupting
    # the URL.
    res = requests.get(
        "https://www.google.com/search",
        params={"q": query},
        # The HTTP header is spelled "User-Agent"; any other key is sent as an
        # unrelated custom header and the random agent string is ignored.
        headers={"User-Agent": UserAgent().random},
        # Without a timeout requests may block forever on a stalled connection.
        timeout=10,
    )
    # The status code is deliberately not checked: an error page simply
    # matches no links below, so the script reports 0 and opens nothing.

    # Keep a copy of the raw page; it is only used to look up by hand which
    # CSS class to select below.
    with open("project1a.html", "wb") as out_file:
        out_file.write(res.content)

    soup = BeautifulSoup(res.text, "html.parser")
    # NOTE(review): ".eZt8xd" is a hard-coded class name taken from Google's
    # markup and may stop matching at any time -- confirm against
    # project1a.html.
    links = soup.select(".eZt8xd")[:5]

    print(len(links))
    for link in links:
        href = link.get("href")
        if not href:
            # An element without an href has nothing to open.
            continue
        if link.text == "Maps":
            # The "Maps" link is opened as-is (presumably it already carries
            # a full URL -- TODO confirm).
            webbrowser.open(href)
        else:
            # Every other href is appended to the Google host.
            webbrowser.open(f"http://google.com{href}")