diff --git a/lab-web-scraping.ipynb b/lab-web-scraping.ipynb index e552783..c369afb 100644 --- a/lab-web-scraping.ipynb +++ b/lab-web-scraping.ipynb @@ -110,14 +110,101 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "id": "40359eee-9cd7-4884-bfa4-83344c222305", "metadata": { "id": "40359eee-9cd7-4884-bfa4-83344c222305" }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Requirement already satisfied: beautifulsoup4 in c:\\users\\mmouw\\anaconda3\\lib\\site-packages (4.12.3)\n", + "Requirement already satisfied: requests in c:\\users\\mmouw\\anaconda3\\lib\\site-packages (2.32.3)\n", + "Requirement already satisfied: pandas in c:\\users\\mmouw\\anaconda3\\lib\\site-packages (2.3.3)\n", + "Requirement already satisfied: soupsieve>1.2 in c:\\users\\mmouw\\anaconda3\\lib\\site-packages (from beautifulsoup4) (2.5)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in c:\\users\\mmouw\\anaconda3\\lib\\site-packages (from requests) (3.3.2)\n", + "Requirement already satisfied: idna<4,>=2.5 in c:\\users\\mmouw\\anaconda3\\lib\\site-packages (from requests) (3.7)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in c:\\users\\mmouw\\anaconda3\\lib\\site-packages (from requests) (2.3.0)\n", + "Requirement already satisfied: certifi>=2017.4.17 in c:\\users\\mmouw\\anaconda3\\lib\\site-packages (from requests) (2025.11.12)\n", + "Requirement already satisfied: numpy>=1.26.0 in c:\\users\\mmouw\\anaconda3\\lib\\site-packages (from pandas) (2.3.4)\n", + "Requirement already satisfied: python-dateutil>=2.8.2 in c:\\users\\mmouw\\anaconda3\\lib\\site-packages (from pandas) (2.9.0.post0)\n", + "Requirement already satisfied: pytz>=2020.1 in c:\\users\\mmouw\\anaconda3\\lib\\site-packages (from pandas) (2024.1)\n", + "Requirement already satisfied: tzdata>=2022.7 in c:\\users\\mmouw\\anaconda3\\lib\\site-packages (from pandas) (2025.2)\n", + "Requirement already satisfied: six>=1.5 in 
def scrape_books(min_rating, max_price, url="https://books.toscrape.com/", timeout=10):
    """Scrape a single catalogue page and return the books passing both filters.

    Only the page at ``url`` is fetched; pagination links are not followed.

    Parameters
    ----------
    min_rating : int
        Lowest star rating (1-5) a book may have to be kept (inclusive).
    max_price : float
        Highest price a book may have to be kept (inclusive).
    url : str, optional
        Page to scrape. Defaults to the Books to Scrape landing page.
    timeout : float, optional
        Seconds to wait for the server before giving up. Without a timeout
        ``requests`` can block forever on a stalled connection.

    Returns
    -------
    pandas.DataFrame
        Columns ``'Title'``, ``'Price (£)'`` and ``'Rating'``, one row per
        matching book. The columns are present even when no book matches.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status.
    requests.RequestException
        On connection problems or when ``timeout`` expires.
    KeyError
        If a book carries a star-rating class that is not One..Five.
    """
    # The page encodes the rating as a CSS class word next to 'star-rating'.
    rating_map = {'One': 1, 'Two': 2, 'Three': 3, 'Four': 4, 'Five': 5}
    columns = ['Title', 'Price (£)', 'Rating']

    response = requests.get(url, timeout=timeout)
    # Fail loudly on an error page instead of parsing it into an empty result.
    response.raise_for_status()

    # Pass raw bytes so BeautifulSoup performs its own encoding detection.
    soup = BeautifulSoup(response.content, 'html.parser')

    books_list = []
    for book in soup.find_all('article', class_='product_pod'):
        title = book.find('h3').find('a')['title']

        # Keep only digits and the decimal point, so the currency symbol (or
        # any stray character around it) cannot break the float conversion.
        price_text = book.find('p', class_='price_color').text
        price = float(''.join(ch for ch in price_text if ch in '0123456789.'))

        # The second class on the star-rating element is the rating word.
        rating_class = book.find('p', class_='star-rating')['class'][1]
        rating = rating_map[rating_class]

        if rating >= min_rating and price <= max_price:
            books_list.append({'Title': title, 'Price (£)': price, 'Rating': rating})

    # Explicit columns keep the schema stable when nothing matched.
    return pd.DataFrame(books_list, columns=columns)
"version": "3.9.13" + "version": "3.13.5" } }, "nbformat": 4,