Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
95 changes: 91 additions & 4 deletions lab-web-scraping.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -110,14 +110,101 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 1,
"id": "40359eee-9cd7-4884-bfa4-83344c222305",
"metadata": {
"id": "40359eee-9cd7-4884-bfa4-83344c222305"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Requirement already satisfied: beautifulsoup4 in c:\\users\\mmouw\\anaconda3\\lib\\site-packages (4.12.3)\n",
"Requirement already satisfied: requests in c:\\users\\mmouw\\anaconda3\\lib\\site-packages (2.32.3)\n",
"Requirement already satisfied: pandas in c:\\users\\mmouw\\anaconda3\\lib\\site-packages (2.3.3)\n",
"Requirement already satisfied: soupsieve>1.2 in c:\\users\\mmouw\\anaconda3\\lib\\site-packages (from beautifulsoup4) (2.5)\n",
"Requirement already satisfied: charset-normalizer<4,>=2 in c:\\users\\mmouw\\anaconda3\\lib\\site-packages (from requests) (3.3.2)\n",
"Requirement already satisfied: idna<4,>=2.5 in c:\\users\\mmouw\\anaconda3\\lib\\site-packages (from requests) (3.7)\n",
"Requirement already satisfied: urllib3<3,>=1.21.1 in c:\\users\\mmouw\\anaconda3\\lib\\site-packages (from requests) (2.3.0)\n",
"Requirement already satisfied: certifi>=2017.4.17 in c:\\users\\mmouw\\anaconda3\\lib\\site-packages (from requests) (2025.11.12)\n",
"Requirement already satisfied: numpy>=1.26.0 in c:\\users\\mmouw\\anaconda3\\lib\\site-packages (from pandas) (2.3.4)\n",
"Requirement already satisfied: python-dateutil>=2.8.2 in c:\\users\\mmouw\\anaconda3\\lib\\site-packages (from pandas) (2.9.0.post0)\n",
"Requirement already satisfied: pytz>=2020.1 in c:\\users\\mmouw\\anaconda3\\lib\\site-packages (from pandas) (2024.1)\n",
"Requirement already satisfied: tzdata>=2022.7 in c:\\users\\mmouw\\anaconda3\\lib\\site-packages (from pandas) (2025.2)\n",
"Requirement already satisfied: six>=1.5 in c:\\users\\mmouw\\anaconda3\\lib\\site-packages (from python-dateutil>=2.8.2->pandas) (1.17.0)\n"
]
}
],
"source": [
"# %pip (not !pip) installs into the environment of the running kernel.\n",
"%pip install beautifulsoup4 requests pandas"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "9c14bbcf",
"metadata": {},
"outputs": [],
"source": [
"# Your solution goes here"
"import requests\n",
"from bs4 import BeautifulSoup\n",
"import pandas as pd\n",
"\n",
"def scrape_books(min_rating, max_price):\n",
"    \"\"\"Return the books listed at the site root that pass both filters.\n",
"\n",
"    Fetches https://books.toscrape.com/ once (pagination links are not\n",
"    followed) and keeps every listed book whose star rating is at least\n",
"    ``min_rating`` and whose price is at most ``max_price``.\n",
"\n",
"    Args:\n",
"        min_rating: Lowest acceptable star rating (ratings map to 1-5).\n",
"        max_price: Highest acceptable price, in pounds.\n",
"\n",
"    Returns:\n",
"        pandas.DataFrame with the columns 'Title', 'Price (£)' and 'Rating'.\n",
"        The columns are present even when no book matches.\n",
"\n",
"    Raises:\n",
"        requests.RequestException: If the request times out, fails, or\n",
"            comes back with an HTTP error status.\n",
"    \"\"\"\n",
"    rating_map = {'One': 1, 'Two': 2, 'Three': 3, 'Four': 4, 'Five': 5}\n",
"    columns = ['Title', 'Price (£)', 'Rating']\n",
"    url = 'https://books.toscrape.com/'\n",
"\n",
"    # A timeout keeps the cell from hanging on a stalled connection, and\n",
"    # raise_for_status() stops an error page from being parsed as a catalogue.\n",
"    response = requests.get(url, timeout=10)\n",
"    response.raise_for_status()\n",
"    soup = BeautifulSoup(response.content, 'html.parser')\n",
"\n",
"    books_list = []\n",
"    for book in soup.find_all('article', class_='product_pod'):\n",
"        title = book.find('h3').find('a')['title']\n",
"        price = float(book.find('p', class_='price_color').text.replace('£', ''))\n",
"        # The second CSS class of the star-rating element is looked up as the\n",
"        # rating word ('One' .. 'Five').\n",
"        rating_class = book.find('p', class_='star-rating')['class'][1]\n",
"        rating = rating_map[rating_class]\n",
"\n",
"        if rating >= min_rating and price <= max_price:\n",
"            books_list.append({'Title': title, 'Price (£)': price, 'Rating': rating})\n",
"\n",
"    # Explicit columns keep the schema stable when nothing passes the filters.\n",
"    return pd.DataFrame(books_list, columns=columns)\n",
"\n",
"\n",
" \n",
" \n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "81964a76",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" Title Price (£) Rating\n",
"0 Set Me Free 17.46 5\n"
]
}
],
"source": [
"df = scrape_books(min_rating=4, max_price=20)\n",
"print(df)"
]
}
],
Expand All @@ -126,7 +213,7 @@
"provenance": []
},
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"display_name": "base",
"language": "python",
"name": "python3"
},
Expand All @@ -140,7 +227,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.13"
"version": "3.13.5"
}
},
"nbformat": 4,
Expand Down