diff --git a/2.6_web_scraping (1).ipynb b/2.6_web_scraping (1).ipynb
new file mode 100644
index 0000000..82df147
--- /dev/null
+++ b/2.6_web_scraping (1).ipynb	
@@ -0,0 +1,1499 @@
+{
+  "cells": [
+    {
+      "cell_type": "markdown",
+      "id": "5171aeed-67b1-4643-bd50-1ffcdfdc9c41",
+      "metadata": {
+        "id": "5171aeed-67b1-4643-bd50-1ffcdfdc9c41"
+      },
+      "source": [
+        "# Web Scraping"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "id": "3117eb6b-47f2-4bc0-afb7-04cab8095055",
+      "metadata": {
+        "id": "3117eb6b-47f2-4bc0-afb7-04cab8095055"
+      },
+      "source": [
+        "![Let's go](https://media.giphy.com/media/dwmNhd5H7YAz6/giphy.gif)"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "id": "9cd76a2d-6ef9-4aca-a201-197d778af242",
+      "metadata": {
+        "id": "9cd76a2d-6ef9-4aca-a201-197d778af242",
+        "jp-MarkdownHeadingCollapsed": true,
+        "tags": []
+      },
+      "source": [
+        "By the end of this lesson, you will be able to:\n",
+        "\n",
+        "- Identify the primary components of web technologies and their roles: HTML, CSS, and JavaScript.\n",
+        "- Explain the hierarchical structure of HTML and the significance of tags, attributes, and their relationship.\n",
+        "- Utilize the **requests** and **Beautiful Soup** libraries to scrape data from a static web page.\n",
+        "- Construct and execute a script to scrape data from a webpage and export it into a structured text file using the pandas library."
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "id": "fb8e2aa9",
+      "metadata": {
+        "id": "fb8e2aa9",
+        "tags": [],
+        "toc": true
+      },
+      "source": [
+        "<h1>Table of Contents<span class=\"tocSkip\"></span></h1>\n",
+        "<div class=\"toc\"><ul class=\"toc-item\"><li><span><a href=\"#Web-Scraping\" data-toc-modified-id=\"Web-Scraping-1\"><span class=\"toc-item-num\">1&nbsp;&nbsp;</span>Web Scraping</a></span><ul class=\"toc-item\"><li><span><a href=\"#What-is-Web-Scraping\" data-toc-modified-id=\"What-is-Web-Scraping-1.1\"><span class=\"toc-item-num\">1.1&nbsp;&nbsp;</span>What is Web Scraping</a></span></li><li><span><a href=\"#Web-structure\" data-toc-modified-id=\"Web-structure-1.2\"><span class=\"toc-item-num\">1.2&nbsp;&nbsp;</span>Web structure</a></span></li><li><span><a href=\"#HTML\" data-toc-modified-id=\"HTML-1.3\"><span class=\"toc-item-num\">1.3&nbsp;&nbsp;</span>HTML</a></span><ul class=\"toc-item\"><li><span><a href=\"#Exploring-Web-Page-Structures\" data-toc-modified-id=\"Exploring-Web-Page-Structures-1.3.1\"><span class=\"toc-item-num\">1.3.1&nbsp;&nbsp;</span>Exploring Web Page Structures</a></span></li><li><span><a href=\"#Fact-1:-HTML-is-Built-on-Tags\" data-toc-modified-id=\"Fact-1:-HTML-is-Built-on-Tags-1.3.2\"><span class=\"toc-item-num\">1.3.2&nbsp;&nbsp;</span>Fact 1: HTML is Built on Tags</a></span></li><li><span><a href=\"#Fact-2:-Tags-Can-Have-Attributes\" data-toc-modified-id=\"Fact-2:-Tags-Can-Have-Attributes-1.3.3\"><span class=\"toc-item-num\">1.3.3&nbsp;&nbsp;</span>Fact 2: Tags Can Have Attributes</a></span></li><li><span><a href=\"#Fact-3:-Tags-Can-Be-Nested\" data-toc-modified-id=\"Fact-3:-Tags-Can-Be-Nested-1.3.4\"><span class=\"toc-item-num\">1.3.4&nbsp;&nbsp;</span>Fact 3: Tags Can Be Nested</a></span></li><li><span><a href=\"#Selecting-Specific-Elements-in-Web-Scraping\" data-toc-modified-id=\"Selecting-Specific-Elements-in-Web-Scraping-1.3.5\"><span class=\"toc-item-num\">1.3.5&nbsp;&nbsp;</span>Selecting Specific Elements in Web Scraping</a></span></li></ul></li><li><span><a href=\"#Web-Scraping-with-Python\" data-toc-modified-id=\"Web-Scraping-with-Python-1.4\"><span class=\"toc-item-num\">1.4&nbsp;&nbsp;</span>Web Scraping with 
Python</a></span><ul class=\"toc-item\"><li><span><a href=\"#Requests:-Fetching-a-Web-Page\" data-toc-modified-id=\"Requests:-Fetching-a-Web-Page-1.4.1\"><span class=\"toc-item-num\">1.4.1&nbsp;&nbsp;</span>Requests: Fetching a Web Page</a></span></li><li><span><a href=\"#Parsing-HTML-with-Beautiful-Soup\" data-toc-modified-id=\"Parsing-HTML-with-Beautiful-Soup-1.4.2\"><span class=\"toc-item-num\">1.4.2&nbsp;&nbsp;</span>Parsing HTML with Beautiful Soup</a></span><ul class=\"toc-item\"><li><span><a href=\"#Extracting-Data\" data-toc-modified-id=\"Extracting-Data-1.4.2.1\"><span class=\"toc-item-num\">1.4.2.1&nbsp;&nbsp;</span>Extracting Data</a></span></li><li><span><a href=\"#More-filtering-options\" data-toc-modified-id=\"More-filtering-options-1.4.2.2\"><span class=\"toc-item-num\">1.4.2.2&nbsp;&nbsp;</span>More filtering options</a></span></li><li><span><a href=\"#Creating-a-DataFrame-with-the-data\" data-toc-modified-id=\"Creating-a-DataFrame-with-the-data-1.4.2.3\"><span class=\"toc-item-num\">1.4.2.3&nbsp;&nbsp;</span>Creating a DataFrame with the data</a></span></li><li><span><a href=\"#💡-Check-for-understanding\" data-toc-modified-id=\"💡-Check-for-understanding-1.4.2.4\"><span class=\"toc-item-num\">1.4.2.4&nbsp;&nbsp;</span>💡 Check for understanding</a></span></li><li><span><a href=\"#Scraping-many-pages\" data-toc-modified-id=\"Scraping-many-pages-1.4.2.5\"><span class=\"toc-item-num\">1.4.2.5&nbsp;&nbsp;</span>Scraping many pages</a></span></li><li><span><a href=\"#CSS-selectors\" data-toc-modified-id=\"CSS-selectors-1.4.2.6\"><span class=\"toc-item-num\">1.4.2.6&nbsp;&nbsp;</span>CSS selectors</a></span></li></ul></li><li><span><a href=\"#More-examples\" data-toc-modified-id=\"More-examples-1.4.3\"><span class=\"toc-item-num\">1.4.3&nbsp;&nbsp;</span>More examples</a></span><ul class=\"toc-item\"><li><span><a href=\"#BBC\" data-toc-modified-id=\"BBC-1.4.3.1\"><span 
class=\"toc-item-num\">1.4.3.1&nbsp;&nbsp;</span>BBC</a></span></li></ul></li></ul></li><li><span><a href=\"#Comments\" data-toc-modified-id=\"Comments-1.5\"><span class=\"toc-item-num\">1.5&nbsp;&nbsp;</span>Comments</a></span></li><li><span><a href=\"#Summary\" data-toc-modified-id=\"Summary-1.6\"><span class=\"toc-item-num\">1.6&nbsp;&nbsp;</span>Summary</a></span></li><li><span><a href=\"#Further-materials\" data-toc-modified-id=\"Further-materials-1.7\"><span class=\"toc-item-num\">1.7&nbsp;&nbsp;</span>Further materials</a></span><ul class=\"toc-item\"><li><span><a href=\"#How-to-Solve-a-403-Error\" data-toc-modified-id=\"How-to-Solve-a-403-Error-1.7.1\"><span class=\"toc-item-num\">1.7.1&nbsp;&nbsp;</span>How to Solve a 403 Error</a></span></li></ul></li></ul></li></ul></div>"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "id": "f65c0446-74e5-4e78-89e4-e14bb186407a",
+      "metadata": {
+        "id": "f65c0446-74e5-4e78-89e4-e14bb186407a"
+      },
+      "source": [
+        "## What is Web Scraping"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "id": "3fe39426-4de3-433c-9058-06506fc6e056",
+      "metadata": {
+        "id": "3fe39426-4de3-433c-9058-06506fc6e056"
+      },
+      "source": [
+        "Web scraping is a method employed by data analysts and developers to retrieve information from web pages. It involves fetching a web page and then parsing that page to obtain desired information. This technique is especially useful when the desired data isn't available through APIs. The extracted data can then be cleaned, analyzed, or stored in databases for further data analytics tasks."
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "id": "421cf815-cf22-478d-871e-a5ff67219d0b",
+      "metadata": {
+        "id": "421cf815-cf22-478d-871e-a5ff67219d0b"
+      },
+      "source": [
+        "## Web structure"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "id": "56ebf2f1-04c5-4dca-96fa-02cbd095fd09",
+      "metadata": {
+        "id": "56ebf2f1-04c5-4dca-96fa-02cbd095fd09"
+      },
+      "source": [
+        "The fundamental web technologies that form the structure of the websites we aim to scrape are:\n",
+        "\n",
+        "- **HTML**: Standing as the backbone of almost all websites, HTML, the core markup language, is instrumental in creating web pages. It houses all the content available on a webpage.\n",
+        "  \n",
+        "- **CSS**: This stylesheet language works alongside HTML, taking charge of the presentation aspect of the webpages. It controls how HTML elements are displayed, setting the stage for a visually pleasing and organized web interface.\n",
+        "\n",
+        "- **JavaScript**: Adding a dynamic touch to the websites, JavaScript comes into play to create interactive and animated content. This programming language has the power to alter webpage content even after it has loaded, bringing a dynamic and responsive element to web designs.\n",
+        "\n",
+        "In this lesson, we will work with the HTML from the websites."
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "id": "10d38b73-2533-43f6-a765-0ad26dfa61f3",
+      "metadata": {
+        "id": "10d38b73-2533-43f6-a765-0ad26dfa61f3"
+      },
+      "source": [
+        "## HTML"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "id": "ddca406e-f9ab-48d1-b29b-eaed702dafd6",
+      "metadata": {
+        "id": "ddca406e-f9ab-48d1-b29b-eaed702dafd6"
+      },
+      "source": [
+        "In the realm of web scraping, understanding HTML (Hypertext Markup Language) is crucial.\n",
+        "\n",
+        "HTML is the standard markup language used to create web pages. Think of it as the skeleton or blueprint of a website. It structures content on the web, defining elements like paragraphs, headings, links, lists, and images. These elements are represented by \"tags\", which enclose content to give it meaning and context.\n",
+        "\n",
+        "When web scraping, you'll often navigate through this HTML structure to pinpoint and extract the exact data you need. Tools like web browsers' \"Inspect\" or \"View Source\" features allow you to see the underlying HTML of a page, which is invaluable when determining how to access specific pieces of content programmatically."
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "id": "6ba82ec5-2130-41ac-bf06-9a981b2e65b6",
+      "metadata": {
+        "id": "6ba82ec5-2130-41ac-bf06-9a981b2e65b6"
+      },
+      "source": [
+        "![](https://github.com/data-bootcamp-v4/lessons/blob/main/img/html.png?raw=true)"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "id": "2f3291a2-5fc6-4e93-b84c-4ef6d45bb433",
+      "metadata": {
+        "id": "2f3291a2-5fc6-4e93-b84c-4ef6d45bb433"
+      },
+      "source": [
+        "### Exploring Web Page Structures\n",
+        "\n",
+        "To inspect the underlying HTML of a web page, right-click anywhere on the page. Choose \"View Page Source\" in browsers like Chrome or Firefox. For Internet Explorer, choose \"View Source,\" and for Safari, select \"Show Page Source.\" (In Safari, if this option isn't visible, navigate to Safari Preferences, click on the Advanced tab, and enable \"Show Develop menu in menu bar.\")\n",
+        "\n",
+        "To embark on your web scraping journey, you just need to grasp **three foundational aspects** of HTML.\n"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "id": "70d58c48-9b0d-4aa6-8903-e5cf504d16cd",
+      "metadata": {
+        "id": "70d58c48-9b0d-4aa6-8903-e5cf504d16cd"
+      },
+      "source": [
+        "### Fact 1: HTML is Built on Tags\n",
+        "\n",
+        "At its core, HTML is composed of content enveloped in `<tags>`. The text we aim to scrape typically sits between an opening tag and a closing tag, each written inside angle brackets. These tags give structure and meaning, instructing the browser on how to present the content. The acronym \"HTML\" stands for Hypertext Markup Language.\n",
+        "\n",
+        "HTML follows a tree-like structure, encompassing parent tags, child tags, and sibling tags:\n",
+        "```\n",
+        "<html>\n",
+        "    <head>\n",
+        "        <title>Page Title</title>\n",
+        "    </head>\n",
+        "    <body>\n",
+        "        <h1>My First Heading</h1>\n",
+        "        <p>My first paragraph.</p>\n",
+        "    </body>\n",
+        "</html>\n",
+        "```\n",
+        "\n",
+        "For instance, consider the `<strong>` tag, signaling bold formatting. If \"Jan. 21\" is encapsulated between an opening `<strong>` tag and its corresponding closing `</strong>` tag, it denotes where the bold styling begins and ends. This pair of tags instructs the browser to render the enclosed text, \"Jan. 21\", in bold.\n",
+        "\n",
+        "Tags come in various types, each suited to encapsulate specific content:\n",
+        " * **Headings**: `<h1>`, `<h2>`, `<h3>`, `<h4>`...\n",
+        " * **Phrasing**: `<b>`, `<strong>`, `<sub>`, `<i>`, `<a>`...\n",
+        " * **Embedded Content**: `<audio>`, `<img>`, `<video>`, `<iframe>`...\n",
+        " * **Tabulated Data**: `<table>`, `<tr>`, `<td>`, `<tbody>`...\n",
+        " * **Page Sections**: `<header>`, `<section>`, `<nav>`, `<article>`...\n",
+        " * **Metadata and Scripts**: `<meta>`, `<title>`, `<script>`, `<link>`...\n"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "id": "fcc87e26-c9b3-42b1-8be6-cabd75da78e5",
+      "metadata": {
+        "id": "fcc87e26-c9b3-42b1-8be6-cabd75da78e5"
+      },
+      "source": [
+        "### Fact 2: Tags Can Have Attributes\n",
+        "\n",
+        "HTML tags can possess \"attributes,\" which are defined within the opening tag itself.\n",
+        "\n",
+        "Examine the following examples:\n",
+        "\n",
+        "- `<span class=\"short-desc\">`: Here, the `<span>` tag has a `class` attribute with the value \"short-desc\".\n",
+        "- `<div> Zapas Marca Joma X54 </div>`: This tag doesn't contain any attributes.\n",
+        "- `<div class=\"price-item\" id=\"offer\"> Zapas Marca Joma X54 </div>`: The `div` tag here has two attributes - `class` with the value \"price-item\" and `id` with the value \"offer\".\n",
+        "- `<a class=\"text-monospace\" id=\"name_132\" href=\"http://www.example.com\"> Page Content </a>`: This `a` tag encompasses the following attributes:\n",
+        "    + `class`: With the value \"text-monospace\". Remember, the class isn't unique across the page.\n",
+        "    + `id`: With the value \"name_132\". IDs are meant to be unique identifiers for tags on the page.\n",
+        "    + `href`: With the value \"http://www.example.com\". The `href` commonly represents a link to another section of the page or to an external website.\n",
+        "\n",
+        "**Key Notes**:\n",
+        "- The `id` attribute should be unique for a tag; no two tags should share the same `id`.\n",
+        "- The `class` attribute isn't meant to be unique. Instead, it often groups tags exhibiting similar behavior or styles.\n",
+        "\n",
+        "For web scraping purposes, **understanding the semantics** behind terms like `<span>`, `class`, or `short-desc` **isn't crucial**. The key takeaway is recognizing that tags can possess attributes and understanding their structural representation. When extracting content, our goal is to pinpoint the right tags within a webpage's HTML."
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "id": "5f8900aa-a3da-44b4-931d-62d3ae9282fa",
+      "metadata": {
+        "id": "5f8900aa-a3da-44b4-931d-62d3ae9282fa"
+      },
+      "source": [
+        "**Other commonly used attributes in HTML**\n",
+        "\n",
+        "Several attributes in HTML provide additional information or modify elements. Some of these frequently used attributes include:\n",
+        "\n",
+        " * **`dir`**: Sets the text direction within an element: left-to-right (`ltr`), right-to-left (`rtl`), or `auto` to let the browser decide.\n",
+        " * **`lang`**: Designates the language of the content within an element.\n",
+        " * **`style`**: Applies inline styling to an element (Note: This shouldn't be mixed up with the `<style>` tag).\n",
+        " * **`title`**: Offers supplementary details about an element, often displayed as a tooltip (Important: This is distinct from the `<title>` tag).\n",
+        "\n",
+        "...and many more.\n",
+        "\n"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "id": "02985a03-251a-4fa7-b969-4afa023cdc6a",
+      "metadata": {
+        "id": "02985a03-251a-4fa7-b969-4afa023cdc6a"
+      },
+      "source": [
+        "\n",
+        "### Fact 3: Tags Can Be Nested\n",
+        "\n",
+        "Imagine the following segment of HTML code:\n",
+        "\n",
+        "`Hello <strong><em>Ironhack</em> students</strong>`\n",
+        "\n",
+        "Here, the phrase **Ironhack students** would be displayed in bold since it resides between the `<strong>` and `</strong>` tags. Additionally, the word ***Ironhack*** would be italicized due to the `<em>` tag, which signifies italic formatting. However, the word \"Hello\" remains unaffected by any formatting, as it lies outside both the `<strong>` and `<em>` tags. This results in the display:\n",
+        "\n",
+        "Hello ***Ironhack* students**\n",
+        "\n",
+        "This example illustrates a key principle: **tags influence the text from their opening to their closing points,** even if they are nested within other tags."
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "id": "575aadd7-e54f-4751-ba64-cd660110213a",
+      "metadata": {
+        "id": "575aadd7-e54f-4751-ba64-cd660110213a"
+      },
+      "source": [
+        "### Selecting Specific Elements in Web Scraping\n",
+        "\n",
+        "When diving into web scraping, it's essential to target specific elements efficiently. To home in on the precise content you need, consider filtering tags based on:\n",
+        "\n",
+        " * **Tag Name**: The main type of the element (e.g., `<div>`, `<a>`, `<p>`).\n",
+        " * **Class**: A descriptor that groups multiple elements with similar characteristics.\n",
+        " * **ID**: A unique identifier assigned to a particular element.\n",
+        " * **Other Attributes**: Additional properties like `href`, `title`, or `lang` that can further specify the elements of interest.\n"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "id": "8bde712f-962d-4222-a17e-781df6177cef",
+      "metadata": {
+        "id": "8bde712f-962d-4222-a17e-781df6177cef"
+      },
+      "source": [
+        "## Web Scraping with Python"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "id": "2f38b3d0-93f3-47f0-a0bd-8bd81e6ccee9",
+      "metadata": {
+        "id": "2f38b3d0-93f3-47f0-a0bd-8bd81e6ccee9"
+      },
+      "source": [
+        "In this lesson, we'll use the `requests` library to fetch web pages and `Beautiful Soup` from the `bs4` package to parse these pages and extract information."
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "id": "99a9e8b0-bada-46e6-8a30-b2eed9d2f420",
+      "metadata": {
+        "id": "99a9e8b0-bada-46e6-8a30-b2eed9d2f420"
+      },
+      "source": [
+        "Ensure you've installed the required packages:"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "id": "f06a7356-dd80-44a9-8a75-4b5b45092d6c",
+      "metadata": {
+        "id": "f06a7356-dd80-44a9-8a75-4b5b45092d6c"
+      },
+      "outputs": [],
+      "source": [
+        "# %pip install requests beautifulsoup4"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "id": "2cd2312e-99ff-4aba-b99f-0c2ca667b76a",
+      "metadata": {
+        "id": "2cd2312e-99ff-4aba-b99f-0c2ca667b76a"
+      },
+      "source": [
+        "### Requests: Fetching a Web Page\n"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "id": "81f1852a-6fc1-4738-b9f6-0488e0611936",
+      "metadata": {
+        "id": "81f1852a-6fc1-4738-b9f6-0488e0611936"
+      },
+      "source": [
+        "First, we use the `requests` library to fetch the content of a webpage."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "id": "5204599c-3c08-469c-9aa3-7427a8e9528b",
+      "metadata": {
+        "id": "5204599c-3c08-469c-9aa3-7427a8e9528b"
+      },
+      "outputs": [],
+      "source": []
+    },
+    {
+      "cell_type": "markdown",
+      "id": "e23cedbf-4b0f-4377-b83e-71c96ba51d93",
+      "metadata": {
+        "id": "e23cedbf-4b0f-4377-b83e-71c96ba51d93"
+      },
+      "source": [
+        "The provided code retrieves the webpage content from the given URL and saves it in a `response` object. This object exposes the page's HTML through two attributes: `text` (the body decoded to a string) and `content` (the raw bytes). Both hold the same HTML we observe when inspecting the source in a web browser."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "id": "264fd211-fe05-4064-8c25-8962ad2efb8f",
+      "metadata": {
+        "id": "264fd211-fe05-4064-8c25-8962ad2efb8f",
+        "scrolled": true,
+        "tags": []
+      },
+      "outputs": [],
+      "source": []
+    },
+    {
+      "cell_type": "markdown",
+      "id": "bac2fd3a-a5ed-4805-9d18-aa150b1d7da5",
+      "metadata": {
+        "id": "bac2fd3a-a5ed-4805-9d18-aa150b1d7da5"
+      },
+      "source": [
+        "When interacting with APIs, we typically receive data in JSON format. However, web scraping provides us with HTML, which can be challenging to navigate. Fortunately, Beautiful Soup simplifies this process, making our work more manageable!"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "id": "b418ebf3-2c95-4928-8348-70384edd8a99",
+      "metadata": {
+        "id": "b418ebf3-2c95-4928-8348-70384edd8a99"
+      },
+      "source": [
+        "### Parsing HTML with Beautiful Soup\n",
+        "\n",
+        "To parse the HTML, we'll employ the renowned Python library, [Beautiful Soup 4](https://www.crummy.com/software/BeautifulSoup/bs4/doc/). For a deeper understanding of its functionalities, explore the [official documentation](https://www.crummy.com/software/BeautifulSoup/bs4/doc/).\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "id": "b3f1bd18-38e8-457d-9822-b8881a819830",
+      "metadata": {
+        "id": "b3f1bd18-38e8-457d-9822-b8881a819830"
+      },
+      "outputs": [],
+      "source": []
+    },
+    {
+      "cell_type": "markdown",
+      "id": "9b7fb133-fabd-4ef9-a7b3-d464fc4c5c7f",
+      "metadata": {
+        "id": "9b7fb133-fabd-4ef9-a7b3-d464fc4c5c7f"
+      },
+      "source": [
+        "The code above parses the HTML (stored in `response.content`) into a special object called `soup` that the Beautiful Soup library understands. In other words, Beautiful Soup is **reading the HTML and making sense of its structure.**"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "id": "cd878595-2c5d-42d4-81bf-d7b8abcadefe",
+      "metadata": {
+        "id": "cd878595-2c5d-42d4-81bf-d7b8abcadefe"
+      },
+      "source": [
+        "#### Extracting Data"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "id": "e78e0f42-7116-4890-9b7b-c8dc2ec0243a",
+      "metadata": {
+        "id": "e78e0f42-7116-4890-9b7b-c8dc2ec0243a"
+      },
+      "source": [
+        "`find` and `find_all` (also available under the legacy alias `findAll`) are methods used to search the soup tree for tags that match a certain criterion.\n",
+        "\n",
+        "1. **find**:\n",
+        "    - Returns only the **first** tag that matches a given set of criteria.\n",
+        "    - Useful when you know there's only one tag of interest or you only want the first occurrence.\n",
+        "    - Example: If you have multiple `<p>` tags on a page and you use `soup.find('p')`, you'll get only the first `<p>` tag.\n",
+        "\n",
+        "2. **findAll (or find_all)**:\n",
+        "    - Returns a **list** of tags that match the given criteria.\n",
+        "    - Useful when you want to capture all occurrences of a particular tag or set of tags.\n",
+        "    - Example: Using `soup.find_all('p')` will give you a list containing all `<p>` tags on the page.\n",
+        "\n",
+        "Here's a simple illustration:\n",
+        "\n",
+        "```html\n",
+        "<html>\n",
+        "    <body>\n",
+        "        <p>First paragraph.</p>\n",
+        "        <p>Second paragraph.</p>\n",
+        "        <div>Some div.</div>\n",
+        "    </body>\n",
+        "</html>\n",
+        "```\n",
+        "\n",
+        "Using `find('p')` would return the first `<p>` tag (the one containing \"First paragraph.\"), while `find_all('p')` would return a list with both `<p>` tags. To obtain just the text of a tag, use its `.text` attribute or call `.get_text()` on it.\n"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "id": "a6a8ed39-eaec-46b3-bcb4-4f3d070e752f",
+      "metadata": {
+        "id": "a6a8ed39-eaec-46b3-bcb4-4f3d070e752f"
+      },
+      "source": [
+        "Let's look at different ways of extracting data."
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "id": "8b5db303-f81e-4c6b-9136-f74bb4191a38",
+      "metadata": {
+        "id": "8b5db303-f81e-4c6b-9136-f74bb4191a38"
+      },
+      "source": [
+        "##### **By Tag**"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "id": "43ed18e1-b4d3-4a3e-99f2-b093874a29b9",
+      "metadata": {
+        "id": "43ed18e1-b4d3-4a3e-99f2-b093874a29b9"
+      },
+      "source": [
+        "Let's start with a popular tag: `title`."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "id": "dc5e6973-c632-4f7f-846b-5203a6eededd",
+      "metadata": {
+        "id": "dc5e6973-c632-4f7f-846b-5203a6eededd"
+      },
+      "outputs": [],
+      "source": []
+    },
+    {
+      "cell_type": "markdown",
+      "id": "65003137-703d-4c98-a988-350ee71d6061",
+      "metadata": {
+        "id": "65003137-703d-4c98-a988-350ee71d6061"
+      },
+      "source": [
+        "##### **By Class**"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "id": "9879ccce-cc1e-46d4-a1ef-9c07ec39ae63",
+      "metadata": {
+        "id": "9879ccce-cc1e-46d4-a1ef-9c07ec39ae63"
+      },
+      "source": [
+        "To search for HTML elements by class in a webpage using BeautifulSoup, you can also use the `find` and `find_all` methods.\n",
+        "\n",
+        "1. **Using `find` method to get the first matching element**:\n",
+        "   \n",
+        "   ```python\n",
+        "   result = soup.find(class_='your-class-name')\n",
+        "   ```\n",
+        "\n",
+        "2. **Using `find_all` method to get a list of all matching elements**:\n",
+        "\n",
+        "   ```python\n",
+        "   results = soup.find_all(class_='your-class-name')\n",
+        "   ```\n",
+        "   \n",
+        "Note that we are using the `class_` parameter because `class` is a reserved keyword in Python."
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "id": "91753bcf-eec5-464c-9ef1-30dff368fa4c",
+      "metadata": {
+        "id": "91753bcf-eec5-464c-9ef1-30dff368fa4c"
+      },
+      "source": [
+        "Let's dive into our target URL and explore its structure. Our objective is to craft a dataframe populated with bicycle names and their corresponding prices.\n",
+        "\n",
+        "To pinpoint the exact tags housing this information, follow these steps:\n",
+        "1. Navigate to the website in your browser.\n",
+        "2. Locate a bicycle name, right-click on it, and choose 'Inspect'. This action will direct you to the element within the site's HTML. Identify the tags so we can extract our desired data.\n",
+        "3. Repeat the same process for the price."
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "id": "38fa381e-6398-4435-b076-23e17fbb312e",
+      "metadata": {
+        "id": "38fa381e-6398-4435-b076-23e17fbb312e"
+      },
+      "source": [
+        "Note: the bicycle names and prices will change depending on the newest bikes in the shop."
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "id": "df16613a-f98b-4eb9-928b-9de391d41250",
+      "metadata": {
+        "id": "df16613a-f98b-4eb9-928b-9de391d41250"
+      },
+      "source": [
+        "Let's filter all elements whose `class` is `de-ProductTile-title`."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "id": "52207051-e7c8-4c85-9779-a88841397cc9",
+      "metadata": {
+        "id": "52207051-e7c8-4c85-9779-a88841397cc9"
+      },
+      "outputs": [],
+      "source": []
+    },
+    {
+      "cell_type": "markdown",
+      "id": "d2a6c9ed-f492-420e-8276-510217323d4e",
+      "metadata": {
+        "id": "d2a6c9ed-f492-420e-8276-510217323d4e"
+      },
+      "source": [
+        "In this case, every element with the `class` `de-ProductTile-title` is an `h4` tag, so we got exactly the information we wanted. But what if that `class` were also used on other tags and we only wanted the results from `h4` tags?"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "id": "a8c776a4-b560-4ae8-ae76-eb30acee0d44",
+      "metadata": {
+        "id": "a8c776a4-b560-4ae8-ae76-eb30acee0d44"
+      },
+      "source": [
+        "##### **By Tag and Class**"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "id": "66ffa178-c4f5-48dc-ac6a-3b1dca1a8c7f",
+      "metadata": {
+        "id": "66ffa178-c4f5-48dc-ac6a-3b1dca1a8c7f"
+      },
+      "source": [
+        "BeautifulSoup allows filtering results using combinations, such as filtering by tag and class."
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "id": "8daff1d0-e15e-4674-b801-1d9d7d14fb82",
+      "metadata": {
+        "id": "8daff1d0-e15e-4674-b801-1d9d7d14fb82"
+      },
+      "source": [
+        "```python\n",
+        "tags = soup.find_all(name=tag_name, class_=class_name)\n",
+        "```"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "id": "9e3dc552-1a70-40e9-88e5-bb222fef5f14",
+      "metadata": {
+        "id": "9e3dc552-1a70-40e9-88e5-bb222fef5f14"
+      },
+      "source": [
+        "We can use a for loop to iterate over the results and do whatever we need to do."
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "id": "8ee3eba9-bfb7-45e9-bf9e-02365231e8b0",
+      "metadata": {
+        "id": "8ee3eba9-bfb7-45e9-bf9e-02365231e8b0"
+      },
+      "source": [
+        "To extract the names from the provided HTML content, you can:\n",
+        "\n",
+        "1. Use the `find_all` or `findAll` method to locate the `<h4>` tags with the specific class (`de-ProductTile-title` in this case).\n",
+        "2. Extract the text from the found tag."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "id": "08ec4b7e-b614-4dd6-97e9-873cb6c612b8",
+      "metadata": {
+        "id": "08ec4b7e-b614-4dd6-97e9-873cb6c612b8"
+      },
+      "outputs": [],
+      "source": []
+    },
+    {
+      "cell_type": "markdown",
+      "id": "79b57e18-0165-4b11-b341-87cdc492bfa2",
+      "metadata": {
+        "id": "79b57e18-0165-4b11-b341-87cdc492bfa2"
+      },
+      "source": [
+        "To extract the price from the provided HTML content, you can:\n",
+        "\n",
+        "1. Use the `find_all` method to locate the `<span>` tags with the specific class (`js-de-ProductTile-currentPrice` in this case).\n",
+        "2. Extract the text from the found tag.\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "id": "d36515f9-9e63-4944-a1ad-f6b6b9781fb8",
+      "metadata": {
+        "id": "d36515f9-9e63-4944-a1ad-f6b6b9781fb8"
+      },
+      "outputs": [],
+      "source": []
+    },
+    {
+      "cell_type": "markdown",
+      "id": "87845a9c-ab2c-4f29-b69e-7eb8804a5866",
+      "metadata": {
+        "id": "87845a9c-ab2c-4f29-b69e-7eb8804a5866"
+      },
+      "source": [
+        "##### **Getting other attributes**"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "id": "fdf7d9aa-a1c7-455e-b9fd-f3079f3fe3bb",
+      "metadata": {
+        "id": "fdf7d9aa-a1c7-455e-b9fd-f3079f3fe3bb"
+      },
+      "source": [
+        "To access other attribute values such as hyperlinks (which are usually contained in the `href` attribute of `a` tags), you first locate the element using BeautifulSoup methods such as `find` or `find_all`, and then use the `.get()` method to retrieve the value of the attribute you're interested in. Here is a step-by-step explanation:\n",
+        "\n",
+        "1. **Locate the element**: Use `find` or `find_all` to locate the element(s) that contain the attribute you want to access.\n",
+        "\n",
+        "    ```python\n",
+        "    link_element = soup.find('a', class_='link-class')\n",
+        "    ```\n",
+        "\n",
+        "2. **Access the attribute**: Once you have the element, use the `.get()` method to access the attribute value.\n",
+        "\n",
+        "    ```python\n",
+        "    link_url = link_element.get('href')\n",
+        "    ```\n",
+        "\n",
+        "In the above snippet:\n",
+        "- We first find the `a` element with the class `'link-class'`.\n",
+        "- We then get the value of the `href` attribute which contains the hyperlink.\n"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "id": "6e4db851-a8bd-4799-aaa8-2d49c0a4487d",
+      "metadata": {
+        "id": "6e4db851-a8bd-4799-aaa8-2d49c0a4487d"
+      },
+      "source": [
+        "When inspecting the website, we saw that the bicycle title was a link. How can we get that link?\n",
+        "Let's inspect the whole element containing the bicycle name instead of just the name.\n",
+        "\n",
+        "We can see that we have:\n",
+        "    \n",
+        "    <a class=\"de-u-linkClean js-de-ProductTile-link\" href=\"/collections/mountain-bikes/products/mountain-bike-275-rockrider-st-100-196952-192872\">\n",
+        "        \n",
+        "Note: this link will change depending on the newest bike in the shop."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "id": "2b35b659-c5f5-46d9-84c7-8fcb68023828",
+      "metadata": {
+        "id": "2b35b659-c5f5-46d9-84c7-8fcb68023828"
+      },
+      "outputs": [],
+      "source": []
+    },
+    {
+      "cell_type": "markdown",
+      "id": "2d5084e2-ff57-4ccf-ac04-c17d7e76d662",
+      "metadata": {
+        "id": "2d5084e2-ff57-4ccf-ac04-c17d7e76d662"
+      },
+      "source": [
+        "#### More filtering options"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "id": "7afb8ffe-82f5-4ca2-9ca4-aa6a8716e402",
+      "metadata": {
+        "id": "7afb8ffe-82f5-4ca2-9ca4-aa6a8716e402"
+      },
+      "source": [
+        "##### Filtering by Multiple Tags"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "id": "c271307e-ee15-4d10-9336-da52c7ad39d2",
+      "metadata": {
+        "id": "c271307e-ee15-4d10-9336-da52c7ad39d2"
+      },
+      "source": [
+        "To find elements with multiple possible tags, you can pass a list of tag names to `find_all`."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "id": "1be1b743-08b4-4ca4-983d-f407de373c2f",
+      "metadata": {
+        "id": "1be1b743-08b4-4ca4-983d-f407de373c2f"
+      },
+      "outputs": [],
+      "source": []
+    },
+    {
+      "cell_type": "markdown",
+      "id": "a979ced7-a3b6-4fbe-80ba-2ba6ce68ee87",
+      "metadata": {
+        "id": "a979ced7-a3b6-4fbe-80ba-2ba6ce68ee87"
+      },
+      "source": [
+        "##### Filtering by Multiple Classes"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "id": "d44dd19c-f22e-4c3b-8c45-322a792d89a3",
+      "metadata": {
+        "id": "d44dd19c-f22e-4c3b-8c45-322a792d89a3"
+      },
+      "source": [
+        "To find elements with multiple possible classes, you can pass a list of class names.\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "id": "667c8f37-155c-4d88-a4fa-88e912563086",
+      "metadata": {
+        "id": "667c8f37-155c-4d88-a4fa-88e912563086"
+      },
+      "outputs": [],
+      "source": []
+    },
+    {
+      "cell_type": "markdown",
+      "id": "b9da449c-afe2-4e5b-8056-7625d8617bf4",
+      "metadata": {
+        "id": "b9da449c-afe2-4e5b-8056-7625d8617bf4"
+      },
+      "source": [
+        "##### Combining Multiple Criteria"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "id": "a695ddf4-3fa6-4224-b5dd-a9c30b1d6add",
+      "metadata": {
+        "id": "a695ddf4-3fa6-4224-b5dd-a9c30b1d6add"
+      },
+      "source": [
+        "You can combine multiple criteria by using the `attrs` argument."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "id": "3ffba290-375b-4d89-bc48-2536d52d1a44",
+      "metadata": {
+        "id": "3ffba290-375b-4d89-bc48-2536d52d1a44"
+      },
+      "outputs": [],
+      "source": []
+    },
+    {
+      "cell_type": "markdown",
+      "id": "3b27413d-16c3-4637-830a-997d9a9cac16",
+      "metadata": {
+        "id": "3b27413d-16c3-4637-830a-997d9a9cac16"
+      },
+      "source": [
+        "##### Limiting the Results"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "id": "a7a3e759-aabb-496b-8c4b-aeb7c812247e",
+      "metadata": {
+        "id": "a7a3e759-aabb-496b-8c4b-aeb7c812247e"
+      },
+      "source": [
+        "You can limit the number of results returned by `find_all` using the `limit` parameter."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "id": "9c0fb6fb-e40e-426e-9c35-39c74ff13e3f",
+      "metadata": {
+        "id": "9c0fb6fb-e40e-426e-9c35-39c74ff13e3f"
+      },
+      "outputs": [],
+      "source": []
+    },
+    {
+      "cell_type": "markdown",
+      "id": "7042ac1e-0868-428a-afa5-a6c00f769627",
+      "metadata": {
+        "id": "7042ac1e-0868-428a-afa5-a6c00f769627"
+      },
+      "source": [
+        "##### Navigating through the \"Tree\" of HTML Elements\n",
+        "\n",
+        "Beautiful Soup provides a robust set of tools that allow you to traverse and explore the hierarchical structure of an HTML document, often referred to as the \"tree\".\n",
+        "\n",
+        "To access child elements directly:"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "id": "75bd388d-adb9-4a99-908f-0ceb4bca4c72",
+      "metadata": {
+        "id": "75bd388d-adb9-4a99-908f-0ceb4bca4c72"
+      },
+      "outputs": [],
+      "source": []
+    },
+    {
+      "cell_type": "markdown",
+      "id": "5cd9f804-13ea-4331-981f-47bfe62ec7af",
+      "metadata": {
+        "id": "5cd9f804-13ea-4331-981f-47bfe62ec7af"
+      },
+      "source": [
+        "The code above will first locate the initial `div` element present in the Beautiful Soup object. Subsequently, it will fetch all `h4` elements contained within that `div`.\n",
+        "\n",
+        "But what if you need to retrieve a specific child by its position, say the second child?"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "id": "1bc5aba1-c365-4d3b-959d-56f536338436",
+      "metadata": {
+        "id": "1bc5aba1-c365-4d3b-959d-56f536338436"
+      },
+      "outputs": [],
+      "source": []
+    },
+    {
+      "cell_type": "markdown",
+      "id": "9fede2e3-f951-40c6-973a-b2e77ce3f439",
+      "metadata": {
+        "id": "9fede2e3-f951-40c6-973a-b2e77ce3f439"
+      },
+      "source": [
+        "#### Creating a DataFrame with the data"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "id": "4f34f13c-6856-4f82-9719-1e32e2cb1f1b",
+      "metadata": {
+        "id": "4f34f13c-6856-4f82-9719-1e32e2cb1f1b"
+      },
+      "source": [
+        "Instead of getting names and prices separately, we can target the whole component, and extract the name and price from each bicycle component in a more structured manner. By targeting this whole component tag, we can ensure that we are extracting information for the same product (i.e., the name and price correspond to the same bicycle).\n",
+        "\n",
+        "Here's how we can achieve this:\n",
+        "\n",
+        "1. **Targeting the Whole Component**:\n",
+        "   - Instead of targeting individual tags for names and prices, we target the main component that houses both the name and price.\n",
+        "   - By visually inspecting the HTML, we can see that:\n",
+        "       - The information for each bicycle (name, price, etc.) is grouped together under a `<section>` tag.\n",
+        "       - The `class` attribute of this `<section>` tag is `de-ProductTile-info`. This class seems specific to the product tile and is thus a good candidate to use for extraction.\n",
+        "   \n",
+        "2. **Iterating through Components**:\n",
+        "   - For each such component, extract the name and the price.\n",
+        "   \n",
+        "3. **Storing Data**:\n",
+        "   - Store the extracted data in lists, which can then be used to create a DataFrame."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "id": "4f75eeb5-03bc-466b-9118-91e15237a94c",
+      "metadata": {
+        "id": "4f75eeb5-03bc-466b-9118-91e15237a94c"
+      },
+      "outputs": [],
+      "source": []
+    },
+    {
+      "cell_type": "markdown",
+      "id": "2bb40803-e25b-45b9-98bf-ef293c3143a4",
+      "metadata": {
+        "id": "2bb40803-e25b-45b9-98bf-ef293c3143a4"
+      },
+      "source": [
+        "We could further clean our dataset to ensure the price is a float, facilitating arithmetic operations. This implies prices shouldn't be in a range."
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "id": "33c85bd8-6c40-4bad-8fe9-924f4441d47e",
+      "metadata": {
+        "id": "33c85bd8-6c40-4bad-8fe9-924f4441d47e"
+      },
+      "source": [
+        "#### 💡 Check for understanding"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "id": "4c486efc-256b-4e0b-9339-f8eaa5c098ce",
+      "metadata": {
+        "id": "4c486efc-256b-4e0b-9339-f8eaa5c098ce"
+      },
+      "source": [
+        "You are given a raw HTML content of a product list from an online store. Your task is to extract the following details for each product:\n",
+        "\n",
+        "- Bicycle Name\n",
+        "- Bicycle Price\n",
+        "- URL for the product details\n",
+        "- URL for the product image\n",
+        "\n",
+        "Write a function `extract_bike_info` that takes in the HTML content and returns a pandas DataFrame with the above columns.\n",
+        "\n",
+        "**Hint:**\n",
+        "\n",
+        "In order to get the product image, it might be a good idea to use the `article` tag with the class `de-ProductTile` since based on the HTML structure, this `article` tag encapsulates the entire product, including both the image and the product details. This allows us to more easily access all the relevant details for each product without having to jump around different sections.\n",
+        "\n",
+        "If we were to only use `soup.find_all('section', class_='de-ProductTile-info')`, we'd be focusing solely on the product details section and would then need a separate approach to extract the image URL. By starting with the `article` tag, we're able to extract all the needed data in a more cohesive and streamlined manner.\n",
+        "\n",
+        "**Bonus:** clean the price column so you can perform numerical operations on it."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "id": "108ae791-9fd0-4c32-8a7a-f3b8d79c5de5",
+      "metadata": {
+        "id": "108ae791-9fd0-4c32-8a7a-f3b8d79c5de5"
+      },
+      "outputs": [],
+      "source": []
+    },
+    {
+      "cell_type": "markdown",
+      "id": "15969ba9-1ef7-49f8-96aa-0ccef30a6d84",
+      "metadata": {
+        "id": "15969ba9-1ef7-49f8-96aa-0ccef30a6d84"
+      },
+      "source": [
+        "#### Scraping many pages"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "id": "e227dbd2-7cf8-417d-a5f6-2eb792a94f40",
+      "metadata": {
+        "id": "e227dbd2-7cf8-417d-a5f6-2eb792a94f40"
+      },
+      "source": [
+        "When dealing with a limited number of bicycles, all products are conveniently displayed on a single page. But what if there were numerous products necessitating pagination across multiple pages?\n",
+        "\n",
+        "Consider the 'deals' collection. By navigating to the end of its first page on the website, we can observe pagination links. Transitioning to the second page results in a change in the URL:\n",
+        "\n",
+        "From:\n",
+        "\"https://www.decathlon.com/collections/deals\"\n",
+        "To:\n",
+        "\"https://www.decathlon.com/collections/deals?page=2\"\n",
+        "\n",
+        "This pattern in the URL can be leveraged to generate a series of URLs for web scraping.\n",
+        "\n",
+        "Please note: Depending on the current offers available at the time of this lesson, pagination might not be present. If that's the case, explore other product categories that have a substantial number of items, resulting in multiple pages."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "id": "445dae4b-4630-49d7-86de-bf5fefce33ba",
+      "metadata": {
+        "id": "445dae4b-4630-49d7-86de-bf5fefce33ba"
+      },
+      "outputs": [],
+      "source": []
+    },
+    {
+      "cell_type": "markdown",
+      "id": "96a1c69c-3526-49a1-b573-efaa992b3939",
+      "metadata": {
+        "id": "96a1c69c-3526-49a1-b573-efaa992b3939"
+      },
+      "source": [
+        "If you look at our results and compare them with the website, you'll see that it's not returning all the products. Each page has more than 9 products, and it's only returning 9 on each page.\n",
+        "\n",
+        "This could be because the content is dynamic.\n",
+        "\n",
+        "**Dynamic Content**: Many modern websites use JavaScript to load content dynamically. When you make a request using libraries like requests, you only retrieve the initial HTML content. Content loaded dynamically via JavaScript after this won't be captured. In such cases, tools like Selenium are used because they can interact with the JavaScript of the page.\n"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "id": "3b3a4997-1f4d-469c-915f-77f367f140e2",
+      "metadata": {
+        "id": "3b3a4997-1f4d-469c-915f-77f367f140e2"
+      },
+      "source": [
+        "#### CSS selectors"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "id": "47d1b2c9-bac8-4ffe-af6b-af74a916e335",
+      "metadata": {
+        "id": "47d1b2c9-bac8-4ffe-af6b-af74a916e335"
+      },
+      "source": [
+        "CSS selectors are patterns used to select and manipulate one or more elements in an HTML or XML document. When web scraping with Python, CSS selectors can be used to target specific elements of interest within the page's content."
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "id": "f4ae85ee-a059-4239-abe5-bf31424f6547",
+      "metadata": {
+        "id": "f4ae85ee-a059-4239-abe5-bf31424f6547"
+      },
+      "source": [
+        "The `select` method in BeautifulSoup allows you to pass a CSS selector and returns a list of elements matching that selector."
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "id": "5c8b359f-3887-45b7-81c7-cbe27b334f56",
+      "metadata": {
+        "id": "5c8b359f-3887-45b7-81c7-cbe27b334f56"
+      },
+      "source": [
+        "1. **Tag Selector**: Targets elements by their tag name.\n",
+        "   - `p`: selects all `<p>` elements.\n",
+        "   - `soup.select(\"p\")` will retrieve all `<p>` elements\n",
+        "\n",
+        "2. **Class Selector**: Targets elements by their class attribute.\n",
+        "   - `.classname`: selects all elements with `class=\"classname\"`.\n",
+        "   - If the `class` attribute contains several class names separated by spaces, replace each space with `.` (e.g. `class=\"a b\"` becomes `.a.b`)\n",
+        "   - `soup.select(\".classname\")`\n",
+        "   - To combine both, we can have `soup.select(\"tagname.classname\")`\n",
+        "\n",
+        "3. **Descendant Selector**: Targets an element that is a descendant of another element.\n",
+        "   - `div p`: selects all `<p>` elements inside a `<div>` element.\n",
+        "   - `.class1 .class2`: selects all elements with class2 that are descendants of an element with class1.\n",
+        "   \n",
+        "4. **Attribute Selector**: Targets elements based on their attributes and values.\n",
+        "   - `a[href]`: selects all `<a>` elements with an `href` attribute.\n",
+        "   - `a[href=\"https://www.example.com\"]`: selects all `<a>` elements with an `href` value of \"https://www.example.com\".\n",
+        "\n",
+        "And more...\n"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "id": "b34db39d-7a96-4ef9-b311-8f174c12a5e6",
+      "metadata": {
+        "id": "b34db39d-7a96-4ef9-b311-8f174c12a5e6"
+      },
+      "source": [
+        "1. **Tag Selector**:\n",
+        "   - **`article`**: This would select all `<article>` elements on the page.\n",
+        "  "
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "id": "4cd3fcdd-6482-4b4c-8a96-fa6018377f48",
+      "metadata": {
+        "id": "4cd3fcdd-6482-4b4c-8a96-fa6018377f48"
+      },
+      "outputs": [],
+      "source": []
+    },
+    {
+      "cell_type": "markdown",
+      "id": "1a3993ae-8132-40cc-b775-562de4c2467c",
+      "metadata": {
+        "id": "1a3993ae-8132-40cc-b775-562de4c2467c"
+      },
+      "source": [
+        "2. **Class Selector**:\n",
+        "   - **`.de-ProductTile`**: This would select all elements with the class `de-ProductTile`."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "id": "080be45a-f5c5-4546-bf23-49a50bff4891",
+      "metadata": {
+        "id": "080be45a-f5c5-4546-bf23-49a50bff4891"
+      },
+      "outputs": [],
+      "source": []
+    },
+    {
+      "cell_type": "markdown",
+      "id": "d35bb4ab-f0ec-425d-b1a6-70f4c93a9ca2",
+      "metadata": {
+        "id": "d35bb4ab-f0ec-425d-b1a6-70f4c93a9ca2"
+      },
+      "source": [
+        "To combine both, we can have `soup.select(\"tagname.classname\")`"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "id": "fb220a7f-554f-4436-8c7e-ab130f176f86",
+      "metadata": {
+        "id": "fb220a7f-554f-4436-8c7e-ab130f176f86"
+      },
+      "source": [
+        "Without CSS selectors we did:"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "id": "e807f7d5-fc44-486c-8e8c-cdbe4d7142b0",
+      "metadata": {
+        "id": "e807f7d5-fc44-486c-8e8c-cdbe4d7142b0"
+      },
+      "outputs": [],
+      "source": []
+    },
+    {
+      "cell_type": "markdown",
+      "id": "a3f99e3c-def6-418b-aeec-d3b163ba52e8",
+      "metadata": {
+        "id": "a3f99e3c-def6-418b-aeec-d3b163ba52e8"
+      },
+      "source": [
+        "Equivalently, using CSS selectors, which are a universal syntax, you can select with `tag_name.class_name`. If the `class` attribute contains several class names separated by spaces, replace each space with `.`"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "id": "023dc830-9da6-43c9-8517-15b9d41d35a6",
+      "metadata": {
+        "id": "023dc830-9da6-43c9-8517-15b9d41d35a6"
+      },
+      "outputs": [],
+      "source": []
+    },
+    {
+      "cell_type": "markdown",
+      "id": "e2288120-20e2-4bb6-8376-7b606f3c16f5",
+      "metadata": {
+        "id": "e2288120-20e2-4bb6-8376-7b606f3c16f5"
+      },
+      "source": [
+        "3. **Descendant Selector**:\n",
+        "   - **`.de-ProductTile .de-ProductTile-title`**: This would select all elements with the class `de-ProductTile-title` that are descendants of elements with the class `de-ProductTile`.\n",
+        "   - **`article h4`**: This would select all `<h4>` elements that are descendants of `<article>` elements."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "id": "1b4d2907-4916-4f42-811a-8aad177d1269",
+      "metadata": {
+        "id": "1b4d2907-4916-4f42-811a-8aad177d1269"
+      },
+      "outputs": [],
+      "source": []
+    },
+    {
+      "cell_type": "markdown",
+      "id": "db1530f5-6f9b-44b8-adfc-24455b2a003d",
+      "metadata": {
+        "id": "db1530f5-6f9b-44b8-adfc-24455b2a003d"
+      },
+      "source": [
+        "[Beautiful Soup selectors](https://www.crummy.com/software/BeautifulSoup/bs4/doc/)"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "id": "5c61b19b-123c-4d50-a2aa-aaa498dd1815",
+      "metadata": {
+        "id": "5c61b19b-123c-4d50-a2aa-aaa498dd1815"
+      },
+      "source": [
+        "You can also use a combination of `find`, `find_all`, and `select` methods to navigate and locate the elements you're interested in more efficiently. Here's how you can use them together:\n",
+        "\n",
+        "1. **Using `find` or `find_all` to Narrow Down the Search Scope**:\n",
+        "   \n",
+        "   Initially, you can use `find` or `find_all` to narrow down your search to a specific section of the HTML document.\n",
+        "\n",
+        "   ```python\n",
+        "   section = soup.find('div', class_='product-section')\n",
+        "   ```\n",
+        "\n",
+        "2. **Using `select` to Further Locate Elements**:\n",
+        "\n",
+        "   After narrowing down the section, you can use the `select` method to locate elements using CSS selectors, which allow for more complex queries. The `select` method can be used on a BeautifulSoup object or on a Tag object (like the one retrieved in step 1).\n",
+        "\n",
+        "   ```python\n",
+        "   product_links = section.select('a.product-link')\n",
+        "   ```\n",
+        "\n",
+        "In this snippet:\n",
+        "- First, we locate a section of the webpage using `find`.\n",
+        "- Then, within that section, we locate all `a` elements with the class `'product-link'` using `select`."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "id": "85d1e946-b005-4c01-80de-3cd6321119ba",
+      "metadata": {
+        "id": "85d1e946-b005-4c01-80de-3cd6321119ba"
+      },
+      "outputs": [],
+      "source": []
+    },
+    {
+      "cell_type": "markdown",
+      "id": "8386cdad-c04b-46b0-b4a5-56e21a9fc1b3",
+      "metadata": {
+        "id": "8386cdad-c04b-46b0-b4a5-56e21a9fc1b3"
+      },
+      "source": [
+        "### More examples"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "id": "9d861e35-790b-46a3-a8a0-14af3e18f734",
+      "metadata": {
+        "id": "9d861e35-790b-46a3-a8a0-14af3e18f734"
+      },
+      "source": [
+        "#### BBC"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "id": "9e0281a7-4136-4f4a-af80-6258237fe252",
+      "metadata": {
+        "id": "9e0281a7-4136-4f4a-af80-6258237fe252"
+      },
+      "source": [
+        "Let's scrape the BBC to gather some information."
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "id": "1c253371-6984-4430-96aa-170b96807d8c",
+      "metadata": {
+        "id": "1c253371-6984-4430-96aa-170b96807d8c"
+      },
+      "source": [
+        "We'll get the hyperlinks to images from the BBC website."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "id": "f4df486c-93f7-4eaf-a4d2-fa410d995a46",
+      "metadata": {
+        "id": "f4df486c-93f7-4eaf-a4d2-fa410d995a46"
+      },
+      "outputs": [],
+      "source": []
+    },
+    {
+      "cell_type": "markdown",
+      "id": "874ee933-cd27-4eb1-b97d-75cc822c3a95",
+      "metadata": {
+        "id": "874ee933-cd27-4eb1-b97d-75cc822c3a95"
+      },
+      "source": [
+        "## Comments"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "id": "000a20c0-ea4e-4065-9958-426c9355de3a",
+      "metadata": {
+        "id": "000a20c0-ea4e-4065-9958-426c9355de3a"
+      },
+      "source": [
+        "It's always recommended to check for the availability of an **API** before resorting to web scraping for the following reasons:\n",
+        " * It is generally much easier to use\n",
+        " * APIs are usually well-documented\n",
+        " * Utilizing APIs is often preferred by server administrators\n",
+        "\n",
+        "Refer to the `robots.txt` file on a website (by visiting `www.example.com/robots.txt`) to understand the server's guidelines and limitations regarding web scraping."
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "id": "ce9736c1-bce6-4da5-be81-b7700bcc48de",
+      "metadata": {
+        "id": "ce9736c1-bce6-4da5-be81-b7700bcc48de"
+      },
+      "source": [
+        "## Summary"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "id": "d9dcaa18-38d3-47fa-a890-15320b61237c",
+      "metadata": {
+        "id": "d9dcaa18-38d3-47fa-a890-15320b61237c"
+      },
+      "source": [
+        "1. **Web Technologies**:\n",
+        "   - **HTML**: This is the standard markup language that holds the content of the webpage. It is the primary target when we engage in web scraping.\n",
+        "   - **CSS**: Cascading Style Sheets are used to describe the look and formatting of a document written in HTML.\n",
+        "   - **JavaScript**: This is a scripting language used to create interactive and dynamic website content.\n",
+        "\n",
+        "2. **HTML Structure**:\n",
+        "   - **Hierarchical**: HTML documents are structured hierarchically, meaning elements are nested within other elements, forming a tree-like structure.\n",
+        "   - **Tags**: These are the building blocks of HTML, defining elements that hold different types of content.\n",
+        "   - **Attributes**: HTML tags can have attributes, which define properties of an element and are used to set various characteristics such as class, ID, and style.\n",
+        "\n",
+        "3. **Web Scraping Tools**:\n",
+        "   - **Requests**: A Python library that allows you to send HTTP requests to get the HTML content of a webpage.\n",
+        "   - **Beautiful Soup**: A Python library that facilitates the programmatic analysis of HTML, helping in parsing the HTML and navigating the parse tree.\n",
+        "   - **Selenium**: In cases where the webpage content is dynamic and generated using JavaScript, tools like Selenium are often used. Selenium can interact with JavaScript to load dynamic content, making it accessible for scraping.\n",
+        "   \n",
+        "4. **Finding and Selecting Elements**:\n",
+        "   - **Selection by Tag, Class, and ID**: We can find elements using various attributes such as their tag name, class name, or ID.\n",
+        "   - **CSS Selectors**: These are patterns that allow more complex selections, leveraging the relationships between different elements to find them in numerous ways.\n"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "id": "9535b92e-c209-446c-8e3f-b359332acdb1",
+      "metadata": {
+        "id": "9535b92e-c209-446c-8e3f-b359332acdb1"
+      },
+      "source": [
+        "## Further materials"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "id": "e582b62b-77c5-4ee9-8c32-d005786bf408",
+      "metadata": {
+        "id": "e582b62b-77c5-4ee9-8c32-d005786bf408"
+      },
+      "source": [
+        "[Web archive](http://web.archive.org/): Browse snapshots of how webpages looked in the past!"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "id": "fea668ac-cc99-40dc-976b-354390ef99c7",
+      "metadata": {
+        "id": "fea668ac-cc99-40dc-976b-354390ef99c7"
+      },
+      "source": [
+        "### How to Solve a 403 Error\n",
+        "\n",
+        "When you get a `403` status code in response to a web request, it means \"Forbidden.\" The server understands your request, but it refuses to fulfill it. This is often a measure by websites to prevent web scraping or automated access.\n",
+        "\n",
+        "Here's why you might get a `403 Forbidden` error:\n",
+        "\n",
+        "1. **User-Agent**: Many websites block requests that don't have a standard web browser User-Agent. The default User-Agent of the `requests` library often gets blocked.\n",
+        "2. **Robots.txt**: This is a file websites use to guide web crawlers about which pages or sections of the site shouldn't be processed or scanned. Respect it.\n",
+        "3. **Rate Limiting**: Websites might block you if you make too many requests in a short period.\n",
+        "And more...\n",
+        "\n",
+        "To solve it, try the following, starting from the user-agent:\n",
+        "\n",
+        "1. **Change the User-Agent**:\n",
+        "   You can mimic a request from a web browser by setting a User-Agent header.\n",
+        "   ```python\n",
+        "   headers = {\n",
+        "       \"User-Agent\": \"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36\"\n",
+        "   }\n",
+        "   response = requests.get(url, headers=headers)\n",
+        "   ```\n",
+        "\n",
+        "2. **Use a Web Scraper Library**:\n",
+        "   Libraries like Scrapy or Selenium can help bypass restrictions, especially when JavaScript rendering is involved.\n",
+        "\n",
+        "3. **Respect `robots.txt`**:\n",
+        "   Always check `https://www.example.com/robots.txt` (replace `example.com` with the website's domain) to see which URLs you're allowed to access.\n",
+        "\n",
+        "4. **Rate Limiting**:\n",
+        "   Implement delays in your requests using `time.sleep(seconds)` to avoid hitting rate limits.\n",
+        "\n",
+        "5. **Use Proxies or VPN**:\n",
+        "   Rotate IP addresses or use a VPN service if the server has blocked your IP.\n",
+        "\n",
+        "6. **Sessions & Cookies**:\n",
+        "   Some websites might require maintaining sessions or handling cookies.\n"
+      ]
+    }
+  ],
+  "metadata": {
+    "colab": {
+      "provenance": []
+    },
+    "kernelspec": {
+      "display_name": "Python 3 (ipykernel)",
+      "language": "python",
+      "name": "python3"
+    },
+    "language_info": {
+      "codemirror_mode": {
+        "name": "ipython",
+        "version": 3
+      },
+      "file_extension": ".py",
+      "mimetype": "text/x-python",
+      "name": "python",
+      "nbconvert_exporter": "python",
+      "pygments_lexer": "ipython3",
+      "version": "3.9.13"
+    },
+    "toc": {
+      "base_numbering": 1,
+      "nav_menu": {},
+      "number_sections": true,
+      "sideBar": true,
+      "skip_h1_title": false,
+      "title_cell": "Table of Contents",
+      "title_sidebar": "Contents",
+      "toc_cell": true,
+      "toc_position": {},
+      "toc_section_display": true,
+      "toc_window_display": false
+    }
+  },
+  "nbformat": 4,
+  "nbformat_minor": 5
+}
\ No newline at end of file
diff --git a/lab-web-scraping.ipynb b/lab-web-scraping.ipynb
index e552783..e1a8065 100644
--- a/lab-web-scraping.ipynb
+++ b/lab-web-scraping.ipynb
@@ -110,14 +110,108 @@
     },
     {
       "cell_type": "code",
-      "execution_count": null,
+      "execution_count": 5,
       "id": "40359eee-9cd7-4884-bfa4-83344c222305",
       "metadata": {
         "id": "40359eee-9cd7-4884-bfa4-83344c222305"
       },
-      "outputs": [],
+      "outputs": [
+        {
+          "ename": "ValueError",
+          "evalue": "could not convert string to float: 'Â£51.77'",
+          "output_type": "error",
+          "traceback": [
+            "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+            "\u001b[0;31mValueError\u001b[0m                                Traceback (most recent call last)",
+            "Cell \u001b[0;32mIn[5], line 79\u001b[0m\n\u001b[1;32m     75\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m pd\u001b[38;5;241m.\u001b[39mDataFrame(books_data)\n\u001b[1;32m     78\u001b[0m \u001b[38;5;66;03m# Example usage with the TASK requirements\u001b[39;00m\n\u001b[0;32m---> 79\u001b[0m df \u001b[38;5;241m=\u001b[39m scrape_books(min_rating\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m4\u001b[39m, max_price\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m20\u001b[39m)\n\u001b[1;32m     80\u001b[0m \u001b[38;5;28mprint\u001b[39m(df\u001b[38;5;241m.\u001b[39mhead())\n\u001b[1;32m     81\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124mTotal books scraped: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mlen\u001b[39m(df)\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n",
+            "Cell \u001b[0;32mIn[5], line 42\u001b[0m, in \u001b[0;36mscrape_books\u001b[0;34m(min_rating, max_price)\u001b[0m\n\u001b[1;32m     40\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m book \u001b[38;5;129;01min\u001b[39;00m soup\u001b[38;5;241m.\u001b[39mselect(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124marticle.product_pod\u001b[39m\u001b[38;5;124m\"\u001b[39m):\n\u001b[1;32m     41\u001b[0m     title \u001b[38;5;241m=\u001b[39m book\u001b[38;5;241m.\u001b[39mh3\u001b[38;5;241m.\u001b[39ma[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtitle\u001b[39m\u001b[38;5;124m\"\u001b[39m]\n\u001b[0;32m---> 42\u001b[0m     price \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mfloat\u001b[39m(book\u001b[38;5;241m.\u001b[39mselect_one(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m.price_color\u001b[39m\u001b[38;5;124m\"\u001b[39m)\u001b[38;5;241m.\u001b[39mtext\u001b[38;5;241m.\u001b[39mstrip(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m£\u001b[39m\u001b[38;5;124m\"\u001b[39m))\n\u001b[1;32m     43\u001b[0m     rating \u001b[38;5;241m=\u001b[39m rating_map[book\u001b[38;5;241m.\u001b[39mp[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mclass\u001b[39m\u001b[38;5;124m\"\u001b[39m][\u001b[38;5;241m1\u001b[39m]]\n\u001b[1;32m     45\u001b[0m     \u001b[38;5;66;03m# Skip if rating or price doesn’t meet filter\u001b[39;00m\n",
+            "\u001b[0;31mValueError\u001b[0m: could not convert string to float: 'Â£51.77'"
+          ]
+        }
+      ],
       "source": [
-        "# Your solution goes here"
+        "# **Task**\n",
+        "\n",
+        "# Your task is to create a Python script using BeautifulSoup and pandas to scrape Books to Scrape book data, focusing on book ratings and genres. The script should be able to filter books with ratings above a certain threshold and in specific genres. Additionally, the script should structure the scraped data in a tabular format using pandas for further analysis.\n",
+        "\n",
+        "# **Expected Outcome**\n",
+        "\n",
+        "#### A function named `scrape_books` that takes two parameters: `min_rating` and `max_price`. The function should scrape book data from the \"Books to Scrape\" website and return a `pandas` DataFrame with the following columns:\n",
+        "\n",
+        "# **Expected Outcome**\n",
+        "\n",
+        "# - A function named `scrape_books` that takes two parameters: `min_rating` and `max_price`.\n",
+        "# - The function should return a DataFrame with the following columns:\n",
+        "#   - **UPC**: The Universal Product Code (UPC) of the book.\n",
+        "#   - **Title**: The title of the book.\n",
+        "#   - **Price (£)**: The price of the book in pounds.\n",
+        "#   - **Rating**: The rating of the book (1-5 stars).\n",
+        "#   - **Genre**: The genre of the book.\n",
+        "#   - **Availability**: Whether the book is in stock or not.\n",
+        "#   - **Description**: A brief description or product description of the book (if available).\n",
+        "  \n",
+        "##### You will execute this script to scrape data for books with a minimum rating of `4.0` (4 stars and above) and a maximum price of `£20`.\n",
+        "import requests\n",
+        "from bs4 import BeautifulSoup\n",
+        "import pandas as pd\n",
+        "\n",
+        "def _fetch_soup(session, url):\n",
+        "    \"\"\"Download `url` with `session` and return the parsed page.\n",
+        "\n",
+        "    Raises requests.HTTPError when the server answers with an error status.\n",
+        "    \"\"\"\n",
+        "    response = session.get(url, timeout=30)\n",
+        "    response.raise_for_status()\n",
+        "    # Hand BeautifulSoup the raw bytes and name the encoding: going through\n",
+        "    # response.text produced 'Â£51.77' (UTF-8 bytes read as Latin-1), which\n",
+        "    # float() rejected with a ValueError.\n",
+        "    return BeautifulSoup(response.content, \"html.parser\", from_encoding=\"utf-8\")\n",
+        "\n",
+        "\n",
+        "def _table_value(detail_soup, label):\n",
+        "    \"\"\"Return the text of the <td> next to the <th> whose text is `label`.\n",
+        "\n",
+        "    Returns None when the page has no such row.\n",
+        "    \"\"\"\n",
+        "    header = detail_soup.find(\"th\", string=label)\n",
+        "    cell = header.find_next_sibling(\"td\") if header else None\n",
+        "    return cell.get_text(strip=True) if cell else None\n",
+        "\n",
+        "\n",
+        "def scrape_books(min_rating=3, max_price=50.0):\n",
+        "    \"\"\"Scrape Books to Scrape and return the books that pass both filters.\n",
+        "\n",
+        "    Every catalogue listing page is read; a book is kept when its star\n",
+        "    rating is at least `min_rating` and its price is at most `max_price`.\n",
+        "    The detail page of each kept book is then fetched for the other fields.\n",
+        "\n",
+        "    Args:\n",
+        "        min_rating: Lowest star rating (1-5) a book may have to be kept.\n",
+        "        max_price: Highest price in pounds a book may have to be kept.\n",
+        "\n",
+        "    Returns:\n",
+        "        A pandas DataFrame with the columns UPC, Title, Price (£), Rating,\n",
+        "        Genre, Availability and Description (no rows if nothing matches).\n",
+        "\n",
+        "    Raises:\n",
+        "        requests.HTTPError: If any page answers with an error status.\n",
+        "    \"\"\"\n",
+        "    catalogue_url = \"http://books.toscrape.com/catalogue/\"\n",
+        "    columns = [\"UPC\", \"Title\", \"Price (£)\", \"Rating\", \"Genre\", \"Availability\", \"Description\"]\n",
+        "    books_data = []\n",
+        "\n",
+        "    # Convert rating words into numbers\n",
+        "    rating_map = {\"One\": 1, \"Two\": 2, \"Three\": 3, \"Four\": 4, \"Five\": 5}\n",
+        "\n",
+        "    # One session reuses the connection across the many requests below\n",
+        "    with requests.Session() as session:\n",
+        "        # Loop through ALL 50 pages\n",
+        "        for page in range(1, 51):\n",
+        "            soup = _fetch_soup(session, f\"{catalogue_url}page-{page}.html\")\n",
+        "\n",
+        "            # Loop through all books on the page\n",
+        "            for book in soup.select(\"article.product_pod\"):\n",
+        "                title = book.h3.a[\"title\"]\n",
+        "                # Keep only digits and the decimal point so the currency symbol\n",
+        "                # (or any stray character around it) cannot break float()\n",
+        "                price_text = book.select_one(\".price_color\").get_text()\n",
+        "                price = float(\"\".join(ch for ch in price_text if ch.isdigit() or ch == \".\"))\n",
+        "                rating = rating_map[book.p[\"class\"][1]]\n",
+        "\n",
+        "                # Skip if rating or price doesn't meet filter\n",
+        "                if rating < min_rating or price > max_price:\n",
+        "                    continue\n",
+        "\n",
+        "                # Follow link to book detail page\n",
+        "                detail_soup = _fetch_soup(session, catalogue_url + book.h3.a[\"href\"])\n",
+        "\n",
+        "                description_tag = detail_soup.select_one(\"#product_description + p\")\n",
+        "                description = description_tag.text if description_tag else \"No description\"\n",
+        "\n",
+        "                # The last breadcrumb item is the book title and carries no link,\n",
+        "                # so the genre is the second-to-last <li>. Taking [-2] of the <a>\n",
+        "                # tags instead lands on the parent \"Books\" category.\n",
+        "                genre = detail_soup.select(\"ul.breadcrumb li\")[-2].get_text(strip=True)\n",
+        "\n",
+        "                # Save book data\n",
+        "                books_data.append({\n",
+        "                    \"UPC\": _table_value(detail_soup, \"UPC\"),\n",
+        "                    \"Title\": title,\n",
+        "                    \"Price (£)\": price,\n",
+        "                    \"Rating\": rating,\n",
+        "                    \"Genre\": genre,\n",
+        "                    \"Availability\": _table_value(detail_soup, \"Availability\"),\n",
+        "                    \"Description\": description\n",
+        "                })\n",
+        "\n",
+        "    # Passing the columns keeps the expected schema even when no book matches\n",
+        "    return pd.DataFrame(books_data, columns=columns)\n",
+        "\n",
+        "\n",
+        "# Task run: keep books rated 4 stars or higher that cost no more than £20\n",
+        "df = scrape_books(max_price=20, min_rating=4)\n",
+        "print(df.head(n=5))\n",
+        "print(f\"\\nTotal books scraped: {len(df)}\")\n",
+        "\n"
       ]
     }
   ],
@@ -126,7 +220,7 @@
       "provenance": []
     },
     "kernelspec": {
-      "display_name": "Python 3 (ipykernel)",
+      "display_name": "base",
       "language": "python",
       "name": "python3"
     },
@@ -140,7 +234,7 @@
       "name": "python",
       "nbconvert_exporter": "python",
       "pygments_lexer": "ipython3",
-      "version": "3.9.13"
+      "version": "3.12.2"
     }
   },
   "nbformat": 4,