From 622785cea6c32844c8a628cf263545102cfb5512 Mon Sep 17 00:00:00 2001
From: martin-paz-y <martinsdelolu@gmail.com>
Date: Sun, 14 Sep 2025 12:20:32 +0200
Subject: [PATCH] Solved Lab

---
 lab-dw-pandas.ipynb | 619 +++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 607 insertions(+), 12 deletions(-)
diff --git a/lab-dw-pandas.ipynb b/lab-dw-pandas.ipynb
index fbd468314..3c8cb1b05 100644
--- a/lab-dw-pandas.ipynb
+++ b/lab-dw-pandas.ipynb
@@ -82,12 +82,483 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 37,
+   "id": "3a795497-28e5-41d5-a6c6-e3c51324956a",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pandas as pd"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 46,
    "id": "dd4e8cd8-a6f6-486c-a5c4-1745b0c035f4",
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Your code here"
+    "url = \"https://raw.githubusercontent.com/data-bootcamp-v4/data/main/file1.csv\"\n",
+    "labdata_df = pd.read_csv(url)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 47,
+   "id": "e49190cf-affb-4561-9a70-207823788fa9",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>Customer</th>\n",
+       "      <th>ST</th>\n",
+       "      <th>GENDER</th>\n",
+       "      <th>Education</th>\n",
+       "      <th>Customer Lifetime Value</th>\n",
+       "      <th>Income</th>\n",
+       "      <th>Monthly Premium Auto</th>\n",
+       "      <th>Number of Open Complaints</th>\n",
+       "      <th>Policy Type</th>\n",
+       "      <th>Vehicle Class</th>\n",
+       "      <th>Total Claim Amount</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>RB50392</td>\n",
+       "      <td>Washington</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>Master</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>1000.0</td>\n",
+       "      <td>1/0/00</td>\n",
+       "      <td>Personal Auto</td>\n",
+       "      <td>Four-Door Car</td>\n",
+       "      <td>2.704934</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>QZ44356</td>\n",
+       "      <td>Arizona</td>\n",
+       "      <td>F</td>\n",
+       "      <td>Bachelor</td>\n",
+       "      <td>697953.59%</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>94.0</td>\n",
+       "      <td>1/0/00</td>\n",
+       "      <td>Personal Auto</td>\n",
+       "      <td>Four-Door Car</td>\n",
+       "      <td>1131.464935</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>AI49188</td>\n",
+       "      <td>Nevada</td>\n",
+       "      <td>F</td>\n",
+       "      <td>Bachelor</td>\n",
+       "      <td>1288743.17%</td>\n",
+       "      <td>48767.0</td>\n",
+       "      <td>108.0</td>\n",
+       "      <td>1/0/00</td>\n",
+       "      <td>Personal Auto</td>\n",
+       "      <td>Two-Door Car</td>\n",
+       "      <td>566.472247</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>WW63253</td>\n",
+       "      <td>California</td>\n",
+       "      <td>M</td>\n",
+       "      <td>Bachelor</td>\n",
+       "      <td>764586.18%</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>106.0</td>\n",
+       "      <td>1/0/00</td>\n",
+       "      <td>Corporate Auto</td>\n",
+       "      <td>SUV</td>\n",
+       "      <td>529.881344</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>GA49547</td>\n",
+       "      <td>Washington</td>\n",
+       "      <td>M</td>\n",
+       "      <td>High School or Below</td>\n",
+       "      <td>536307.65%</td>\n",
+       "      <td>36357.0</td>\n",
+       "      <td>68.0</td>\n",
+       "      <td>1/0/00</td>\n",
+       "      <td>Personal Auto</td>\n",
+       "      <td>Four-Door Car</td>\n",
+       "      <td>17.269323</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>...</th>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4003</th>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4004</th>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4005</th>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4006</th>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4007</th>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "<p>4008 rows × 11 columns</p>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "     Customer          ST GENDER             Education  \\\n",
+       "0     RB50392  Washington    NaN                Master   \n",
+       "1     QZ44356     Arizona      F              Bachelor   \n",
+       "2     AI49188      Nevada      F              Bachelor   \n",
+       "3     WW63253  California      M              Bachelor   \n",
+       "4     GA49547  Washington      M  High School or Below   \n",
+       "...       ...         ...    ...                   ...   \n",
+       "4003      NaN         NaN    NaN                   NaN   \n",
+       "4004      NaN         NaN    NaN                   NaN   \n",
+       "4005      NaN         NaN    NaN                   NaN   \n",
+       "4006      NaN         NaN    NaN                   NaN   \n",
+       "4007      NaN         NaN    NaN                   NaN   \n",
+       "\n",
+       "     Customer Lifetime Value   Income  Monthly Premium Auto  \\\n",
+       "0                        NaN      0.0                1000.0   \n",
+       "1                 697953.59%      0.0                  94.0   \n",
+       "2                1288743.17%  48767.0                 108.0   \n",
+       "3                 764586.18%      0.0                 106.0   \n",
+       "4                 536307.65%  36357.0                  68.0   \n",
+       "...                      ...      ...                   ...   \n",
+       "4003                     NaN      NaN                   NaN   \n",
+       "4004                     NaN      NaN                   NaN   \n",
+       "4005                     NaN      NaN                   NaN   \n",
+       "4006                     NaN      NaN                   NaN   \n",
+       "4007                     NaN      NaN                   NaN   \n",
+       "\n",
+       "     Number of Open Complaints     Policy Type  Vehicle Class  \\\n",
+       "0                       1/0/00   Personal Auto  Four-Door Car   \n",
+       "1                       1/0/00   Personal Auto  Four-Door Car   \n",
+       "2                       1/0/00   Personal Auto   Two-Door Car   \n",
+       "3                       1/0/00  Corporate Auto            SUV   \n",
+       "4                       1/0/00   Personal Auto  Four-Door Car   \n",
+       "...                        ...             ...            ...   \n",
+       "4003                       NaN             NaN            NaN   \n",
+       "4004                       NaN             NaN            NaN   \n",
+       "4005                       NaN             NaN            NaN   \n",
+       "4006                       NaN             NaN            NaN   \n",
+       "4007                       NaN             NaN            NaN   \n",
+       "\n",
+       "      Total Claim Amount  \n",
+       "0               2.704934  \n",
+       "1            1131.464935  \n",
+       "2             566.472247  \n",
+       "3             529.881344  \n",
+       "4              17.269323  \n",
+       "...                  ...  \n",
+       "4003                 NaN  \n",
+       "4004                 NaN  \n",
+       "4005                 NaN  \n",
+       "4006                 NaN  \n",
+       "4007                 NaN  \n",
+       "\n",
+       "[4008 rows x 11 columns]"
+      ]
+     },
+     "execution_count": 47,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "labdata_df  # Identify the dimensions of the dataset (number of rows and columns): the frame footer below reports 4008 rows x 11 columns; labdata_df.shape returns the same pair."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 48,
+   "id": "708298fc-c738-438c-a38f-b445dc2dc35e",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "<class 'pandas.core.frame.DataFrame'>\n",
+      "RangeIndex: 4008 entries, 0 to 4007\n",
+      "Data columns (total 11 columns):\n",
+      " #   Column                     Non-Null Count  Dtype  \n",
+      "---  ------                     --------------  -----  \n",
+      " 0   Customer                   1071 non-null   object \n",
+      " 1   ST                         1071 non-null   object \n",
+      " 2   GENDER                     954 non-null    object \n",
+      " 3   Education                  1071 non-null   object \n",
+      " 4   Customer Lifetime Value    1068 non-null   object \n",
+      " 5   Income                     1071 non-null   float64\n",
+      " 6   Monthly Premium Auto       1071 non-null   float64\n",
+      " 7   Number of Open Complaints  1071 non-null   object \n",
+      " 8   Policy Type                1071 non-null   object \n",
+      " 9   Vehicle Class              1071 non-null   object \n",
+      " 10  Total Claim Amount         1071 non-null   float64\n",
+      "dtypes: float64(3), object(8)\n",
+      "memory usage: 344.6+ KB\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Determine the data types of each column and evaluate whether they are appropriate for the nature of the variable. You should also provide suggestions for fixing any incorrect data types.\n",
+    "# Suggested fixes: Customer Lifetime Value is object because its values end in '%' (strip it and cast to float); Number of Open Complaints is object because it holds strings such as '1/0/00' (parse it before treating it as numeric).\n",
+    "labdata_df.info()\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "id": "4d039387-1214-497f-8b90-06d0869b324d",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "Customer                     1071\n",
+       "ST                              8\n",
+       "GENDER                          5\n",
+       "Education                       6\n",
+       "Customer Lifetime Value      1027\n",
+       "Income                        774\n",
+       "Monthly Premium Auto          132\n",
+       "Number of Open Complaints       6\n",
+       "Policy Type                     3\n",
+       "Vehicle Class                   6\n",
+       "Total Claim Amount            761\n",
+       "dtype: int64"
+      ]
+     },
+     "execution_count": 12,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "#- Identify the number of unique values for each column and determine which columns appear to be categorical. You should also describe the unique values of each categorical column and the range of values for numerical columns, and give your insights.\n",
+    "\n",
+    "labdata_df.nunique()\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 33,
+   "id": "fed7d569-5971-4e17-9b0c-bd41d8cc63ab",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "This is Median\n",
+      " Income                  36234.000000\n",
+      "Monthly Premium Auto       83.000000\n",
+      "Total Claim Amount        354.729129\n",
+      "dtype: float64\n",
+      "This is Mean\n",
+      " Income                  39295.701214\n",
+      "Monthly Premium Auto      193.234360\n",
+      "Total Claim Amount        404.986909\n",
+      "dtype: float64\n",
+      "This is Mode\n",
+      "    Income  Monthly Premium Auto  Total Claim Amount\n",
+      "0     0.0                  65.0               321.6\n",
+      "This is Std\n",
+      " Income                  30469.427060\n",
+      "Monthly Premium Auto     1601.190369\n",
+      "Total Claim Amount        293.027260\n",
+      "dtype: float64\n",
+      "This is Q1 (25%)\n",
+      " Income                  14072.000000\n",
+      "Monthly Premium Auto       68.000000\n",
+      "Total Claim Amount        202.157702\n",
+      "Name: 0.25, dtype: float64\n",
+      "This is Q2 (50% / Median)\n",
+      " Income                  36234.000000\n",
+      "Monthly Premium Auto       83.000000\n",
+      "Total Claim Amount        354.729129\n",
+      "Name: 0.5, dtype: float64\n",
+      "This is Q3 (75%)\n",
+      " Income                  64631.0\n",
+      "Monthly Premium Auto      109.5\n",
+      "Total Claim Amount        532.8\n",
+      "Name: 0.75, dtype: float64\n"
+     ]
+    }
+   ],
+   "source": [
+    "#- Compute summary statistics such as mean, median, mode, standard deviation, and quartiles to understand the central tendency and distribution of the data for numerical columns. You should also provide your conclusions based on these summary statistics.\n",
+    "\n",
+    "# Select only the numeric columns\n",
+    "numeric_columns = labdata_df.select_dtypes(include='number')\n",
+    "\n",
+    "# Compute the statistics\n",
+    "median = numeric_columns.median()\n",
+    "mean = numeric_columns.mean()\n",
+    "mode = numeric_columns.mode()\n",
+    "std = numeric_columns.std()\n",
+    "q1 = numeric_columns.quantile(0.25)  # 25%\n",
+    "q2 = numeric_columns.quantile(0.50)  # 50% = median\n",
+    "q3 = numeric_columns.quantile(0.75)  # 75%\n",
+    "\n",
+    "# Show the results\n",
+    "print(\"This is Median\\n\", median)\n",
+    "print(\"This is Mean\\n\", mean)\n",
+    "print(\"This is Mode\\n\", mode)   # mode() returns a DataFrame (here a single row), not a Series\n",
+    "print(\"This is Std\\n\", std)     # sample standard deviation (pandas default, ddof=1)\n",
+    "print(\"This is Q1 (25%)\\n\", q1)\n",
+    "print(\"This is Q2 (50% / Median)\\n\", q2)\n",
+    "print(\"This is Q3 (75%)\\n\", q3)\n",
+    "\n",
+    "# Insights: Income shows high variability (std of about 30,469 against a mean of about 39,296). Monthly Premium Auto has a mean (193.23) well above its median (83.0), which points to large outliers.\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 45,
+   "id": "1c7ce63f-4aa3-4d2c-b0a7-718f384318b7",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "This is Count\n",
+      " Customer                     1071\n",
+      "ST                           1071\n",
+      "GENDER                        954\n",
+      "Education                    1071\n",
+      "Customer Lifetime Value      1068\n",
+      "Number of Open Complaints    1071\n",
+      "Policy Type                  1071\n",
+      "Vehicle Class                1071\n",
+      "dtype: int64\n",
+      "This is Unique\n",
+      " Customer                     1071\n",
+      "ST                              8\n",
+      "GENDER                          5\n",
+      "Education                       6\n",
+      "Customer Lifetime Value      1027\n",
+      "Number of Open Complaints       6\n",
+      "Policy Type                     3\n",
+      "Vehicle Class                   6\n",
+      "dtype: int64\n"
+     ]
+    }
+   ],
+   "source": [
+    "#- Compute summary statistics for categorical columns and providing your conclusions based on these statistics.\n",
+    "\n",
+    "# Select only the object and int64 columns, which are treated here as categorical\n",
+    "object_int_columns = labdata_df.select_dtypes(include=[\"object\", \"int64\"])\n",
+    "\n",
+    "# Compute the statistics\n",
+    "count = object_int_columns.count()\n",
+    "nunique = object_int_columns.nunique()\n",
+    "\n",
+    "# Show the results\n",
+    "print(\"This is Count\\n\", count)\n",
+    "print(\"This is Unique\\n\", nunique)\n",
+    "\n",
+    "# Insights: GENDER has missing values (~11%), Customer Lifetime Value is stored as object because of the trailing %, and Policy Type, ST (state), and Education have few categories, which makes them useful for segmentation.\n",
+    "\n"
    ]
   },
   {
@@ -116,12 +587,33 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 65,
    "id": "2dca5073-4520-4f42-9390-4b92733284ed",
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "ST\n",
+       "AZ             25\n",
+       "WA             30\n",
+       "Washington     81\n",
+       "Nevada         98\n",
+       "Cali          120\n",
+       "Name: count, dtype: int64"
+      ]
+     },
+     "execution_count": 65,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
-    "# Your code here"
+    "# Load only the state column; nothing else is needed to count locations.\n",
+    "url = 'https://raw.githubusercontent.com/data-bootcamp-v4/data/main/file1.csv'\n",
+    "labdata_df_state = pd.read_csv(url, usecols=[\"ST\"])\n",
+    "# Five least frequent values: sort the counts ascending and take the first five rows explicitly by position.\n",
+    "labdata_df_state[\"ST\"].value_counts(ascending=True).head(5)\n"
    ]
   },
   {
@@ -146,12 +638,34 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 77,
    "id": "bcfad6c1-9af2-4b0b-9aa9-0dc5c17473c0",
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "RangeIndex(start=0, stop=4008, step=1)\n",
+      "Policy Type\n",
+      "Personal Auto     780\n",
+      "Corporate Auto    234\n",
+      "Special Auto       57\n",
+      "Name: count, dtype: int64\n"
+     ]
+    }
+   ],
    "source": [
-    "# Your code here"
+    "\n",
+    "url = 'https://raw.githubusercontent.com/data-bootcamp-v4/data/main/file1.csv'\n",
+    "labdata_df_policy = pd.read_csv(url, usecols=[\"Policy Type\"])\n",
+    "\n",
+    "print(labdata_df_policy.index)\n",
+    "\n",
+    "\n",
+    "print(labdata_df_policy[\"Policy Type\"].value_counts()) # note: value_counts is a method, so the () are required\n",
+    "\n",
+    "\n"
    ]
   },
   {
@@ -176,12 +690,93 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 87,
    "id": "0c0563cf-6f8b-463d-a321-651a972f82e5",
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "Policy Type\n",
+       "Corporate Auto    41390.311966\n",
+       "Personal Auto     38180.698718\n",
+       "Special Auto      45954.701754\n",
+       "Name: Income, dtype: float64"
+      ]
+     },
+     "execution_count": 87,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
-    "# Your code here"
+    "\n",
+    "url = 'https://raw.githubusercontent.com/data-bootcamp-v4/data/main/file1.csv'\n",
+    "labdata_df_policy_income = pd.read_csv(url, usecols=[\"Policy Type\",\"Income\"])\n",
+    "\n",
+    "# Mean income per policy type via groupby; the .loc-based split into Personal Auto and Corporate Auto frames follows in the next cell.\n",
+    "\n",
+    "\n",
+    "\n",
+    "avg_income_per_policy = labdata_df_policy_income.groupby(\"Policy Type\")[\"Income\"].mean()\n",
+    "\n",
+    "avg_income_per_policy\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 93,
+   "id": "2b6d7061-b47c-431b-a040-05407ec2ddd6",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Personal Auto policies:\n",
+      "     Income    Policy Type\n",
+      "0      0.0  Personal Auto\n",
+      "1      0.0  Personal Auto\n",
+      "2  48767.0  Personal Auto\n",
+      "4  36357.0  Personal Auto\n",
+      "5  62902.0  Personal Auto \n",
+      "\n",
+      "Corporate Auto policies:\n",
+      "      Income     Policy Type\n",
+      "3       0.0  Corporate Auto\n",
+      "6   55350.0  Corporate Auto\n",
+      "7       0.0  Corporate Auto\n",
+      "8   14072.0  Corporate Auto\n",
+      "12  77026.0  Corporate Auto \n",
+      "\n",
+      "Average income (Personal Auto): 38180.69871794872\n",
+      "Average income (Corporate Auto): 41390.31196581197\n"
+     ]
+    }
+   ],
+   "source": [
+    "url = 'https://raw.githubusercontent.com/data-bootcamp-v4/data/main/file1.csv'\n",
+    "labdata_df_policy_income = pd.read_csv(url, usecols=[\"Policy Type\",\"Income\"])\n",
+    "\n",
+    "# Filter with .loc and a boolean mask on Policy Type\n",
+    "personal_auto_df = labdata_df_policy_income.loc[\n",
+    "    labdata_df_policy_income[\"Policy Type\"] == \"Personal Auto\"\n",
+    "]\n",
+    "\n",
+    "corporate_auto_df = labdata_df_policy_income.loc[\n",
+    "    labdata_df_policy_income[\"Policy Type\"] == \"Corporate Auto\"\n",
+    "]\n",
+    "\n",
+    "print(\"Personal Auto policies:\\n\", personal_auto_df.head(), \"\\n\")\n",
+    "print(\"Corporate Auto policies:\\n\", corporate_auto_df.head(), \"\\n\")\n",
+    "\n",
+    "# Compute the average income for each subset\n",
+    "avg_income_personal = personal_auto_df[\"Income\"].mean()\n",
+    "avg_income_corporate = corporate_auto_df[\"Income\"].mean()\n",
+    "\n",
+    "print(\"Average income (Personal Auto):\", avg_income_personal)\n",
+    "print(\"Average income (Corporate Auto):\", avg_income_corporate)\n",
+    "\n"
    ]
   },
   {
@@ -251,7 +846,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.9.13"
+   "version": "3.13.7"
   }
  },
  "nbformat": 4,

	Customer	ST	GENDER	Education	Customer Lifetime Value	Income	Monthly Premium Auto	Number of Open Complaints	Policy Type	Vehicle Class	Total Claim Amount
0	RB50392	Washington	NaN	Master	NaN	0.0	1000.0	1/0/00	Personal Auto	Four-Door Car	2.704934
1	QZ44356	Arizona	F	Bachelor	697953.59%	0.0	94.0	1/0/00	Personal Auto	Four-Door Car	1131.464935
2	AI49188	Nevada	F	Bachelor	1288743.17%	48767.0	108.0	1/0/00	Personal Auto	Two-Door Car	566.472247
3	WW63253	California	M	Bachelor	764586.18%	0.0	106.0	1/0/00	Corporate Auto	SUV	529.881344
4	GA49547	Washington	M	High School or Below	536307.65%	36357.0	68.0	1/0/00	Personal Auto	Four-Door Car	17.269323
...	...	...	...	...	...	...	...	...	...	...	...
4003	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN
4004	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN
4005	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN
4006	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN
4007	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN