From d65953a582ded8458e731e29c6a14304bcb97b36 Mon Sep 17 00:00:00 2001
From: davherdel <davherdel@gmail.com>
Date: Sat, 16 Aug 2025 15:37:19 +0100
Subject: [PATCH] Uploaded finished notebook

---
 lab-dw-pandas.ipynb | 696 +++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 681 insertions(+), 15 deletions(-)
diff --git a/lab-dw-pandas.ipynb b/lab-dw-pandas.ipynb
index fbd468314..5ec019ad3 100644
--- a/lab-dw-pandas.ipynb
+++ b/lab-dw-pandas.ipynb
@@ -80,14 +80,533 @@
     "- Compute summary statistics for categorical columns and providing your conclusions based on these statistics."
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "1222f185",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pandas as pd"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "68a61b80",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>Customer</th>\n",
+       "      <th>ST</th>\n",
+       "      <th>GENDER</th>\n",
+       "      <th>Education</th>\n",
+       "      <th>Customer Lifetime Value</th>\n",
+       "      <th>Income</th>\n",
+       "      <th>Monthly Premium Auto</th>\n",
+       "      <th>Number of Open Complaints</th>\n",
+       "      <th>Policy Type</th>\n",
+       "      <th>Vehicle Class</th>\n",
+       "      <th>Total Claim Amount</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>RB50392</td>\n",
+       "      <td>Washington</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>Master</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>1000.0</td>\n",
+       "      <td>1/0/00</td>\n",
+       "      <td>Personal Auto</td>\n",
+       "      <td>Four-Door Car</td>\n",
+       "      <td>2.704934</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>QZ44356</td>\n",
+       "      <td>Arizona</td>\n",
+       "      <td>F</td>\n",
+       "      <td>Bachelor</td>\n",
+       "      <td>697953.59%</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>94.0</td>\n",
+       "      <td>1/0/00</td>\n",
+       "      <td>Personal Auto</td>\n",
+       "      <td>Four-Door Car</td>\n",
+       "      <td>1131.464935</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>AI49188</td>\n",
+       "      <td>Nevada</td>\n",
+       "      <td>F</td>\n",
+       "      <td>Bachelor</td>\n",
+       "      <td>1288743.17%</td>\n",
+       "      <td>48767.0</td>\n",
+       "      <td>108.0</td>\n",
+       "      <td>1/0/00</td>\n",
+       "      <td>Personal Auto</td>\n",
+       "      <td>Two-Door Car</td>\n",
+       "      <td>566.472247</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>WW63253</td>\n",
+       "      <td>California</td>\n",
+       "      <td>M</td>\n",
+       "      <td>Bachelor</td>\n",
+       "      <td>764586.18%</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>106.0</td>\n",
+       "      <td>1/0/00</td>\n",
+       "      <td>Corporate Auto</td>\n",
+       "      <td>SUV</td>\n",
+       "      <td>529.881344</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>GA49547</td>\n",
+       "      <td>Washington</td>\n",
+       "      <td>M</td>\n",
+       "      <td>High School or Below</td>\n",
+       "      <td>536307.65%</td>\n",
+       "      <td>36357.0</td>\n",
+       "      <td>68.0</td>\n",
+       "      <td>1/0/00</td>\n",
+       "      <td>Personal Auto</td>\n",
+       "      <td>Four-Door Car</td>\n",
+       "      <td>17.269323</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>...</th>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4003</th>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4004</th>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4005</th>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4006</th>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4007</th>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "<p>4008 rows × 11 columns</p>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "     Customer          ST GENDER             Education  \\\n",
+       "0     RB50392  Washington    NaN                Master   \n",
+       "1     QZ44356     Arizona      F              Bachelor   \n",
+       "2     AI49188      Nevada      F              Bachelor   \n",
+       "3     WW63253  California      M              Bachelor   \n",
+       "4     GA49547  Washington      M  High School or Below   \n",
+       "...       ...         ...    ...                   ...   \n",
+       "4003      NaN         NaN    NaN                   NaN   \n",
+       "4004      NaN         NaN    NaN                   NaN   \n",
+       "4005      NaN         NaN    NaN                   NaN   \n",
+       "4006      NaN         NaN    NaN                   NaN   \n",
+       "4007      NaN         NaN    NaN                   NaN   \n",
+       "\n",
+       "     Customer Lifetime Value   Income  Monthly Premium Auto  \\\n",
+       "0                        NaN      0.0                1000.0   \n",
+       "1                 697953.59%      0.0                  94.0   \n",
+       "2                1288743.17%  48767.0                 108.0   \n",
+       "3                 764586.18%      0.0                 106.0   \n",
+       "4                 536307.65%  36357.0                  68.0   \n",
+       "...                      ...      ...                   ...   \n",
+       "4003                     NaN      NaN                   NaN   \n",
+       "4004                     NaN      NaN                   NaN   \n",
+       "4005                     NaN      NaN                   NaN   \n",
+       "4006                     NaN      NaN                   NaN   \n",
+       "4007                     NaN      NaN                   NaN   \n",
+       "\n",
+       "     Number of Open Complaints     Policy Type  Vehicle Class  \\\n",
+       "0                       1/0/00   Personal Auto  Four-Door Car   \n",
+       "1                       1/0/00   Personal Auto  Four-Door Car   \n",
+       "2                       1/0/00   Personal Auto   Two-Door Car   \n",
+       "3                       1/0/00  Corporate Auto            SUV   \n",
+       "4                       1/0/00   Personal Auto  Four-Door Car   \n",
+       "...                        ...             ...            ...   \n",
+       "4003                       NaN             NaN            NaN   \n",
+       "4004                       NaN             NaN            NaN   \n",
+       "4005                       NaN             NaN            NaN   \n",
+       "4006                       NaN             NaN            NaN   \n",
+       "4007                       NaN             NaN            NaN   \n",
+       "\n",
+       "      Total Claim Amount  \n",
+       "0               2.704934  \n",
+       "1            1131.464935  \n",
+       "2             566.472247  \n",
+       "3             529.881344  \n",
+       "4              17.269323  \n",
+       "...                  ...  \n",
+       "4003                 NaN  \n",
+       "4004                 NaN  \n",
+       "4005                 NaN  \n",
+       "4006                 NaN  \n",
+       "4007                 NaN  \n",
+       "\n",
+       "[4008 rows x 11 columns]"
+      ]
+     },
+     "execution_count": 5,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Pull the raw CSV from GitHub into a DataFrame and display it\n",
+    "url = \"https://raw.githubusercontent.com/data-bootcamp-v4/data/main/file1.csv\"\n",
+    "df = pd.read_csv(filepath_or_buffer=url)\n",
+    "df"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "a1eac870",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "Index(['Customer', 'ST', 'GENDER', 'Education', 'Customer Lifetime Value',\n",
+       "       'Income', 'Monthly Premium Auto', 'Number of Open Complaints',\n",
+       "       'Policy Type', 'Vehicle Class', 'Total Claim Amount'],\n",
+       "      dtype='object')"
+      ]
+     },
+     "execution_count": 6,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "df.columns"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,
    "id": "dd4e8cd8-a6f6-486c-a5c4-1745b0c035f4",
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Shape: (4008, 11)\n",
+      "\n",
+      "Data types:\n",
+      " Customer                      object\n",
+      "ST                            object\n",
+      "GENDER                        object\n",
+      "Education                     object\n",
+      "Customer Lifetime Value       object\n",
+      "Income                       float64\n",
+      "Monthly Premium Auto         float64\n",
+      "Number of Open Complaints     object\n",
+      "Policy Type                   object\n",
+      "Vehicle Class                 object\n",
+      "Total Claim Amount           float64\n",
+      "dtype: object\n",
+      "\n",
+      "Unique value counts per column:\n",
+      " Customer                     1071\n",
+      "ST                              8\n",
+      "GENDER                          5\n",
+      "Education                       6\n",
+      "Customer Lifetime Value      1027\n",
+      "Income                        774\n",
+      "Monthly Premium Auto          132\n",
+      "Number of Open Complaints       6\n",
+      "Policy Type                     3\n",
+      "Vehicle Class                   6\n",
+      "Total Claim Amount            761\n",
+      "dtype: int64\n",
+      "\n",
+      "Categorical columns: ['Customer', 'ST', 'GENDER', 'Education', 'Customer Lifetime Value', 'Number of Open Complaints', 'Policy Type', 'Vehicle Class']\n",
+      "\n",
+      "Customer -> uniques: ['RB50392' 'QZ44356' 'AI49188' ... 'CW49887' 'MY31220' nan]\n",
+      "\n",
+      "ST -> uniques: ['Washington' 'Arizona' 'Nevada' 'California' 'Oregon' 'Cali' 'AZ' 'WA'\n",
+      " nan]\n",
+      "\n",
+      "GENDER -> uniques: [nan 'F' 'M' 'Femal' 'Male' 'female']\n",
+      "\n",
+      "Education -> uniques: ['Master' 'Bachelor' 'High School or Below' 'College' 'Bachelors' 'Doctor'\n",
+      " nan]\n",
+      "\n",
+      "Customer Lifetime Value -> uniques: [nan '697953.59%' '1288743.17%' ... '2031499.76%' '323912.47%'\n",
+      " '899704.02%']\n",
+      "\n",
+      "Number of Open Complaints -> uniques: ['1/0/00' '1/2/00' '1/1/00' '1/3/00' '1/5/00' '1/4/00' nan]\n",
+      "\n",
+      "Policy Type -> uniques: ['Personal Auto' 'Corporate Auto' 'Special Auto' nan]\n",
+      "\n",
+      "Vehicle Class -> uniques: ['Four-Door Car' 'Two-Door Car' 'SUV' 'Luxury SUV' 'Sports Car'\n",
+      " 'Luxury Car' nan]\n",
+      "\n",
+      "Numeric ranges:\n",
+      "Income: 0.0 to 99960.0\n",
+      "Monthly Premium Auto: 61.0 to 35354.0\n",
+      "Total Claim Amount: 0.382107 to 2893.239678\n",
+      "\n",
+      "Numeric summary:\n",
+      "              Income  Monthly Premium Auto  Total Claim Amount\n",
+      "count   1071.000000           1071.000000         1071.000000\n",
+      "mean   39295.701214            193.234360          404.986909\n",
+      "std    30469.427060           1601.190369          293.027260\n",
+      "min        0.000000             61.000000            0.382107\n",
+      "25%    14072.000000             68.000000          202.157702\n",
+      "50%    36234.000000             83.000000          354.729129\n",
+      "75%    64631.000000            109.500000          532.800000\n",
+      "max    99960.000000          35354.000000         2893.239678\n",
+      "\n",
+      "Median:\n",
+      " Income                  36234.000000\n",
+      "Monthly Premium Auto       83.000000\n",
+      "Total Claim Amount        354.729129\n",
+      "dtype: float64\n",
+      "\n",
+      "Mode:\n",
+      " Income                    0.0\n",
+      "Monthly Premium Auto     65.0\n",
+      "Total Claim Amount      321.6\n",
+      "Name: 0, dtype: float64\n",
+      "\n",
+      "Customer value counts:\n",
+      " Customer\n",
+      "RB50392    1\n",
+      "HJ15383    1\n",
+      "AO74776    1\n",
+      "HQ82233    1\n",
+      "OL72737    1\n",
+      "          ..\n",
+      "RO26085    1\n",
+      "ES57969    1\n",
+      "JK55587    1\n",
+      "RN97635    1\n",
+      "MY31220    1\n",
+      "Name: count, Length: 1071, dtype: int64\n",
+      "Top value: AA71604\n",
+      "\n",
+      "ST value counts:\n",
+      " ST\n",
+      "Oregon        320\n",
+      "California    211\n",
+      "Arizona       186\n",
+      "Cali          120\n",
+      "Nevada         98\n",
+      "Washington     81\n",
+      "WA             30\n",
+      "AZ             25\n",
+      "Name: count, dtype: int64\n",
+      "Top value: Oregon\n",
+      "\n",
+      "GENDER value counts:\n",
+      " GENDER\n",
+      "F         457\n",
+      "M         413\n",
+      "Male       39\n",
+      "female     28\n",
+      "Femal      17\n",
+      "Name: count, dtype: int64\n",
+      "Top value: F\n",
+      "\n",
+      "Education value counts:\n",
+      " Education\n",
+      "Bachelor                324\n",
+      "College                 313\n",
+      "High School or Below    296\n",
+      "Master                   94\n",
+      "Doctor                   37\n",
+      "Bachelors                 7\n",
+      "Name: count, dtype: int64\n",
+      "Top value: Bachelor\n",
+      "\n",
+      "Customer Lifetime Value value counts:\n",
+      " Customer Lifetime Value\n",
+      "445811.34%     4\n",
+      "251459.20%     4\n",
+      "272535.64%     3\n",
+      "578018.22%     3\n",
+      "684615.03%     3\n",
+      "              ..\n",
+      "245357.08%     1\n",
+      "507566.27%     1\n",
+      "321497.94%     1\n",
+      "1227534.31%    1\n",
+      "899704.02%     1\n",
+      "Name: count, Length: 1027, dtype: int64\n",
+      "Top value: 251459.20%\n",
+      "\n",
+      "Number of Open Complaints value counts:\n",
+      " Number of Open Complaints\n",
+      "1/0/00    830\n",
+      "1/1/00    138\n",
+      "1/2/00     50\n",
+      "1/3/00     34\n",
+      "1/4/00     13\n",
+      "1/5/00      6\n",
+      "Name: count, dtype: int64\n",
+      "Top value: 1/0/00\n",
+      "\n",
+      "Policy Type value counts:\n",
+      " Policy Type\n",
+      "Personal Auto     780\n",
+      "Corporate Auto    234\n",
+      "Special Auto       57\n",
+      "Name: count, dtype: int64\n",
+      "Top value: Personal Auto\n",
+      "\n",
+      "Vehicle Class value counts:\n",
+      " Vehicle Class\n",
+      "Four-Door Car    576\n",
+      "Two-Door Car     205\n",
+      "SUV              199\n",
+      "Sports Car        57\n",
+      "Luxury SUV        20\n",
+      "Luxury Car        14\n",
+      "Name: count, dtype: int64\n",
+      "Top value: Four-Door Car\n"
+     ]
+    }
+   ],
    "source": [
-    "# Your code here"
+    "# Quick look at basic info\n",
+    "print(\"Shape:\", df.shape)\n",
+    "print(\"\\nData types:\\n\", df.dtypes)\n",
+    "\n",
+    "# NOTE: 'Customer Lifetime Value' is text ending in '%' (e.g. '697953.59%'), so a bare\n",
+    "# pd.to_numeric(..., errors='coerce') would turn every value into NaN; strip the '%'\n",
+    "# first if a numeric column is needed. It is left untouched (object dtype) here.\n",
+    "\n",
+    "# Unique counts\n",
+    "print(\"\\nUnique value counts per column:\\n\", df.nunique())\n",
+    "\n",
+    "# Find object-type cols (likely categoricals)\n",
+    "cat_cols = df.select_dtypes(include=\"object\").columns\n",
+    "print(\"\\nCategorical columns:\", list(cat_cols))\n",
+    "\n",
+    "# Peek at their unique values (can get messy if many)\n",
+    "for c in cat_cols:\n",
+    "    print(f\"\\n{c} -> uniques:\", df[c].unique())\n",
+    "\n",
+    "# Ranges for numeric cols ('number' selects every numeric dtype, not only the 64-bit ones)\n",
+    "num_cols = df.select_dtypes(include=\"number\").columns\n",
+    "print(\"\\nNumeric ranges:\")\n",
+    "for c in num_cols:\n",
+    "    print(f\"{c}: {df[c].min()} to {df[c].max()}\")\n",
+    "\n",
+    "# Summary stats\n",
+    "print(\"\\nNumeric summary:\\n\", df[num_cols].describe())\n",
+    "\n",
+    "# Extra quick stats (mode() can return several rows; .iloc[0] keeps the first per column)\n",
+    "print(\"\\nMedian:\\n\", df[num_cols].median())\n",
+    "print(\"\\nMode:\\n\", df[num_cols].mode().iloc[0])\n",
+    "\n",
+    "# Frequency for categories; mode() is empty for an all-NaN column, so fall back to None\n",
+    "for c in cat_cols:\n",
+    "    print(f\"\\n{c} value counts:\\n\", df[c].value_counts())\n",
+    "    print(\"Top value:\", next(iter(df[c].mode()), None))\n"
    ]
   },
   {
@@ -116,12 +635,48 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 8,
    "id": "2dca5073-4520-4f42-9390-4b92733284ed",
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n",
+      "Value counts for ST:\n",
+      " ST\n",
+      "Oregon        320\n",
+      "California    211\n",
+      "Arizona       186\n",
+      "Cali          120\n",
+      "Nevada         98\n",
+      "Washington     81\n",
+      "WA             30\n",
+      "AZ             25\n",
+      "Name: count, dtype: int64\n",
+      "\n",
+      "5 least common states (ascending):\n",
+      " ST\n",
+      "AZ             25\n",
+      "WA             30\n",
+      "Washington     81\n",
+      "Nevada         98\n",
+      "Cali          120\n",
+      "Name: count, dtype: int64\n"
+     ]
+    }
+   ],
    "source": [
-    "# Your code here"
+    "# Location (ST) frequencies; the whole cell does nothing when the column is missing\n",
+    "if 'ST' in df:\n",
+    "    st_counts = df['ST'].value_counts()\n",
+    "\n",
+    "    # full distribution, most frequent label first\n",
+    "    print(\"\\nValue counts for ST:\\n\", st_counts)\n",
+    "\n",
+    "    # the five rarest labels, smallest count first\n",
+    "    print(\"\\n5 least common states (ascending):\\n\", st_counts.sort_values(ascending=True).iloc[:5])"
    ]
   },
   {
@@ -146,12 +701,37 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 10,
    "id": "bcfad6c1-9af2-4b0b-9aa9-0dc5c17473c0",
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n",
+      "Total policies sold by type:\n",
+      " Policy Type\n",
+      "Personal Auto     780\n",
+      "Corporate Auto    234\n",
+      "Special Auto       57\n",
+      "Name: count, dtype: int64\n",
+      "\n",
+      "Most sold policy type: Personal Auto with 780 policies\n"
+     ]
+    }
+   ],
    "source": [
-    "# Your code here"
+    "# Policy Type analysis: count policies per type, then name the best seller\n",
+    "if 'Policy Type' not in df.columns:\n",
+    "    print(\"\\n 'Policy Type' column not found. Check column names.\")\n",
+    "else:\n",
+    "    policy_counts = df['Policy Type'].value_counts()\n",
+    "    print(\"\\nTotal policies sold by type:\\n\", policy_counts)\n",
+    "\n",
+    "    # idxmax() gives the label with the largest count, max() the count itself\n",
+    "    top_policy = policy_counts.idxmax()\n",
+    "    print(\"\\nMost sold policy type:\", top_policy, \"with\", policy_counts.max(), \"policies\")\n"
    ]
   },
   {
@@ -176,12 +756,51 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 11,
    "id": "0c0563cf-6f8b-463d-a321-651a972f82e5",
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n",
+      "Number of Personal Auto policies: 780\n",
+      "Number of Corporate Auto policies: 234\n",
+      "\n",
+      "Average income for Personal Auto: 38180.7\n",
+      "Average income for Corporate Auto: 41390.31\n",
+      "Customers with Personal Auto have a lower average income.\n"
+     ]
+    }
+   ],
    "source": [
-    "# Your code here"
+    "# Compare average income: Personal Auto vs Corporate Auto\n",
+    "if 'Policy Type' in df.columns and 'Income' in df.columns:\n",
+    "    # one sub-frame per policy type being compared\n",
+    "    personal_df = df.loc[df['Policy Type'] == 'Personal Auto']\n",
+    "    corporate_df = df.loc[df['Policy Type'] == 'Corporate Auto']\n",
+    "    \n",
+    "    # quick sanity check on counts\n",
+    "    print(\"\\nNumber of Personal Auto policies:\", len(personal_df))\n",
+    "    print(\"Number of Corporate Auto policies:\", len(corporate_df))\n",
+    "    # mean() skips NaN incomes; it is NaN itself when a group has no income values\n",
+    "    avg_personal = personal_df['Income'].mean()\n",
+    "    avg_corporate = corporate_df['Income'].mean()\n",
+    "    \n",
+    "    print(\"\\nAverage income for Personal Auto:\", round(avg_personal, 2))\n",
+    "    print(\"Average income for Corporate Auto:\", round(avg_corporate, 2))\n",
+    "    # NaN is neither < nor > anything; without this branch it would be reported as \"the same\"\n",
+    "    if pd.isna(avg_personal) or pd.isna(avg_corporate):\n",
+    "        print(\"Cannot compare: at least one policy type has no income data.\")\n",
+    "    elif avg_personal < avg_corporate:\n",
+    "        print(\"Customers with Personal Auto have a lower average income.\")\n",
+    "    elif avg_personal > avg_corporate:\n",
+    "        print(\"Customers with Personal Auto have a higher average income.\")\n",
+    "    else:\n",
+    "        print(\"Average incomes are the same.\")\n",
+    "else:\n",
+    "    print(\"\\n Required columns not found. Check column names.\")\n"
    ]
   },
   {
@@ -229,15 +848,62 @@
    "execution_count": null,
    "id": "b731bca6-a760-4860-a27b-a33efa712ce0",
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n",
+      "Summary stats for Total Claim Amount:\n",
+      " count    1071.000000\n",
+      "mean      404.986909\n",
+      "std       293.027260\n",
+      "min         0.382107\n",
+      "25%       202.157702\n",
+      "50%       354.729129\n",
+      "75%       532.800000\n",
+      "max      2893.239678\n",
+      "Name: Total Claim Amount, dtype: float64\n",
+      "\n",
+      "75th percentile value: 532.8\n"
+     ]
+    },
+    {
+     "ename": "NameError",
+     "evalue": "name 'cutoff_75_' is not defined",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[1;31mNameError\u001b[0m                                 Traceback (most recent call last)",
+      "Cell \u001b[1;32mIn[12], line 14\u001b[0m\n\u001b[0;32m     11\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124m75th percentile value:\u001b[39m\u001b[38;5;124m\"\u001b[39m, cutoff_75)\n\u001b[0;32m     13\u001b[0m \u001b[38;5;66;03m# filter top 25% customers\u001b[39;00m\n\u001b[1;32m---> 14\u001b[0m high_claim_df \u001b[38;5;241m=\u001b[39m df\u001b[38;5;241m.\u001b[39mloc[df[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mTotal Claim Amount\u001b[39m\u001b[38;5;124m'\u001b[39m] \u001b[38;5;241m>\u001b[39m cutoff_75_]()\n",
+      "\u001b[1;31mNameError\u001b[0m: name 'cutoff_75_' is not defined"
+     ]
+    }
+   ],
    "source": [
-    "# Your code here"
+    "# High claim amount analysis: isolate customers above the 75th percentile of claims.\n",
+    "# The guard skips everything when the column is not present in df.\n",
+    "if 'Total Claim Amount' in df.columns:\n",
+    "    \n",
+    "    # quick look at stats for context\n",
+    "    print(\"\\nSummary stats for Total Claim Amount:\\n\", df['Total Claim Amount'].describe())\n",
+    "    \n",
+    "    # 75th percentile cutoff\n",
+    "    cutoff_75 = df['Total Claim Amount'].quantile(0.75)\n",
+    "    print(\"\\n75th percentile value:\", cutoff_75)\n",
+    "    \n",
+    "    # filter top 25% customers: rows strictly above the cutoff (NaN claims compare\n",
+    "    # False and drop out). Index with the boolean mask only; the result is not callable.\n",
+    "    high_claim_df = df.loc[df['Total Claim Amount'] > cutoff_75]\n",
+    "    \n",
+    "    # summary statistics restricted to the high-claim subset\n",
+    "    print(\"\\nSummary stats for high-claim customers:\\n\", high_claim_df.describe())"
    ]
   }
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python 3 (ipykernel)",
+   "display_name": "base",
    "language": "python",
    "name": "python3"
   },
@@ -251,7 +917,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.9.13"
+   "version": "3.12.7"
   }
  },
  "nbformat": 4,

	Customer	ST	GENDER	Education	Customer Lifetime Value	Income	Monthly Premium Auto	Number of Open Complaints	Policy Type	Vehicle Class	Total Claim Amount
0	RB50392	Washington	NaN	Master	NaN	0.0	1000.0	1/0/00	Personal Auto	Four-Door Car	2.704934
1	QZ44356	Arizona	F	Bachelor	697953.59%	0.0	94.0	1/0/00	Personal Auto	Four-Door Car	1131.464935
2	AI49188	Nevada	F	Bachelor	1288743.17%	48767.0	108.0	1/0/00	Personal Auto	Two-Door Car	566.472247
3	WW63253	California	M	Bachelor	764586.18%	0.0	106.0	1/0/00	Corporate Auto	SUV	529.881344
4	GA49547	Washington	M	High School or Below	536307.65%	36357.0	68.0	1/0/00	Personal Auto	Four-Door Car	17.269323
...	...	...	...	...	...	...	...	...	...	...	...
4003	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN
4004	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN
4005	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN
4006	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN
4007	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN