From fc0bf697a2ed82d36d47eb97b43ca267a15e88be Mon Sep 17 00:00:00 2001
From: Priyanka Marmath <priyanka.marmath@gmail.com>
Date: Wed, 10 Sep 2025 13:50:02 +0200
Subject: [PATCH] lab solved

---
 lab-dw-pandas.ipynb | 715 ++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 698 insertions(+), 17 deletions(-)
diff --git a/lab-dw-pandas.ipynb b/lab-dw-pandas.ipynb
index fbd468314..3b95873cb 100644
--- a/lab-dw-pandas.ipynb
+++ b/lab-dw-pandas.ipynb
@@ -82,12 +82,540 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 2,
    "id": "dd4e8cd8-a6f6-486c-a5c4-1745b0c035f4",
    "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>Customer</th>\n",
+       "      <th>ST</th>\n",
+       "      <th>GENDER</th>\n",
+       "      <th>Education</th>\n",
+       "      <th>Customer Lifetime Value</th>\n",
+       "      <th>Income</th>\n",
+       "      <th>Monthly Premium Auto</th>\n",
+       "      <th>Number of Open Complaints</th>\n",
+       "      <th>Policy Type</th>\n",
+       "      <th>Vehicle Class</th>\n",
+       "      <th>Total Claim Amount</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>RB50392</td>\n",
+       "      <td>Washington</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>Master</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>1000.0</td>\n",
+       "      <td>1/0/00</td>\n",
+       "      <td>Personal Auto</td>\n",
+       "      <td>Four-Door Car</td>\n",
+       "      <td>2.704934</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>QZ44356</td>\n",
+       "      <td>Arizona</td>\n",
+       "      <td>F</td>\n",
+       "      <td>Bachelor</td>\n",
+       "      <td>697953.59%</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>94.0</td>\n",
+       "      <td>1/0/00</td>\n",
+       "      <td>Personal Auto</td>\n",
+       "      <td>Four-Door Car</td>\n",
+       "      <td>1131.464935</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>AI49188</td>\n",
+       "      <td>Nevada</td>\n",
+       "      <td>F</td>\n",
+       "      <td>Bachelor</td>\n",
+       "      <td>1288743.17%</td>\n",
+       "      <td>48767.0</td>\n",
+       "      <td>108.0</td>\n",
+       "      <td>1/0/00</td>\n",
+       "      <td>Personal Auto</td>\n",
+       "      <td>Two-Door Car</td>\n",
+       "      <td>566.472247</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>WW63253</td>\n",
+       "      <td>California</td>\n",
+       "      <td>M</td>\n",
+       "      <td>Bachelor</td>\n",
+       "      <td>764586.18%</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>106.0</td>\n",
+       "      <td>1/0/00</td>\n",
+       "      <td>Corporate Auto</td>\n",
+       "      <td>SUV</td>\n",
+       "      <td>529.881344</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>GA49547</td>\n",
+       "      <td>Washington</td>\n",
+       "      <td>M</td>\n",
+       "      <td>High School or Below</td>\n",
+       "      <td>536307.65%</td>\n",
+       "      <td>36357.0</td>\n",
+       "      <td>68.0</td>\n",
+       "      <td>1/0/00</td>\n",
+       "      <td>Personal Auto</td>\n",
+       "      <td>Four-Door Car</td>\n",
+       "      <td>17.269323</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>...</th>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4003</th>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4004</th>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4005</th>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4006</th>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4007</th>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "<p>4008 rows × 11 columns</p>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "     Customer          ST GENDER             Education  \\\n",
+       "0     RB50392  Washington    NaN                Master   \n",
+       "1     QZ44356     Arizona      F              Bachelor   \n",
+       "2     AI49188      Nevada      F              Bachelor   \n",
+       "3     WW63253  California      M              Bachelor   \n",
+       "4     GA49547  Washington      M  High School or Below   \n",
+       "...       ...         ...    ...                   ...   \n",
+       "4003      NaN         NaN    NaN                   NaN   \n",
+       "4004      NaN         NaN    NaN                   NaN   \n",
+       "4005      NaN         NaN    NaN                   NaN   \n",
+       "4006      NaN         NaN    NaN                   NaN   \n",
+       "4007      NaN         NaN    NaN                   NaN   \n",
+       "\n",
+       "     Customer Lifetime Value   Income  Monthly Premium Auto  \\\n",
+       "0                        NaN      0.0                1000.0   \n",
+       "1                 697953.59%      0.0                  94.0   \n",
+       "2                1288743.17%  48767.0                 108.0   \n",
+       "3                 764586.18%      0.0                 106.0   \n",
+       "4                 536307.65%  36357.0                  68.0   \n",
+       "...                      ...      ...                   ...   \n",
+       "4003                     NaN      NaN                   NaN   \n",
+       "4004                     NaN      NaN                   NaN   \n",
+       "4005                     NaN      NaN                   NaN   \n",
+       "4006                     NaN      NaN                   NaN   \n",
+       "4007                     NaN      NaN                   NaN   \n",
+       "\n",
+       "     Number of Open Complaints     Policy Type  Vehicle Class  \\\n",
+       "0                       1/0/00   Personal Auto  Four-Door Car   \n",
+       "1                       1/0/00   Personal Auto  Four-Door Car   \n",
+       "2                       1/0/00   Personal Auto   Two-Door Car   \n",
+       "3                       1/0/00  Corporate Auto            SUV   \n",
+       "4                       1/0/00   Personal Auto  Four-Door Car   \n",
+       "...                        ...             ...            ...   \n",
+       "4003                       NaN             NaN            NaN   \n",
+       "4004                       NaN             NaN            NaN   \n",
+       "4005                       NaN             NaN            NaN   \n",
+       "4006                       NaN             NaN            NaN   \n",
+       "4007                       NaN             NaN            NaN   \n",
+       "\n",
+       "      Total Claim Amount  \n",
+       "0               2.704934  \n",
+       "1            1131.464935  \n",
+       "2             566.472247  \n",
+       "3             529.881344  \n",
+       "4              17.269323  \n",
+       "...                  ...  \n",
+       "4003                 NaN  \n",
+       "4004                 NaN  \n",
+       "4005                 NaN  \n",
+       "4006                 NaN  \n",
+       "4007                 NaN  \n",
+       "\n",
+       "[4008 rows x 11 columns]"
+      ]
+     },
+     "execution_count": 2,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "import pandas as pd\n",
+    "customer_df = pd.read_csv(\"https://raw.githubusercontent.com/data-bootcamp-v4/data/main/file1.csv\")\n",
+    "customer_df        #Identify the dimensions of the dataset by determining the number of rows and columns it contains."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "397e3d20-23a6-49ec-bbf1-45dd0f941c50",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "<class 'pandas.core.frame.DataFrame'>\n",
+      "RangeIndex: 4008 entries, 0 to 4007\n",
+      "Data columns (total 11 columns):\n",
+      " #   Column                     Non-Null Count  Dtype  \n",
+      "---  ------                     --------------  -----  \n",
+      " 0   Customer                   1071 non-null   object \n",
+      " 1   ST                         1071 non-null   object \n",
+      " 2   GENDER                     954 non-null    object \n",
+      " 3   Education                  1071 non-null   object \n",
+      " 4   Customer Lifetime Value    1068 non-null   object \n",
+      " 5   Income                     1071 non-null   float64\n",
+      " 6   Monthly Premium Auto       1071 non-null   float64\n",
+      " 7   Number of Open Complaints  1071 non-null   object \n",
+      " 8   Policy Type                1071 non-null   object \n",
+      " 9   Vehicle Class              1071 non-null   object \n",
+      " 10  Total Claim Amount         1071 non-null   float64\n",
+      "dtypes: float64(3), object(8)\n",
+      "memory usage: 344.6+ KB\n"
+     ]
+    }
+   ],
+   "source": [
+    "customer_df.info()  #Determine the data types of each column \n",
+    "# Customer Lifetime Value is stored as object because its values end in '%'; it could be converted to float64 after stripping the '%'."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "53a8a0f8-7327-44ca-8c45-855ec4b77f38",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "Customer                      object\n",
+       "ST                            object\n",
+       "GENDER                        object\n",
+       "Education                     object\n",
+       "Customer Lifetime Value       object\n",
+       "Income                       float64\n",
+       "Monthly Premium Auto         float64\n",
+       "Number of Open Complaints     object\n",
+       "Policy Type                   object\n",
+       "Vehicle Class                 object\n",
+       "Total Claim Amount           float64\n",
+       "dtype: object"
+      ]
+     },
+     "execution_count": 4,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "customer_df.dtypes  #Identify the number of unique values for each column and determine which columns appear to be categorical. You should also describe the unique values of each categorical column and the range of values for numerical columns, and give your insights."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "id": "474f7668-e8be-4e80-9f3c-ab4b2bfdb723",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "Customer                     1071\n",
+       "ST                              8\n",
+       "GENDER                          5\n",
+       "Education                       6\n",
+       "Customer Lifetime Value      1027\n",
+       "Income                        774\n",
+       "Monthly Premium Auto          132\n",
+       "Number of Open Complaints       6\n",
+       "Policy Type                     3\n",
+       "Vehicle Class                   6\n",
+       "Total Claim Amount            761\n",
+       "dtype: int64"
+      ]
+     },
+     "execution_count": 9,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "customer_df.nunique()      \n",
+    "\n",
+    "#Categorical Columns: Customer, ST, GENDER, Education, Policy Type, Vehicle Class\n",
+    "#Numerical Columns: Customer Lifetime Value, Income, Monthly Premium Auto, Number of Open Complaints, Total Claim Amount"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "ce29d83f-b7ba-426d-8e8d-d37f1cfbbc2e",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>Income</th>\n",
+       "      <th>Monthly Premium Auto</th>\n",
+       "      <th>Total Claim Amount</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>count</th>\n",
+       "      <td>1071.000000</td>\n",
+       "      <td>1071.000000</td>\n",
+       "      <td>1071.000000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>mean</th>\n",
+       "      <td>39295.701214</td>\n",
+       "      <td>193.234360</td>\n",
+       "      <td>404.986909</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>std</th>\n",
+       "      <td>30469.427060</td>\n",
+       "      <td>1601.190369</td>\n",
+       "      <td>293.027260</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>min</th>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>61.000000</td>\n",
+       "      <td>0.382107</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>25%</th>\n",
+       "      <td>14072.000000</td>\n",
+       "      <td>68.000000</td>\n",
+       "      <td>202.157702</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>50%</th>\n",
+       "      <td>36234.000000</td>\n",
+       "      <td>83.000000</td>\n",
+       "      <td>354.729129</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>75%</th>\n",
+       "      <td>64631.000000</td>\n",
+       "      <td>109.500000</td>\n",
+       "      <td>532.800000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>max</th>\n",
+       "      <td>99960.000000</td>\n",
+       "      <td>35354.000000</td>\n",
+       "      <td>2893.239678</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "             Income  Monthly Premium Auto  Total Claim Amount\n",
+       "count   1071.000000           1071.000000         1071.000000\n",
+       "mean   39295.701214            193.234360          404.986909\n",
+       "std    30469.427060           1601.190369          293.027260\n",
+       "min        0.000000             61.000000            0.382107\n",
+       "25%    14072.000000             68.000000          202.157702\n",
+       "50%    36234.000000             83.000000          354.729129\n",
+       "75%    64631.000000            109.500000          532.800000\n",
+       "max    99960.000000          35354.000000         2893.239678"
+      ]
+     },
+     "execution_count": 6,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "customer_df.describe()\n",
+    "#Compute summary statistics such as mean, median, mode, standard deviation, and quartiles "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "id": "dab8f0b7-a4c3-404b-a992-cd953815f692",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "0    321.6\n",
+      "Name: Total Claim Amount, dtype: float64\n"
+     ]
+    }
+   ],
+   "source": [
+    "mode_value= customer_df[\"Total Claim Amount\"].mode()\n",
+    "print(mode_value)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "id": "cf77ed18-a7d8-4e54-aba8-aa2ed95e4157",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "354.729129\n"
+     ]
+    }
+   ],
+   "source": [
+    "median_value= customer_df[\"Total Claim Amount\"].median()\n",
+    "print(median_value)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "5e489da2-ea40-4ccd-bd1d-e20e53fb7d12",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "mode_value= customer_df[\"Income\"].mode()\n",
+    "print(mode_value)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "942cfa56-b7b7-484a-8f52-59008d7b89bf",
+   "metadata": {},
    "outputs": [],
    "source": [
-    "# Your code here"
+    "median_value= customer_df[\"Income\"].median()\n",
+    "print(median_value)"
    ]
   },
   {
@@ -116,12 +644,30 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 13,
    "id": "2dca5073-4520-4f42-9390-4b92733284ed",
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "ST\n",
+       "AZ             25\n",
+       "WA             30\n",
+       "Washington     81\n",
+       "Nevada         98\n",
+       "Cali          120\n",
+       "Name: count, dtype: int64"
+      ]
+     },
+     "execution_count": 13,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
-    "# Your code here"
+    "# Top 5 least common customer locations\n",
+    "(customer_df.ST.value_counts()).nsmallest(5)\n"
    ]
   },
   {
@@ -146,12 +692,33 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 16,
    "id": "bcfad6c1-9af2-4b0b-9aa9-0dc5c17473c0",
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Policies sold by type:\n",
+      " Policy Type\n",
+      "Personal Auto     780\n",
+      "Corporate Auto    234\n",
+      "Special Auto       57\n",
+      "Name: count, dtype: int64\n",
+      "\n",
+      "Most sold policy type: Personal Auto with 780 policies\n"
+     ]
+    }
+   ],
    "source": [
-    "# Your code here"
+    "# total number of policies sold for each type of policy\n",
+    "policy_counts = customer_df[\"Policy Type\"].value_counts()\n",
+    "top_policy_type = policy_counts.idxmax()\n",
+    "top_policy_count = policy_counts.max()\n",
+    "\n",
+    "print(\"Policies sold by type:\\n\", policy_counts)\n",
+    "print(\"\\nMost sold policy type:\", top_policy_type, \"with\", top_policy_count, \"policies\")\n"
    ]
   },
   {
@@ -176,12 +743,30 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 17,
    "id": "0c0563cf-6f8b-463d-a321-651a972f82e5",
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Average Income for Personal Auto customers: 38180.69871794872\n",
+      "Average Income for Corporate Auto customers: 41390.31196581197\n"
+     ]
+    }
+   ],
    "source": [
-    "# Your code here"
+    "# Check whether customers with Personal Auto have a lower income than those with Corporate Auto.\n",
+    "personal_auto_df = customer_df.loc[customer_df[\"Policy Type\"] == \"Personal Auto\"]\n",
+    "corporate_auto_df = customer_df.loc[customer_df[\"Policy Type\"] == \"Corporate Auto\"]\n",
+    "\n",
+    "#average income\n",
+    "avg_income_personal = personal_auto_df[\"Income\"].mean()\n",
+    "avg_income_corporate = corporate_auto_df[\"Income\"].mean()\n",
+    "\n",
+    "print(\"Average Income for Personal Auto customers:\", avg_income_personal)\n",
+    "print(\"Average Income for Corporate Auto customers:\", avg_income_corporate)\n"
    ]
   },
   {
@@ -226,20 +811,116 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 18,
    "id": "b731bca6-a760-4860-a27b-a33efa712ce0",
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Total Claim Amount Statistics:\n",
+      " count    1071.000000\n",
+      "mean      404.986909\n",
+      "std       293.027260\n",
+      "min         0.382107\n",
+      "25%       202.157702\n",
+      "50%       354.729129\n",
+      "75%       532.800000\n",
+      "max      2893.239678\n",
+      "Name: Total Claim Amount, dtype: float64\n",
+      "\n",
+      "75th Percentile (Threshold for High Claims): 532.8\n",
+      "\n",
+      "High Claim Customers Data:\n",
+      "    Customer          ST GENDER Education Customer Lifetime Value   Income  \\\n",
+      "1   QZ44356     Arizona      F  Bachelor              697953.59%      0.0   \n",
+      "2   AI49188      Nevada      F  Bachelor             1288743.17%  48767.0   \n",
+      "17  OE15005        Cali    NaN   College              394524.16%  28855.0   \n",
+      "23  TZ98966      Nevada    NaN  Bachelor              245019.10%      0.0   \n",
+      "26  US89481  California    NaN  Bachelor              394637.21%      0.0   \n",
+      "\n",
+      "    Monthly Premium Auto Number of Open Complaints     Policy Type  \\\n",
+      "1                   94.0                    1/0/00   Personal Auto   \n",
+      "2                  108.0                    1/0/00   Personal Auto   \n",
+      "17                 101.0                    1/0/00   Personal Auto   \n",
+      "23                  73.0                    1/3/00  Corporate Auto   \n",
+      "26                 111.0                    1/0/00   Personal Auto   \n",
+      "\n",
+      "    Vehicle Class  Total Claim Amount  \n",
+      "1   Four-Door Car         1131.464935  \n",
+      "2    Two-Door Car          566.472247  \n",
+      "17            SUV          647.442031  \n",
+      "23  Four-Door Car          554.376763  \n",
+      "26  Four-Door Car          799.200000  \n",
+      "\n",
+      "Summary Statistics for High Claim Customers:\n",
+      "        Customer      ST GENDER Education Customer Lifetime Value  \\\n",
+      "count       264     264    238       264                     264   \n",
+      "unique      264       7      5         5                     256   \n",
+      "top     QZ44356  Oregon      F  Bachelor              578018.22%   \n",
+      "freq          1      90    115        85                       3   \n",
+      "mean        NaN     NaN    NaN       NaN                     NaN   \n",
+      "std         NaN     NaN    NaN       NaN                     NaN   \n",
+      "min         NaN     NaN    NaN       NaN                     NaN   \n",
+      "25%         NaN     NaN    NaN       NaN                     NaN   \n",
+      "50%         NaN     NaN    NaN       NaN                     NaN   \n",
+      "75%         NaN     NaN    NaN       NaN                     NaN   \n",
+      "max         NaN     NaN    NaN       NaN                     NaN   \n",
+      "\n",
+      "              Income  Monthly Premium Auto Number of Open Complaints  \\\n",
+      "count     264.000000            264.000000                       264   \n",
+      "unique           NaN                   NaN                         6   \n",
+      "top              NaN                   NaN                    1/0/00   \n",
+      "freq             NaN                   NaN                       206   \n",
+      "mean    23677.344697            165.193182                       NaN   \n",
+      "std     27013.483721            623.930992                       NaN   \n",
+      "min         0.000000             63.000000                       NaN   \n",
+      "25%         0.000000             99.000000                       NaN   \n",
+      "50%     18807.000000            114.000000                       NaN   \n",
+      "75%     42423.750000            133.250000                       NaN   \n",
+      "max     99316.000000          10202.000000                       NaN   \n",
+      "\n",
+      "          Policy Type Vehicle Class  Total Claim Amount  \n",
+      "count             264           264          264.000000  \n",
+      "unique              3             6                 NaN  \n",
+      "top     Personal Auto           SUV                 NaN  \n",
+      "freq              191           101                 NaN  \n",
+      "mean              NaN           NaN          782.228263  \n",
+      "std               NaN           NaN          292.751640  \n",
+      "min               NaN           NaN          537.600000  \n",
+      "25%               NaN           NaN          606.521741  \n",
+      "50%               NaN           NaN          679.597985  \n",
+      "75%               NaN           NaN          851.400000  \n",
+      "max               NaN           NaN         2893.239678  \n"
+     ]
+    }
+   ],
    "source": [
-    "# Your code here"
+    "# Step 1: Summary statistics for Total Claim Amount\n",
+    "claim_stats = customer_df[\"Total Claim Amount\"].describe()\n",
+    "print(\"Total Claim Amount Statistics:\\n\", claim_stats)\n",
+    "\n",
+    "# Step 2: Find the 75th percentile\n",
+    "q75 = customer_df[\"Total Claim Amount\"].quantile(0.75)\n",
+    "\n",
+    "# Step 3: Filter customers with claim amounts > 75th percentile\n",
+    "high_claims_df = customer_df.loc[customer_df[\"Total Claim Amount\"] > q75]\n",
+    "\n",
+    "# Step 4: Summary statistics about high claim customers\n",
+    "high_claims_summary = high_claims_df.describe(include=\"all\")\n",
+    "\n",
+    "print(\"\\n75th Percentile (Threshold for High Claims):\", q75)\n",
+    "print(\"\\nHigh Claim Customers Data:\\n\", high_claims_df.head())\n",
+    "print(\"\\nSummary Statistics for High Claim Customers:\\n\", high_claims_summary)"
    ]
   }
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python 3 (ipykernel)",
+   "display_name": "Python [conda env:base] *",
    "language": "python",
-   "name": "python3"
+   "name": "conda-base-py"
   },
   "language_info": {
    "codemirror_mode": {
@@ -251,7 +932,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.9.13"
+   "version": "3.13.5"
   }
  },
  "nbformat": 4,

	Customer	ST	GENDER	Education	Customer Lifetime Value	Income	Monthly Premium Auto	Number of Open Complaints	Policy Type	Vehicle Class	Total Claim Amount
0	RB50392	Washington	NaN	Master	NaN	0.0	1000.0	1/0/00	Personal Auto	Four-Door Car	2.704934
1	QZ44356	Arizona	F	Bachelor	697953.59%	0.0	94.0	1/0/00	Personal Auto	Four-Door Car	1131.464935
2	AI49188	Nevada	F	Bachelor	1288743.17%	48767.0	108.0	1/0/00	Personal Auto	Two-Door Car	566.472247
3	WW63253	California	M	Bachelor	764586.18%	0.0	106.0	1/0/00	Corporate Auto	SUV	529.881344
4	GA49547	Washington	M	High School or Below	536307.65%	36357.0	68.0	1/0/00	Personal Auto	Four-Door Car	17.269323
...	...	...	...	...	...	...	...	...	...	...	...
4003	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN
4004	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN
4005	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN
4006	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN
4007	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN
	Income	Monthly Premium Auto	Total Claim Amount
count	1071.000000	1071.000000	1071.000000
mean	39295.701214	193.234360	404.986909
std	30469.427060	1601.190369	293.027260
min	0.000000	61.000000	0.382107
25%	14072.000000	68.000000	202.157702
50%	36234.000000	83.000000	354.729129
75%	64631.000000	109.500000	532.800000
max	99960.000000	35354.000000	2893.239678