From 5b4c1e00de76df4607d419f79501fcd56f07a2fa Mon Sep 17 00:00:00 2001
From: Miguel Florindo <miguelflorindo0gmail.com>
Date: Sat, 13 Sep 2025 16:12:55 +0100
Subject: [PATCH] 'PandasFinishedLab'

---
 lab-dw-pandas.ipynb | 604 ++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 587 insertions(+), 17 deletions(-)
diff --git a/lab-dw-pandas.ipynb b/lab-dw-pandas.ipynb
index fbd468314..2a6075b66 100644
--- a/lab-dw-pandas.ipynb
+++ b/lab-dw-pandas.ipynb
@@ -82,12 +82,455 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 156,
    "id": "dd4e8cd8-a6f6-486c-a5c4-1745b0c035f4",
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "DataSetDimensions: (4008, 11)\n",
+      "<class 'pandas.core.frame.DataFrame'>\n",
+      "RangeIndex: 4008 entries, 0 to 4007\n",
+      "Data columns (total 11 columns):\n",
+      " #   Column                     Non-Null Count  Dtype  \n",
+      "---  ------                     --------------  -----  \n",
+      " 0   Customer                   1071 non-null   object \n",
+      " 1   ST                         1071 non-null   object \n",
+      " 2   GENDER                     954 non-null    object \n",
+      " 3   Education                  1071 non-null   object \n",
+      " 4   Customer Lifetime Value    1068 non-null   object \n",
+      " 5   Income                     1071 non-null   float64\n",
+      " 6   Monthly Premium Auto       1071 non-null   float64\n",
+      " 7   Number of Open Complaints  1071 non-null   object \n",
+      " 8   Policy Type                1071 non-null   object \n",
+      " 9   Vehicle Class              1071 non-null   object \n",
+      " 10  Total Claim Amount         1071 non-null   float64\n",
+      "dtypes: float64(3), object(8)\n",
+      "memory usage: 344.6+ KB\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>Customer</th>\n",
+       "      <th>ST</th>\n",
+       "      <th>GENDER</th>\n",
+       "      <th>Education</th>\n",
+       "      <th>Customer Lifetime Value</th>\n",
+       "      <th>Income</th>\n",
+       "      <th>Monthly Premium Auto</th>\n",
+       "      <th>Number of Open Complaints</th>\n",
+       "      <th>Policy Type</th>\n",
+       "      <th>Vehicle Class</th>\n",
+       "      <th>Total Claim Amount</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>RB50392</td>\n",
+       "      <td>Washington</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>Master</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>1000.0</td>\n",
+       "      <td>1/0/00</td>\n",
+       "      <td>Personal Auto</td>\n",
+       "      <td>Four-Door Car</td>\n",
+       "      <td>2.704934</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>QZ44356</td>\n",
+       "      <td>Arizona</td>\n",
+       "      <td>F</td>\n",
+       "      <td>Bachelor</td>\n",
+       "      <td>697953.59%</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>94.0</td>\n",
+       "      <td>1/0/00</td>\n",
+       "      <td>Personal Auto</td>\n",
+       "      <td>Four-Door Car</td>\n",
+       "      <td>1131.464935</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>AI49188</td>\n",
+       "      <td>Nevada</td>\n",
+       "      <td>F</td>\n",
+       "      <td>Bachelor</td>\n",
+       "      <td>1288743.17%</td>\n",
+       "      <td>48767.0</td>\n",
+       "      <td>108.0</td>\n",
+       "      <td>1/0/00</td>\n",
+       "      <td>Personal Auto</td>\n",
+       "      <td>Two-Door Car</td>\n",
+       "      <td>566.472247</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>WW63253</td>\n",
+       "      <td>California</td>\n",
+       "      <td>M</td>\n",
+       "      <td>Bachelor</td>\n",
+       "      <td>764586.18%</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>106.0</td>\n",
+       "      <td>1/0/00</td>\n",
+       "      <td>Corporate Auto</td>\n",
+       "      <td>SUV</td>\n",
+       "      <td>529.881344</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>GA49547</td>\n",
+       "      <td>Washington</td>\n",
+       "      <td>M</td>\n",
+       "      <td>High School or Below</td>\n",
+       "      <td>536307.65%</td>\n",
+       "      <td>36357.0</td>\n",
+       "      <td>68.0</td>\n",
+       "      <td>1/0/00</td>\n",
+       "      <td>Personal Auto</td>\n",
+       "      <td>Four-Door Car</td>\n",
+       "      <td>17.269323</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "  Customer          ST GENDER             Education Customer Lifetime Value  \\\n",
+       "0  RB50392  Washington    NaN                Master                     NaN   \n",
+       "1  QZ44356     Arizona      F              Bachelor              697953.59%   \n",
+       "2  AI49188      Nevada      F              Bachelor             1288743.17%   \n",
+       "3  WW63253  California      M              Bachelor              764586.18%   \n",
+       "4  GA49547  Washington      M  High School or Below              536307.65%   \n",
+       "\n",
+       "    Income  Monthly Premium Auto Number of Open Complaints     Policy Type  \\\n",
+       "0      0.0                1000.0                    1/0/00   Personal Auto   \n",
+       "1      0.0                  94.0                    1/0/00   Personal Auto   \n",
+       "2  48767.0                 108.0                    1/0/00   Personal Auto   \n",
+       "3      0.0                 106.0                    1/0/00  Corporate Auto   \n",
+       "4  36357.0                  68.0                    1/0/00   Personal Auto   \n",
+       "\n",
+       "   Vehicle Class  Total Claim Amount  \n",
+       "0  Four-Door Car            2.704934  \n",
+       "1  Four-Door Car         1131.464935  \n",
+       "2   Two-Door Car          566.472247  \n",
+       "3            SUV          529.881344  \n",
+       "4  Four-Door Car           17.269323  "
+      ]
+     },
+     "execution_count": 156,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
-    "# Your code here"
+    "import pandas as pd\n",
+    "\n",
+    "# Remote CSV containing the raw customer records.\n",
+    "url = \"https://raw.githubusercontent.com/data-bootcamp-v4/data/main/file1.csv\"\n",
+    "df = pd.read_csv(filepath_or_buffer=url)\n",
+    "\n",
+    "# Shape first, then per-column dtypes / non-null counts, then a preview of the first rows.\n",
+    "n_rows, n_cols = df.shape\n",
+    "print(f\"DataSetDimensions: {(n_rows, n_cols)}\")\n",
+    "df.info()\n",
+    "\n",
+    "df.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 157,
+   "id": "8992496a",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "     Customer          ST GENDER             Education  \\\n",
+      "0     RB50392  Washington    NaN                Master   \n",
+      "1     QZ44356     Arizona      F              Bachelor   \n",
+      "2     AI49188      Nevada      F              Bachelor   \n",
+      "3     WW63253  California      M              Bachelor   \n",
+      "4     GA49547  Washington      M  High School or Below   \n",
+      "...       ...         ...    ...                   ...   \n",
+      "4003      NaN         NaN    NaN                   NaN   \n",
+      "4004      NaN         NaN    NaN                   NaN   \n",
+      "4005      NaN         NaN    NaN                   NaN   \n",
+      "4006      NaN         NaN    NaN                   NaN   \n",
+      "4007      NaN         NaN    NaN                   NaN   \n",
+      "\n",
+      "     Customer Lifetime Value   Income  Monthly Premium Auto  \\\n",
+      "0                        NaN      0.0                1000.0   \n",
+      "1                  697953.59      0.0                  94.0   \n",
+      "2                 1288743.17  48767.0                 108.0   \n",
+      "3                  764586.18      0.0                 106.0   \n",
+      "4                  536307.65  36357.0                  68.0   \n",
+      "...                      ...      ...                   ...   \n",
+      "4003                     NaN      NaN                   NaN   \n",
+      "4004                     NaN      NaN                   NaN   \n",
+      "4005                     NaN      NaN                   NaN   \n",
+      "4006                     NaN      NaN                   NaN   \n",
+      "4007                     NaN      NaN                   NaN   \n",
+      "\n",
+      "     Number of Open Complaints     Policy Type  Vehicle Class  \\\n",
+      "0                       1/0/00   Personal Auto  Four-Door Car   \n",
+      "1                       1/0/00   Personal Auto  Four-Door Car   \n",
+      "2                       1/0/00   Personal Auto   Two-Door Car   \n",
+      "3                       1/0/00  Corporate Auto            SUV   \n",
+      "4                       1/0/00   Personal Auto  Four-Door Car   \n",
+      "...                        ...             ...            ...   \n",
+      "4003                       NaN             NaN            NaN   \n",
+      "4004                       NaN             NaN            NaN   \n",
+      "4005                       NaN             NaN            NaN   \n",
+      "4006                       NaN             NaN            NaN   \n",
+      "4007                       NaN             NaN            NaN   \n",
+      "\n",
+      "      Total Claim Amount  \n",
+      "0               2.704934  \n",
+      "1            1131.464935  \n",
+      "2             566.472247  \n",
+      "3             529.881344  \n",
+      "4              17.269323  \n",
+      "...                  ...  \n",
+      "4003                 NaN  \n",
+      "4004                 NaN  \n",
+      "4005                 NaN  \n",
+      "4006                 NaN  \n",
+      "4007                 NaN  \n",
+      "\n",
+      "[4008 rows x 11 columns]\n",
+      "     Customer          ST GENDER             Education  \\\n",
+      "0     RB50392  Washington      0                Master   \n",
+      "1     QZ44356     Arizona      F              Bachelor   \n",
+      "2     AI49188      Nevada      F              Bachelor   \n",
+      "3     WW63253  California      M              Bachelor   \n",
+      "4     GA49547  Washington      M  High School or Below   \n",
+      "...       ...         ...    ...                   ...   \n",
+      "4003        0           0      0                     0   \n",
+      "4004        0           0      0                     0   \n",
+      "4005        0           0      0                     0   \n",
+      "4006        0           0      0                     0   \n",
+      "4007        0           0      0                     0   \n",
+      "\n",
+      "     Customer Lifetime Value   Income  Monthly Premium Auto  \\\n",
+      "0                          0      0.0                1000.0   \n",
+      "1                  697953.59      0.0                  94.0   \n",
+      "2                 1288743.17  48767.0                 108.0   \n",
+      "3                  764586.18      0.0                 106.0   \n",
+      "4                  536307.65  36357.0                  68.0   \n",
+      "...                      ...      ...                   ...   \n",
+      "4003                       0      0.0                   0.0   \n",
+      "4004                       0      0.0                   0.0   \n",
+      "4005                       0      0.0                   0.0   \n",
+      "4006                       0      0.0                   0.0   \n",
+      "4007                       0      0.0                   0.0   \n",
+      "\n",
+      "     Number of Open Complaints     Policy Type  Vehicle Class  \\\n",
+      "0                       1/0/00   Personal Auto  Four-Door Car   \n",
+      "1                       1/0/00   Personal Auto  Four-Door Car   \n",
+      "2                       1/0/00   Personal Auto   Two-Door Car   \n",
+      "3                       1/0/00  Corporate Auto            SUV   \n",
+      "4                       1/0/00   Personal Auto  Four-Door Car   \n",
+      "...                        ...             ...            ...   \n",
+      "4003                         0               0              0   \n",
+      "4004                         0               0              0   \n",
+      "4005                         0               0              0   \n",
+      "4006                         0               0              0   \n",
+      "4007                         0               0              0   \n",
+      "\n",
+      "      Total Claim Amount  \n",
+      "0               2.704934  \n",
+      "1            1131.464935  \n",
+      "2             566.472247  \n",
+      "3             529.881344  \n",
+      "4              17.269323  \n",
+      "...                  ...  \n",
+      "4003            0.000000  \n",
+      "4004            0.000000  \n",
+      "4005            0.000000  \n",
+      "4006            0.000000  \n",
+      "4007            0.000000  \n",
+      "\n",
+      "[4008 rows x 11 columns]\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Strip the '%' suffix and store Customer Lifetime Value as float; astype(str) keeps the cell re-runnable once the column is numeric.\n",
+    "clv_text = df[\"Customer Lifetime Value\"].astype(str).str.replace('%', '', regex=False)\n",
+    "df[\"Customer Lifetime Value\"] = pd.to_numeric(clv_text, errors=\"coerce\")  # unparseable entries become NaN\n",
+    "# Remove stray spaces from the gender labels.\n",
+    "df[\"GENDER\"] = df[\"GENDER\"].str.replace(' ', '', regex=False)\n",
+    "print(df)\n",
+    "df_clean = df.fillna(0)  # separate copy with missing values replaced by 0; df keeps its NaNs\n",
+    "print(df_clean)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 158,
+   "id": "ee780b5e",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Customer                     1071\n",
+      "ST                              8\n",
+      "GENDER                          5\n",
+      "Education                       6\n",
+      "Customer Lifetime Value      1027\n",
+      "Income                        774\n",
+      "Monthly Premium Auto          132\n",
+      "Number of Open Complaints       6\n",
+      "Policy Type                     3\n",
+      "Vehicle Class                   6\n",
+      "Total Claim Amount            761\n",
+      "dtype: int64\n"
+     ]
+    }
+   ],
+   "source": [
+    "unique_counts = df.nunique(axis=0)  # distinct values per column; NaN is not counted by default\n",
+    "print(unique_counts)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 159,
+   "id": "9c8a316f",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[nan 'F' 'M']\n",
+      "\n",
+      "--- ST ---\n",
+      "ST\n",
+      "Oregon        320\n",
+      "California    211\n",
+      "Arizona       186\n",
+      "Cali          120\n",
+      "Nevada         98\n",
+      "Washington     81\n",
+      "WA             30\n",
+      "AZ             25\n",
+      "Name: count, dtype: int64\n",
+      "\n",
+      "--- GENDER ---\n",
+      "GENDER\n",
+      "F    502\n",
+      "M    452\n",
+      "Name: count, dtype: int64\n",
+      "\n",
+      "--- Education ---\n",
+      "Education\n",
+      "Bachelor                324\n",
+      "College                 313\n",
+      "High School or Below    296\n",
+      "Master                   94\n",
+      "Doctor                   37\n",
+      "Bachelors                 7\n",
+      "Name: count, dtype: int64\n",
+      "\n",
+      "--- Policy Type ---\n",
+      "Policy Type\n",
+      "Personal Auto     780\n",
+      "Corporate Auto    234\n",
+      "Special Auto       57\n",
+      "Name: count, dtype: int64\n",
+      "\n",
+      "--- Vehicle Class ---\n",
+      "Vehicle Class\n",
+      "Four-Door Car    576\n",
+      "Two-Door Car     205\n",
+      "SUV              199\n",
+      "Sports Car        57\n",
+      "Luxury SUV        20\n",
+      "Luxury Car        14\n",
+      "Name: count, dtype: int64\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Collapse spelling variants so every category has exactly one label.\n",
+    "gender_mapping = {'Femal': 'F', 'female': 'F', 'Male': 'M'}\n",
+    "state_mapping = {'Cali': 'California', 'WA': 'Washington', 'AZ': 'Arizona'}\n",
+    "education_mapping = {'Bachelors': 'Bachelor'}\n",
+    "\n",
+    "df['GENDER'] = df['GENDER'].replace(gender_mapping)\n",
+    "df['ST'] = df['ST'].replace(state_mapping)  # otherwise aliases are counted as separate states\n",
+    "df['Education'] = df['Education'].replace(education_mapping)\n",
+    "print(df['GENDER'].unique())\n",
+    "\n",
+    "categorical_cols = ['ST', 'GENDER', 'Education', 'Policy Type', 'Vehicle Class']\n",
+    "for col in categorical_cols:\n",
+    "    print(f\"\\n--- {col} ---\")\n",
+    "    print(df[col].value_counts().head(10))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 160,
+   "id": "6fee1ea8",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "             Income  Monthly Premium Auto  Total Claim Amount\n",
+      "count   1071.000000           1071.000000         1071.000000\n",
+      "mean   39295.701214            193.234360          404.986909\n",
+      "std    30469.427060           1601.190369          293.027260\n",
+      "min        0.000000             61.000000            0.382107\n",
+      "25%    14072.000000             68.000000          202.157702\n",
+      "50%    36234.000000             83.000000          354.729129\n",
+      "75%    64631.000000            109.500000          532.800000\n",
+      "max    99960.000000          35354.000000         2893.239678\n",
+      "mode       0.000000             65.000000          321.600000\n"
+     ]
+    }
+   ],
+   "source": [
+    "# describe() reports only the listed columns that are stored with a numeric dtype.\n",
+    "numerical_cols = ['Customer Lifetime Value', 'Income', 'Monthly Premium Auto', 'Number of Open Complaints', 'Total Claim Amount']\n",
+    "numeric_view = df[numerical_cols]\n",
+    "summary = numeric_view.describe()\n",
+    "# Append each column's most frequent value (first row of mode()) as an extra 'mode' row.\n",
+    "mode_vals = numeric_view.mode().iloc[0]\n",
+    "summary.loc['mode'] = mode_vals\n",
+    "print(summary)"
    ]
   },
   {
@@ -116,12 +559,58 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 161,
+   "id": "243dfc12",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "State frequencies (descending):\n",
+      "ST\n",
+      "Oregon        320\n",
+      "California    211\n",
+      "Arizona       186\n",
+      "Cali          120\n",
+      "Nevada         98\n",
+      "Name: count, dtype: int64\n"
+     ]
+    }
+   ],
+   "source": [
+    "# value_counts() orders states from most to least common; keep the first five.\n",
+    "most_frequent = df['ST'].value_counts().iloc[:5]\n",
+    "print(\"State frequencies (descending):\")\n",
+    "print(most_frequent)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 162,
    "id": "2dca5073-4520-4f42-9390-4b92733284ed",
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n",
+      "State frequencies (ascending):\n",
+      "ST\n",
+      "AZ             25\n",
+      "WA             30\n",
+      "Washington     81\n",
+      "Nevada         98\n",
+      "Cali          120\n",
+      "Name: count, dtype: int64\n"
+     ]
+    }
+   ],
    "source": [
-    "# Your code here"
+    "# Counts sorted from rarest to most common; the first five rows are the least frequent states.\n",
+    "least_frequent = df['ST'].value_counts(ascending=True).iloc[:5]\n",
+    "print(\"\\nState frequencies (ascending):\", least_frequent, sep=\"\\n\")"
    ]
   },
   {
@@ -146,12 +635,34 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 163,
    "id": "bcfad6c1-9af2-4b0b-9aa9-0dc5c17473c0",
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Number of policies sold by type:\n",
+      "Policy Type\n",
+      "Personal Auto     780\n",
+      "Corporate Auto    234\n",
+      "Special Auto       57\n",
+      "Name: count, dtype: int64\n",
+      "\n",
+      "Policy type with the highest sales: 'Personal Auto'\n"
+     ]
+    }
+   ],
    "source": [
-    "# Your code here"
+    "# Tally the rows per policy type, then pick the label with the largest count.\n",
+    "policy_counts = df['Policy Type'].value_counts()\n",
+    "top_policy_type = policy_counts.idxmax()\n",
+    "\n",
+    "print(\"Number of policies sold by type:\")\n",
+    "print(policy_counts)\n",
+    "\n",
+    "print(f\"\\nPolicy type with the highest sales: '{top_policy_type}'\")"
    ]
   },
   {
@@ -176,12 +687,31 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 164,
    "id": "0c0563cf-6f8b-463d-a321-651a972f82e5",
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Average Income by Policy Type:\n",
+      "Personal Auto: $38,180.70\n",
+      "Corporate Auto: $41,390.31\n"
+     ]
+    }
+   ],
    "source": [
-    "# Your code here"
+    "# Split the customers by policy type using boolean-mask row selection.\n",
+    "personal_auto_df = df[df['Policy Type'].eq('Personal Auto')]\n",
+    "corporate_auto_df = df[df['Policy Type'].eq('Corporate Auto')]\n",
+    "\n",
+    "# Mean income within each of the two groups.\n",
+    "avg_income_personal = personal_auto_df['Income'].mean()\n",
+    "avg_income_corporate = corporate_auto_df['Income'].mean()\n",
+    "print(\"Average Income by Policy Type:\")\n",
+    "print(f\"Personal Auto: ${avg_income_personal:,.2f}\")\n",
+    "print(f\"Corporate Auto: ${avg_income_corporate:,.2f}\")"
    ]
   },
   {
@@ -226,18 +756,58 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 165,
    "id": "b731bca6-a760-4860-a27b-a33efa712ce0",
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "             Income  Monthly Premium Auto  Total Claim Amount\n",
+      "count    264.000000            264.000000          264.000000\n",
+      "mean   23677.344697            165.193182          782.228263\n",
+      "std    27013.483721            623.930992          292.751640\n",
+      "min        0.000000             63.000000          537.600000\n",
+      "25%        0.000000             99.000000          606.521741\n",
+      "50%    18807.000000            114.000000          679.597985\n",
+      "75%    42423.750000            133.250000          851.400000\n",
+      "max    99316.000000          10202.000000         2893.239678\n",
+      "       Total Claim Amount        Income  Monthly Premium Auto\n",
+      "count          264.000000    264.000000            264.000000\n",
+      "mean           782.228263  23677.344697            165.193182\n",
+      "std            292.751640  27013.483721            623.930992\n",
+      "min            537.600000      0.000000             63.000000\n",
+      "25%            606.521741      0.000000             99.000000\n",
+      "50%            679.597985  18807.000000            114.000000\n",
+      "75%            851.400000  42423.750000            133.250000\n",
+      "max           2893.239678  99316.000000          10202.000000\n"
+     ]
+    }
+   ],
    "source": [
-    "# Your code here"
+    "# Customers whose claim total is strictly above the 75th percentile.\n",
+    "claim_75th = df['Total Claim Amount'].quantile(0.75)\n",
+    "high_claim_df = df[df['Total Claim Amount'].gt(claim_75th)]\n",
+    "\n",
+    "key_columns = ['Total Claim Amount', 'Income', 'Customer Lifetime Value', 'Monthly Premium Auto']\n",
+    "# Summary statistics: every numeric column first, then only the key columns.\n",
+    "print(high_claim_df.describe())\n",
+    "print(high_claim_df[key_columns].describe())"
    ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "3d6166e3",
+   "metadata": {},
+   "outputs": [],
+   "source": []
   }
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python 3 (ipykernel)",
+   "display_name": "base",
    "language": "python",
    "name": "python3"
   },
@@ -251,7 +821,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.9.13"
+   "version": "3.13.5"
   }
  },
  "nbformat": 4,

	Customer	ST	GENDER	Education	Customer Lifetime Value	Income	Monthly Premium Auto	Number of Open Complaints	Policy Type	Vehicle Class	Total Claim Amount
0	RB50392	Washington	NaN	Master	NaN	0.0	1000.0	1/0/00	Personal Auto	Four-Door Car	2.704934
1	QZ44356	Arizona	F	Bachelor	697953.59%	0.0	94.0	1/0/00	Personal Auto	Four-Door Car	1131.464935
2	AI49188	Nevada	F	Bachelor	1288743.17%	48767.0	108.0	1/0/00	Personal Auto	Two-Door Car	566.472247
3	WW63253	California	M	Bachelor	764586.18%	0.0	106.0	1/0/00	Corporate Auto	SUV	529.881344
4	GA49547	Washington	M	High School or Below	536307.65%	36357.0	68.0	1/0/00	Personal Auto	Four-Door Car	17.269323