From cbcca474d316a689c6ea56dc740023c1318de03d Mon Sep 17 00:00:00 2001
From: Rui Braz <ruibraz@MacBookPro.home>
Date: Sat, 20 Sep 2025 15:55:44 +0100
Subject: [PATCH] Solved lab

---
 .DS_Store                                     |  Bin 0 -> 6148 bytes
 .../cleaning_functions.cpython-313.pyc        |  Bin 0 -> 3717 bytes
 lab-dw-aggregating.ipynb                      | 1510 +++++++++++++++--
 3 files changed, 1372 insertions(+), 138 deletions(-)
 create mode 100644 .DS_Store
 create mode 100644 __pycache__/cleaning_functions.cpython-313.pyc
diff --git a/.DS_Store b/.DS_Store
new file mode 100644
index 0000000000000000000000000000000000000000..5008ddfcf53c02e82d7eee2e57c38e5672ef89f6
GIT binary patch
literal 6148
zcmeH~Jr2S!425mzP>H1@V-^m;4Wg<&0T*E43hX&L&p$$qDprKhvt+--jT7}7np#A3
zem<@ulZcFPQ@L2!n>{z**<q8>++&mCkOWA81W14cNZ<zv;LbK1Poaz?KmsK2CSc!(
z0ynLxE!0092;Krf2c+FF_Fe*7ECH>lEfg7;MkzE(HCqgga^y>{tEnwC%0;vJ&^%eQ
zLs35+`xjp>T0<F0fCPF1$Cyrb|F7^5{eNG?83~ZUUlGt@xh*qZDeu<Z%US-OSsOPv
j)R!Z4KLME7ReXlK;d!wEw5GODWMKRea10D2@KpjYNUI8I

literal 0
HcmV?d00001

diff --git a/__pycache__/cleaning_functions.cpython-313.pyc b/__pycache__/cleaning_functions.cpython-313.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..d8c537324f3b3c2e6c5ae1ab72288456df5325cf
GIT binary patch
literal 3717
zcma)8O>7&-72aJgN&Q&<A!S>QQ+Fd#q_(QaG>%YPv0X>DWXl%iRBOd<bzqmvm9*`0
zmz`Ng65#+eP@n;NsExvidQ;N?QEo=jLmCw5)rX?sf<h@weMr$<a&xRxZ+&l;q$OH)
zGX&r6&dhuBX6Ak0d%JyoJp#0U{o(H`H@gMl-}upLQuAf^yYTW*&;(7qArMiMUJ;0-
zg+PWh8KkU*L58&m$jGv&M!!PEIYVsf<0A_H5d3%l2;!lzAk4$Fv&DsXMJi<kD#e9s
zgM#25n@63FHs1W64|fc}rv~1QH~oG`$|eM%S5Uy7BQ6MbdpfL+{&X}AcJsQZ!9LDO
z3V4_Ry+C(KP@|Pe&C{KNP6}4Vq>66ais9PjqN6x^5js3q$(xGpuA0Qqsj0wRr3}xq
zQ!oiNTw>BR6BQ=rm_$7mrk?JZw^WHm12dFGEvIDbhN;S=3tVGiZj42V$-7yEn79<E
zNQEU90yT;|RTTD<%mRf)GNf$fiC)Rf5Z77Jjh|=US@%|4Cu8gRvBK(DLHG1AeR-Lf
z%erT|&KUSOwq)6!Ni1hMW7wwdz)Nna>=-ys)1`G5<K4Ny$sFG+ISR|-;nP_VRpE=b
zF8p=k^NISg>CYz~UH(vgp#FBW5gYbn!<)v|nSaDGf1dD<O;;y>d3{HY{A%Xj%zba;
z_I-EDtIqiHn>)RI)tUc13k$KUB1NmeQQz*D_XSS`dU^0vAfzA=;gaDe!Y`B#<JD?1
zD$ef>IQYi4AdWYCaaIa@1TD;Gx5Asx3&epT715%tkV0Lwv4|e=Hjh`;(FWUVXkA+O
zIjN-u&mO*VZ-)YWYQ1~>>}mQL%<g+}_F~)jme%}{7iVdQUJ+X3xb`>noUhuchj38)
zDpxOKkWj^P42TbldZyzk@9B0KApsG;<dUN9Q3XRy@zzVw4P6Q?nTEAw8BNF_5~i7$
zRl_6(Zj(3&CTDfqWRWGah|lmeK3QbhMEtOmMwxnU(Ih$BS~5MWXy*8;OdMtcix@7T
z&QwJfF$r-AW#4kj#XQV*mvU~&bRc|-03yrrXeIo527deE3u6m?*qs3J5aMe=cnjY2
zevFobqDpH=l_uehCOBQFp*bdIzQT9|SHiSpTOPcKYZSW_f>*@4&f9u1U(heIRDdiv
zwp?&*IXt!WdD}H~n_h&MVB$$oqeyyHp;lh2-F?4ae!rGC8hOjlTeY7R>v^Z<mi+uL
zo(h5?PD${%BJl@)?Si79cCk=VFR>8y2*K!MQmMdtJU0jD1n4*5+%4O7bQaMmUK<PR
zrINj_N(7^i^#s?D0~<}w2{v>5z%7t8=-?{!35crjG$IU~+Kg`*pJ|WJ_{VNmukXZ<
z-(Pufz7Zet<3pR*>+wu=vbuI}7G@5oYN<<p;_XIa%1=x+5?McyeQeaUg?i%M>SQgd
z`0{H{63I`pA7vj6*2d@jH*VJM{M1kAtwFGnQfulJKRMM%&icvO$G7Xr1somp<)NMA
z$lkHPY4zlf8p+#!^7hw)@Z;dTQqxjv`jh0yC&iD7k0xtBnD^g=$+ub~Um$IfVc6il
zP{L^7RJ8_o&huQ8exSeIIDv(NEYqYe)ahsd*?`v2W=Ct~z<|p3fVD+&5mY)TdxF(q
zEZSibK3i5>_Am=-(U({X#-MKKYzJEEdWofA>>K)YAS5NAS`j@4qUxz!ze3y+Rm@_^
zTUUs?+N384a;gxZ77H_-GN;eA*lX4DRul+&P$L$kW~FHAw9K=12$DT!>d=b2N^r9T
zAxV&f$p8ozK{_m&tiR|w-ip1RE5Q<0xtP<-o~w#nM-8(`0gO93!E%bpMYmvr1UhAr
zqFJzX=TniRZ7*BIR)8LC6JToF0pn3}4%A<TKm7{`j;?T&XVu}&e$1#zKtTVA0AEQz
zp4=o`Bfr1fNS*alXX~kn&%W=cF4W@}+lUKC15`ES5nmqJiq*b1<*U;T^@gwBsH@p+
zc@EMxqO&2N^yQNq(#F}}9cm1W`U9i&fw65l4a2qQw|)6^Lmu|!;mv$a{h^<{)JRSN
z2J6YI+wwJVsp(_^T|C8;{)_05VMiQ2cj4t_=s73Ow^JZYZ8j(cM5r9$a2)9lC+33^
z`t#)7it{@XrpD~)pjzY+6sn9}cAfW30?7<(y(Uk#5*U_ykWI{GlkhZ`W|1J{0N+C$
zXV~Qe)N!njnzm_pxdKmxysRUmuoMe{vKk7=^-$9`)>xDrhGB|lpeW6Z5rb5Vj^u!@
zn`@FwFkuX5o(BOHSp0avrCZl(@u6CLyb-_P$1hZ8|9Rxp2cex<|AW+r=?CeL<Mr6^
z2hy*jUx$RFul_#<`xBUcC@i-5YoT3CIKpC^ulDsKz<Jx(+eY=?UP!Dj;SI#`{6S?B
z7JU*j=VH4o(n76!Y@%PToX-Nll~x^#mqp4Bzeiq}@T`USe_pb9_zC4qiKY?iGD4N(
zwvxPpwaoug;f00inQ1l3%VM7UI7Im!u_I_GS13a*U;wL7)(ezkdT@~ibB%?8YaEZ9
zg)`fW`go95+cgh5O{QTI(ki_LqAGmZeQIN}-u<2G<dg2x8>HSnh;PsPT5kZ(hJ3=8
zPi)HrSboplo7<2#3)MMa9{p18uJydJEuVP~M>WbKB{&wHk`Y+vpgnO<J{356TQv=k
uS=lx(k{Qs!fT2Smo`ytG+>Hog|1((@4?jC}1jHa0Ve#<p(H`+IH}YSD9ynV7

literal 0
HcmV?d00001

diff --git a/lab-dw-aggregating.ipynb b/lab-dw-aggregating.ipynb
index fadd718..993c5f3 100644
--- a/lab-dw-aggregating.ipynb
+++ b/lab-dw-aggregating.ipynb
@@ -1,165 +1,1399 @@
 {
-  "cells": [
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "31969215-2a90-4d8b-ac36-646a7ae13744",
+   "metadata": {
+    "id": "31969215-2a90-4d8b-ac36-646a7ae13744"
+   },
+   "source": [
+    "# Lab | Data Aggregation and Filtering"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "a8f08a52-bec0-439b-99cc-11d3809d8b5d",
+   "metadata": {
+    "id": "a8f08a52-bec0-439b-99cc-11d3809d8b5d"
+   },
+   "source": [
+    "In this challenge, we will continue to work with customer data from an insurance company. We will use the dataset called marketing_customer_analysis.csv, which can be found at the following link:\n",
+    "\n",
+    "https://raw.githubusercontent.com/data-bootcamp-v4/data/main/marketing_customer_analysis.csv\n",
+    "\n",
+    "This dataset contains information such as customer demographics, policy details, vehicle information, and the customer's response to the last marketing campaign. Our goal is to explore and analyze this data by first performing data cleaning, formatting, and structuring."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "9c98ddc5-b041-4c94-ada1-4dfee5c98e50",
+   "metadata": {
+    "id": "9c98ddc5-b041-4c94-ada1-4dfee5c98e50"
+   },
+   "source": [
+    "1. Create a new DataFrame that only includes customers who:\n",
+    "   - have a **low total_claim_amount** (e.g., below $1,000),\n",
+    "   - have a response \"Yes\" to the last marketing campaign."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "b9be383e-5165-436e-80c8-57d4c757c8c3",
+   "metadata": {
+    "id": "b9be383e-5165-436e-80c8-57d4c757c8c3"
+   },
+   "source": [
+    "2. Using the original DataFrame:\n",
+    "   - analyze the average `monthly_premium_auto` and/or `customer_lifetime_value` by `policy_type` and `gender` for customers who responded \"Yes\", and\n",
+    "   - compare these insights to `total_claim_amount` patterns, and discuss which segments appear most profitable or low-risk for the company."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "7050f4ac-53c5-4193-a3c0-8699b87196f0",
+   "metadata": {
+    "id": "7050f4ac-53c5-4193-a3c0-8699b87196f0"
+   },
+   "source": [
+    "3. Analyze the total number of customers who have policies in each state, and then filter the results to only include states where there are more than 500 customers."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "b60a4443-a1a7-4bbf-b78e-9ccdf9895e0d",
+   "metadata": {
+    "id": "b60a4443-a1a7-4bbf-b78e-9ccdf9895e0d"
+   },
+   "source": [
+    "4. Find the maximum, minimum, and median customer lifetime value by education level and gender. Write your conclusions."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "b42999f9-311f-481e-ae63-40a5577072c5",
+   "metadata": {
+    "id": "b42999f9-311f-481e-ae63-40a5577072c5"
+   },
+   "source": [
+    "## Bonus"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "81ff02c5-6584-4f21-a358-b918697c6432",
+   "metadata": {
+    "id": "81ff02c5-6584-4f21-a358-b918697c6432"
+   },
+   "source": [
+    "5. The marketing team wants to analyze the number of policies sold by state and month. Present the data in a table where the months are arranged as columns and the states are arranged as rows."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "b6aec097-c633-4017-a125-e77a97259cda",
+   "metadata": {
+    "id": "b6aec097-c633-4017-a125-e77a97259cda"
+   },
+   "source": [
+    "6. Display a new DataFrame that contains the number of policies sold by month for each of the top 3 states with the highest number of policies sold.\n",
+    "\n",
+    "*Hint:*\n",
+    "- *To accomplish this, you will first need to group the data by state and month, then count the number of policies sold for each group. Afterwards, you will need to sort the data by the count of policies sold in descending order.*\n",
+    "- *Next, you will select the top 3 states with the highest number of policies sold.*\n",
+    "- *Finally, you will create a new DataFrame that contains the number of policies sold by month for each of the top 3 states.*"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "ba975b8a-a2cf-4fbf-9f59-ebc381767009",
+   "metadata": {
+    "id": "ba975b8a-a2cf-4fbf-9f59-ebc381767009"
+   },
+   "source": [
+    "7. The marketing team wants to analyze the effect of different marketing channels on the customer response rate.\n",
+    "\n",
+    "*Hint: You can use `melt` to unpivot the data and create a table that shows the customer response rate (the share of customers who responded \"Yes\") by marketing channel (the `sales_channel` column).*"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "e4378d94-48fb-4850-a802-b1bc8f427b2d",
+   "metadata": {
+    "id": "e4378d94-48fb-4850-a802-b1bc8f427b2d"
+   },
+   "source": [
+    "External Resources for Data Filtering: https://towardsdatascience.com/filtering-data-frames-in-pandas-b570b1f834b9"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "449513f4-0459-46a0-a18d-9398d974c9ad",
+   "metadata": {
+    "id": "449513f4-0459-46a0-a18d-9398d974c9ad"
+   },
+   "outputs": [
     {
-      "cell_type": "markdown",
-      "id": "31969215-2a90-4d8b-ac36-646a7ae13744",
-      "metadata": {
-        "id": "31969215-2a90-4d8b-ac36-646a7ae13744"
-      },
-      "source": [
-        "# Lab | Data Aggregation and Filtering"
-      ]
-    },
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "<class 'pandas.core.frame.DataFrame'>\n",
+      "RangeIndex: 10910 entries, 0 to 10909\n",
+      "Data columns (total 26 columns):\n",
+      " #   Column                         Non-Null Count  Dtype  \n",
+      "---  ------                         --------------  -----  \n",
+      " 0   Unnamed: 0                     10910 non-null  int64  \n",
+      " 1   Customer                       10910 non-null  object \n",
+      " 2   State                          10279 non-null  object \n",
+      " 3   Customer Lifetime Value        10910 non-null  float64\n",
+      " 4   Response                       10279 non-null  object \n",
+      " 5   Coverage                       10910 non-null  object \n",
+      " 6   Education                      10910 non-null  object \n",
+      " 7   Effective To Date              10910 non-null  object \n",
+      " 8   EmploymentStatus               10910 non-null  object \n",
+      " 9   Gender                         10910 non-null  object \n",
+      " 10  Income                         10910 non-null  int64  \n",
+      " 11  Location Code                  10910 non-null  object \n",
+      " 12  Marital Status                 10910 non-null  object \n",
+      " 13  Monthly Premium Auto           10910 non-null  int64  \n",
+      " 14  Months Since Last Claim        10277 non-null  float64\n",
+      " 15  Months Since Policy Inception  10910 non-null  int64  \n",
+      " 16  Number of Open Complaints      10277 non-null  float64\n",
+      " 17  Number of Policies             10910 non-null  int64  \n",
+      " 18  Policy Type                    10910 non-null  object \n",
+      " 19  Policy                         10910 non-null  object \n",
+      " 20  Renew Offer Type               10910 non-null  object \n",
+      " 21  Sales Channel                  10910 non-null  object \n",
+      " 22  Total Claim Amount             10910 non-null  float64\n",
+      " 23  Vehicle Class                  10288 non-null  object \n",
+      " 24  Vehicle Size                   10288 non-null  object \n",
+      " 25  Vehicle Type                   5428 non-null   object \n",
+      "dtypes: float64(4), int64(5), object(17)\n",
+      "memory usage: 2.2+ MB\n"
+     ]
+    }
+   ],
+   "source": [
+    "import pandas as pd\n",
+    "\n",
+    "url = \"https://raw.githubusercontent.com/data-bootcamp-v4/data/main/marketing_customer_analysis.csv\"\n",
+    "df = pd.read_csv(url)\n",
+    "\n",
+    "df.info()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "01e8fcc0-0ec9-4e67-be70-27af6398c807",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df = df.drop(columns=['Unnamed: 0'])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "cb1be6a7-0f3c-46cd-beab-9ca3ce3cab77",
+   "metadata": {},
+   "outputs": [
     {
-      "cell_type": "markdown",
-      "id": "a8f08a52-bec0-439b-99cc-11d3809d8b5d",
-      "metadata": {
-        "id": "a8f08a52-bec0-439b-99cc-11d3809d8b5d"
-      },
-      "source": [
-        "In this challenge, we will continue to work with customer data from an insurance company. We will use the dataset called marketing_customer_analysis.csv, which can be found at the following link:\n",
-        "\n",
-        "https://raw.githubusercontent.com/data-bootcamp-v4/data/main/marketing_customer_analysis.csv\n",
-        "\n",
-        "This dataset contains information such as customer demographics, policy details, vehicle information, and the customer's response to the last marketing campaign. Our goal is to explore and analyze this data by first performing data cleaning, formatting, and structuring."
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>customer</th>\n",
+       "      <th>state</th>\n",
+       "      <th>customer_lifetime_value</th>\n",
+       "      <th>response</th>\n",
+       "      <th>coverage</th>\n",
+       "      <th>education</th>\n",
+       "      <th>effective_to_date</th>\n",
+       "      <th>employmentstatus</th>\n",
+       "      <th>gender</th>\n",
+       "      <th>income</th>\n",
+       "      <th>...</th>\n",
+       "      <th>number_of_open_complaints</th>\n",
+       "      <th>number_of_policies</th>\n",
+       "      <th>policy_type</th>\n",
+       "      <th>policy</th>\n",
+       "      <th>renew_offer_type</th>\n",
+       "      <th>sales_channel</th>\n",
+       "      <th>total_claim_amount</th>\n",
+       "      <th>vehicle_class</th>\n",
+       "      <th>vehicle_size</th>\n",
+       "      <th>vehicle_type</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>DK49336</td>\n",
+       "      <td>Arizona</td>\n",
+       "      <td>4809.216960</td>\n",
+       "      <td>No</td>\n",
+       "      <td>Basic</td>\n",
+       "      <td>College</td>\n",
+       "      <td>2/18/11</td>\n",
+       "      <td>Employed</td>\n",
+       "      <td>M</td>\n",
+       "      <td>48029</td>\n",
+       "      <td>...</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>9</td>\n",
+       "      <td>Corporate Auto</td>\n",
+       "      <td>Corporate L3</td>\n",
+       "      <td>Offer3</td>\n",
+       "      <td>Agent</td>\n",
+       "      <td>292.800000</td>\n",
+       "      <td>Four-Door Car</td>\n",
+       "      <td>Medsize</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>KX64629</td>\n",
+       "      <td>California</td>\n",
+       "      <td>2228.525238</td>\n",
+       "      <td>No</td>\n",
+       "      <td>Basic</td>\n",
+       "      <td>College</td>\n",
+       "      <td>1/18/11</td>\n",
+       "      <td>Unemployed</td>\n",
+       "      <td>F</td>\n",
+       "      <td>0</td>\n",
+       "      <td>...</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>Personal Auto</td>\n",
+       "      <td>Personal L3</td>\n",
+       "      <td>Offer4</td>\n",
+       "      <td>Call Center</td>\n",
+       "      <td>744.924331</td>\n",
+       "      <td>Four-Door Car</td>\n",
+       "      <td>Medsize</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>LZ68649</td>\n",
+       "      <td>Washington</td>\n",
+       "      <td>14947.917300</td>\n",
+       "      <td>No</td>\n",
+       "      <td>Basic</td>\n",
+       "      <td>Bachelor</td>\n",
+       "      <td>2/10/11</td>\n",
+       "      <td>Employed</td>\n",
+       "      <td>M</td>\n",
+       "      <td>22139</td>\n",
+       "      <td>...</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>2</td>\n",
+       "      <td>Personal Auto</td>\n",
+       "      <td>Personal L3</td>\n",
+       "      <td>Offer3</td>\n",
+       "      <td>Call Center</td>\n",
+       "      <td>480.000000</td>\n",
+       "      <td>SUV</td>\n",
+       "      <td>Medsize</td>\n",
+       "      <td>A</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>XL78013</td>\n",
+       "      <td>Oregon</td>\n",
+       "      <td>22332.439460</td>\n",
+       "      <td>Yes</td>\n",
+       "      <td>Extended</td>\n",
+       "      <td>College</td>\n",
+       "      <td>1/11/11</td>\n",
+       "      <td>Employed</td>\n",
+       "      <td>M</td>\n",
+       "      <td>49078</td>\n",
+       "      <td>...</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>2</td>\n",
+       "      <td>Corporate Auto</td>\n",
+       "      <td>Corporate L3</td>\n",
+       "      <td>Offer2</td>\n",
+       "      <td>Branch</td>\n",
+       "      <td>484.013411</td>\n",
+       "      <td>Four-Door Car</td>\n",
+       "      <td>Medsize</td>\n",
+       "      <td>A</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>QA50777</td>\n",
+       "      <td>Oregon</td>\n",
+       "      <td>9025.067525</td>\n",
+       "      <td>No</td>\n",
+       "      <td>Premium</td>\n",
+       "      <td>Bachelor</td>\n",
+       "      <td>1/17/11</td>\n",
+       "      <td>Medical Leave</td>\n",
+       "      <td>F</td>\n",
+       "      <td>23675</td>\n",
+       "      <td>...</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>7</td>\n",
+       "      <td>Personal Auto</td>\n",
+       "      <td>Personal L2</td>\n",
+       "      <td>Offer1</td>\n",
+       "      <td>Branch</td>\n",
+       "      <td>707.925645</td>\n",
+       "      <td>Four-Door Car</td>\n",
+       "      <td>Medsize</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>...</th>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>10905</th>\n",
+       "      <td>FE99816</td>\n",
+       "      <td>Nevada</td>\n",
+       "      <td>15563.369440</td>\n",
+       "      <td>No</td>\n",
+       "      <td>Premium</td>\n",
+       "      <td>Bachelor</td>\n",
+       "      <td>1/19/11</td>\n",
+       "      <td>Unemployed</td>\n",
+       "      <td>F</td>\n",
+       "      <td>0</td>\n",
+       "      <td>...</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>7</td>\n",
+       "      <td>Personal Auto</td>\n",
+       "      <td>Personal L1</td>\n",
+       "      <td>Offer3</td>\n",
+       "      <td>Web</td>\n",
+       "      <td>1214.400000</td>\n",
+       "      <td>Luxury Car</td>\n",
+       "      <td>Medsize</td>\n",
+       "      <td>A</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>10906</th>\n",
+       "      <td>KX53892</td>\n",
+       "      <td>Oregon</td>\n",
+       "      <td>5259.444853</td>\n",
+       "      <td>No</td>\n",
+       "      <td>Basic</td>\n",
+       "      <td>College</td>\n",
+       "      <td>1/6/11</td>\n",
+       "      <td>Employed</td>\n",
+       "      <td>F</td>\n",
+       "      <td>61146</td>\n",
+       "      <td>...</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>6</td>\n",
+       "      <td>Personal Auto</td>\n",
+       "      <td>Personal L3</td>\n",
+       "      <td>Offer2</td>\n",
+       "      <td>Branch</td>\n",
+       "      <td>273.018929</td>\n",
+       "      <td>Four-Door Car</td>\n",
+       "      <td>Medsize</td>\n",
+       "      <td>A</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>10907</th>\n",
+       "      <td>TL39050</td>\n",
+       "      <td>Arizona</td>\n",
+       "      <td>23893.304100</td>\n",
+       "      <td>No</td>\n",
+       "      <td>Extended</td>\n",
+       "      <td>Bachelor</td>\n",
+       "      <td>2/6/11</td>\n",
+       "      <td>Employed</td>\n",
+       "      <td>F</td>\n",
+       "      <td>39837</td>\n",
+       "      <td>...</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>2</td>\n",
+       "      <td>Corporate Auto</td>\n",
+       "      <td>Corporate L3</td>\n",
+       "      <td>Offer1</td>\n",
+       "      <td>Web</td>\n",
+       "      <td>381.306996</td>\n",
+       "      <td>Luxury SUV</td>\n",
+       "      <td>Medsize</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>10908</th>\n",
+       "      <td>WA60547</td>\n",
+       "      <td>California</td>\n",
+       "      <td>11971.977650</td>\n",
+       "      <td>No</td>\n",
+       "      <td>Premium</td>\n",
+       "      <td>College</td>\n",
+       "      <td>2/13/11</td>\n",
+       "      <td>Employed</td>\n",
+       "      <td>F</td>\n",
+       "      <td>64195</td>\n",
+       "      <td>...</td>\n",
+       "      <td>4.0</td>\n",
+       "      <td>6</td>\n",
+       "      <td>Personal Auto</td>\n",
+       "      <td>Personal L1</td>\n",
+       "      <td>Offer1</td>\n",
+       "      <td>Branch</td>\n",
+       "      <td>618.288849</td>\n",
+       "      <td>SUV</td>\n",
+       "      <td>Medsize</td>\n",
+       "      <td>A</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>10909</th>\n",
+       "      <td>IV32877</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>6857.519928</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>Basic</td>\n",
+       "      <td>Bachelor</td>\n",
+       "      <td>1/8/11</td>\n",
+       "      <td>Unemployed</td>\n",
+       "      <td>M</td>\n",
+       "      <td>0</td>\n",
+       "      <td>...</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>3</td>\n",
+       "      <td>Personal Auto</td>\n",
+       "      <td>Personal L1</td>\n",
+       "      <td>Offer4</td>\n",
+       "      <td>Web</td>\n",
+       "      <td>1021.719397</td>\n",
+       "      <td>SUV</td>\n",
+       "      <td>Medsize</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "<p>10910 rows × 25 columns</p>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "      customer       state  customer_lifetime_value response  coverage  \\\n",
+       "0      DK49336     Arizona              4809.216960       No     Basic   \n",
+       "1      KX64629  California              2228.525238       No     Basic   \n",
+       "2      LZ68649  Washington             14947.917300       No     Basic   \n",
+       "3      XL78013      Oregon             22332.439460      Yes  Extended   \n",
+       "4      QA50777      Oregon              9025.067525       No   Premium   \n",
+       "...        ...         ...                      ...      ...       ...   \n",
+       "10905  FE99816      Nevada             15563.369440       No   Premium   \n",
+       "10906  KX53892      Oregon              5259.444853       No     Basic   \n",
+       "10907  TL39050     Arizona             23893.304100       No  Extended   \n",
+       "10908  WA60547  California             11971.977650       No   Premium   \n",
+       "10909  IV32877         NaN              6857.519928      NaN     Basic   \n",
+       "\n",
+       "      education effective_to_date employmentstatus gender  income  ...  \\\n",
+       "0       College           2/18/11         Employed      M   48029  ...   \n",
+       "1       College           1/18/11       Unemployed      F       0  ...   \n",
+       "2      Bachelor           2/10/11         Employed      M   22139  ...   \n",
+       "3       College           1/11/11         Employed      M   49078  ...   \n",
+       "4      Bachelor           1/17/11    Medical Leave      F   23675  ...   \n",
+       "...         ...               ...              ...    ...     ...  ...   \n",
+       "10905  Bachelor           1/19/11       Unemployed      F       0  ...   \n",
+       "10906   College            1/6/11         Employed      F   61146  ...   \n",
+       "10907  Bachelor            2/6/11         Employed      F   39837  ...   \n",
+       "10908   College           2/13/11         Employed      F   64195  ...   \n",
+       "10909  Bachelor            1/8/11       Unemployed      M       0  ...   \n",
+       "\n",
+       "      number_of_open_complaints number_of_policies     policy_type  \\\n",
+       "0                           0.0                  9  Corporate Auto   \n",
+       "1                           0.0                  1   Personal Auto   \n",
+       "2                           0.0                  2   Personal Auto   \n",
+       "3                           0.0                  2  Corporate Auto   \n",
+       "4                           NaN                  7   Personal Auto   \n",
+       "...                         ...                ...             ...   \n",
+       "10905                       NaN                  7   Personal Auto   \n",
+       "10906                       0.0                  6   Personal Auto   \n",
+       "10907                       0.0                  2  Corporate Auto   \n",
+       "10908                       4.0                  6   Personal Auto   \n",
+       "10909                       0.0                  3   Personal Auto   \n",
+       "\n",
+       "             policy  renew_offer_type  sales_channel  total_claim_amount  \\\n",
+       "0      Corporate L3            Offer3          Agent          292.800000   \n",
+       "1       Personal L3            Offer4    Call Center          744.924331   \n",
+       "2       Personal L3            Offer3    Call Center          480.000000   \n",
+       "3      Corporate L3            Offer2         Branch          484.013411   \n",
+       "4       Personal L2            Offer1         Branch          707.925645   \n",
+       "...             ...               ...            ...                 ...   \n",
+       "10905   Personal L1            Offer3            Web         1214.400000   \n",
+       "10906   Personal L3            Offer2         Branch          273.018929   \n",
+       "10907  Corporate L3            Offer1            Web          381.306996   \n",
+       "10908   Personal L1            Offer1         Branch          618.288849   \n",
+       "10909   Personal L1            Offer4            Web         1021.719397   \n",
+       "\n",
+       "       vehicle_class vehicle_size vehicle_type  \n",
+       "0      Four-Door Car      Medsize          NaN  \n",
+       "1      Four-Door Car      Medsize          NaN  \n",
+       "2                SUV      Medsize            A  \n",
+       "3      Four-Door Car      Medsize            A  \n",
+       "4      Four-Door Car      Medsize          NaN  \n",
+       "...              ...          ...          ...  \n",
+       "10905     Luxury Car      Medsize            A  \n",
+       "10906  Four-Door Car      Medsize            A  \n",
+       "10907     Luxury SUV      Medsize          NaN  \n",
+       "10908            SUV      Medsize            A  \n",
+       "10909            SUV      Medsize          NaN  \n",
+       "\n",
+       "[10910 rows x 25 columns]"
       ]
-    },
+     },
+     "execution_count": 4,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "df.columns = [col.lower().replace(' ', '_') for col in df.columns]\n",
+    "df"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "11814331-8748-41ba-a054-464d92080595",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df['effective_to_date'] = pd.to_datetime(df['effective_to_date'], format='%m/%d/%y')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "id": "51018226-6dda-49f1-b4f3-491bdca531dd",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "median_value = df['months_since_last_claim'].median()\n",
+    "\n",
+    "df['months_since_last_claim'] = df['months_since_last_claim'].fillna(median_value)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "id": "693ccfc5-3c93-4f6d-9c2e-92692f12bd7d",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "mode_value = df['state'].mode()[0]\n",
+    "\n",
+    "df['state'] = df['state'].fillna(mode_value)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "id": "0122860d-b438-4417-bdfb-c87681a3a4ff",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df = df.drop(columns=['vehicle_type'])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "id": "0add733f-d2fc-4c6f-a4c2-b3265a7ff72c",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df['number_of_open_complaints'] = df['number_of_open_complaints'].fillna(df['number_of_open_complaints'].median())\n",
+    "df['response'] = df['response'].fillna(df['response'].mode()[0])\n",
+    "df['vehicle_class'] = df['vehicle_class'].fillna(df['vehicle_class'].mode()[0])\n",
+    "df['vehicle_size'] = df['vehicle_size'].fillna(df['vehicle_size'].mode()[0])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "id": "ae18e49b-a95f-42dc-8b18-ed790e2dc766",
+   "metadata": {},
+   "outputs": [
     {
-      "cell_type": "markdown",
-      "id": "9c98ddc5-b041-4c94-ada1-4dfee5c98e50",
-      "metadata": {
-        "id": "9c98ddc5-b041-4c94-ada1-4dfee5c98e50"
-      },
-      "source": [
-        "1. Create a new DataFrame that only includes customers who:\n",
-        "   - have a **low total_claim_amount** (e.g., below $1,000),\n",
-        "   - have a response \"Yes\" to the last marketing campaign."
+     "data": {
+      "text/plain": [
+       "customer                         0\n",
+       "state                            0\n",
+       "customer_lifetime_value          0\n",
+       "response                         0\n",
+       "coverage                         0\n",
+       "education                        0\n",
+       "effective_to_date                0\n",
+       "employmentstatus                 0\n",
+       "gender                           0\n",
+       "income                           0\n",
+       "location_code                    0\n",
+       "marital_status                   0\n",
+       "monthly_premium_auto             0\n",
+       "months_since_last_claim          0\n",
+       "months_since_policy_inception    0\n",
+       "number_of_open_complaints        0\n",
+       "number_of_policies               0\n",
+       "policy_type                      0\n",
+       "policy                           0\n",
+       "renew_offer_type                 0\n",
+       "sales_channel                    0\n",
+       "total_claim_amount               0\n",
+       "vehicle_class                    0\n",
+       "vehicle_size                     0\n",
+       "dtype: int64"
       ]
-    },
+     },
+     "execution_count": 13,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "df.isna().sum()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "id": "1b46795a-31a6-451d-95dc-97850251a278",
+   "metadata": {},
+   "outputs": [
     {
-      "cell_type": "markdown",
-      "id": "b9be383e-5165-436e-80c8-57d4c757c8c3",
-      "metadata": {
-        "id": "b9be383e-5165-436e-80c8-57d4c757c8c3"
-      },
-      "source": [
-        "2. Using the original Dataframe, analyze:\n",
-        "   - the average `monthly_premium` and/or customer lifetime value by `policy_type` and `gender` for customers who responded \"Yes\", and\n",
-        "   - compare these insights to `total_claim_amount` patterns, and discuss which segments appear most profitable or low-risk for the company."
-      ]
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "New DataFrame of low-risk, engaged customers:\n"
+     ]
     },
     {
-      "cell_type": "markdown",
-      "id": "7050f4ac-53c5-4193-a3c0-8699b87196f0",
-      "metadata": {
-        "id": "7050f4ac-53c5-4193-a3c0-8699b87196f0"
-      },
-      "source": [
-        "3. Analyze the total number of customers who have policies in each state, and then filter the results to only include states where there are more than 500 customers."
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>customer</th>\n",
+       "      <th>state</th>\n",
+       "      <th>customer_lifetime_value</th>\n",
+       "      <th>response</th>\n",
+       "      <th>coverage</th>\n",
+       "      <th>education</th>\n",
+       "      <th>effective_to_date</th>\n",
+       "      <th>employmentstatus</th>\n",
+       "      <th>gender</th>\n",
+       "      <th>income</th>\n",
+       "      <th>...</th>\n",
+       "      <th>months_since_policy_inception</th>\n",
+       "      <th>number_of_open_complaints</th>\n",
+       "      <th>number_of_policies</th>\n",
+       "      <th>policy_type</th>\n",
+       "      <th>policy</th>\n",
+       "      <th>renew_offer_type</th>\n",
+       "      <th>sales_channel</th>\n",
+       "      <th>total_claim_amount</th>\n",
+       "      <th>vehicle_class</th>\n",
+       "      <th>vehicle_size</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>XL78013</td>\n",
+       "      <td>Oregon</td>\n",
+       "      <td>22332.439460</td>\n",
+       "      <td>Yes</td>\n",
+       "      <td>Extended</td>\n",
+       "      <td>College</td>\n",
+       "      <td>2011-01-11</td>\n",
+       "      <td>Employed</td>\n",
+       "      <td>M</td>\n",
+       "      <td>49078</td>\n",
+       "      <td>...</td>\n",
+       "      <td>3</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>2</td>\n",
+       "      <td>Corporate Auto</td>\n",
+       "      <td>Corporate L3</td>\n",
+       "      <td>Offer2</td>\n",
+       "      <td>Branch</td>\n",
+       "      <td>484.013411</td>\n",
+       "      <td>Four-Door Car</td>\n",
+       "      <td>Medsize</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>8</th>\n",
+       "      <td>FM55990</td>\n",
+       "      <td>California</td>\n",
+       "      <td>5989.773931</td>\n",
+       "      <td>Yes</td>\n",
+       "      <td>Premium</td>\n",
+       "      <td>College</td>\n",
+       "      <td>2011-01-19</td>\n",
+       "      <td>Employed</td>\n",
+       "      <td>M</td>\n",
+       "      <td>66839</td>\n",
+       "      <td>...</td>\n",
+       "      <td>24</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>Personal Auto</td>\n",
+       "      <td>Personal L1</td>\n",
+       "      <td>Offer2</td>\n",
+       "      <td>Branch</td>\n",
+       "      <td>739.200000</td>\n",
+       "      <td>Sports Car</td>\n",
+       "      <td>Medsize</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>15</th>\n",
+       "      <td>CW49887</td>\n",
+       "      <td>California</td>\n",
+       "      <td>4626.801093</td>\n",
+       "      <td>Yes</td>\n",
+       "      <td>Basic</td>\n",
+       "      <td>Master</td>\n",
+       "      <td>2011-01-16</td>\n",
+       "      <td>Employed</td>\n",
+       "      <td>F</td>\n",
+       "      <td>79487</td>\n",
+       "      <td>...</td>\n",
+       "      <td>87</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>Special Auto</td>\n",
+       "      <td>Special L1</td>\n",
+       "      <td>Offer2</td>\n",
+       "      <td>Branch</td>\n",
+       "      <td>547.200000</td>\n",
+       "      <td>SUV</td>\n",
+       "      <td>Medsize</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>19</th>\n",
+       "      <td>NJ54277</td>\n",
+       "      <td>California</td>\n",
+       "      <td>3746.751625</td>\n",
+       "      <td>Yes</td>\n",
+       "      <td>Extended</td>\n",
+       "      <td>College</td>\n",
+       "      <td>2011-02-26</td>\n",
+       "      <td>Employed</td>\n",
+       "      <td>F</td>\n",
+       "      <td>41479</td>\n",
+       "      <td>...</td>\n",
+       "      <td>38</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>Personal Auto</td>\n",
+       "      <td>Personal L2</td>\n",
+       "      <td>Offer2</td>\n",
+       "      <td>Call Center</td>\n",
+       "      <td>19.575683</td>\n",
+       "      <td>Two-Door Car</td>\n",
+       "      <td>Large</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>27</th>\n",
+       "      <td>MQ68407</td>\n",
+       "      <td>Oregon</td>\n",
+       "      <td>4376.363592</td>\n",
+       "      <td>Yes</td>\n",
+       "      <td>Premium</td>\n",
+       "      <td>Bachelor</td>\n",
+       "      <td>2011-02-28</td>\n",
+       "      <td>Employed</td>\n",
+       "      <td>F</td>\n",
+       "      <td>63774</td>\n",
+       "      <td>...</td>\n",
+       "      <td>63</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>Personal Auto</td>\n",
+       "      <td>Personal L3</td>\n",
+       "      <td>Offer2</td>\n",
+       "      <td>Agent</td>\n",
+       "      <td>60.036683</td>\n",
+       "      <td>Four-Door Car</td>\n",
+       "      <td>Medsize</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "<p>5 rows × 24 columns</p>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   customer       state  customer_lifetime_value response  coverage education  \\\n",
+       "3   XL78013      Oregon             22332.439460      Yes  Extended   College   \n",
+       "8   FM55990  California              5989.773931      Yes   Premium   College   \n",
+       "15  CW49887  California              4626.801093      Yes     Basic    Master   \n",
+       "19  NJ54277  California              3746.751625      Yes  Extended   College   \n",
+       "27  MQ68407      Oregon              4376.363592      Yes   Premium  Bachelor   \n",
+       "\n",
+       "   effective_to_date employmentstatus gender  income  ...  \\\n",
+       "3         2011-01-11         Employed      M   49078  ...   \n",
+       "8         2011-01-19         Employed      M   66839  ...   \n",
+       "15        2011-01-16         Employed      F   79487  ...   \n",
+       "19        2011-02-26         Employed      F   41479  ...   \n",
+       "27        2011-02-28         Employed      F   63774  ...   \n",
+       "\n",
+       "   months_since_policy_inception number_of_open_complaints  \\\n",
+       "3                              3                       0.0   \n",
+       "8                             24                       0.0   \n",
+       "15                            87                       0.0   \n",
+       "19                            38                       1.0   \n",
+       "27                            63                       0.0   \n",
+       "\n",
+       "    number_of_policies     policy_type        policy  renew_offer_type  \\\n",
+       "3                    2  Corporate Auto  Corporate L3            Offer2   \n",
+       "8                    1   Personal Auto   Personal L1            Offer2   \n",
+       "15                   1    Special Auto    Special L1            Offer2   \n",
+       "19                   1   Personal Auto   Personal L2            Offer2   \n",
+       "27                   1   Personal Auto   Personal L3            Offer2   \n",
+       "\n",
+       "    sales_channel total_claim_amount  vehicle_class vehicle_size  \n",
+       "3          Branch         484.013411  Four-Door Car      Medsize  \n",
+       "8          Branch         739.200000     Sports Car      Medsize  \n",
+       "15         Branch         547.200000            SUV      Medsize  \n",
+       "19    Call Center          19.575683   Two-Door Car        Large  \n",
+       "27          Agent          60.036683  Four-Door Car      Medsize  \n",
+       "\n",
+       "[5 rows x 24 columns]"
       ]
-    },
+     },
+     "execution_count": 14,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "#1. Create a new DataFrame that only includes customers who:\n",
+    "# have a low total_claim_amount (e.g., below $1,000),\n",
+    "# have a response \"Yes\" to the last marketing campaign.\n",
+    "\n",
+    "# Create the two conditions\n",
+    "low_claim_condition = df['total_claim_amount'] < 1000\n",
+    "yes_response_condition = df['response'] == 'Yes'\n",
+    "\n",
+    "# Combine the conditions with '&' and apply them to the DataFrame\n",
+    "# Each condition is already a boolean Series stored in its own variable, so no extra parentheses are needed here\n",
+    "low_risk_customers_df = df[low_claim_condition & yes_response_condition]\n",
+    "\n",
+    "print(\"New DataFrame of low-risk, engaged customers:\")\n",
+    "low_risk_customers_df.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "id": "71aae6dd-657f-4c93-b729-d2153e7d70ad",
+   "metadata": {},
+   "outputs": [
     {
-      "cell_type": "markdown",
-      "id": "b60a4443-a1a7-4bbf-b78e-9ccdf9895e0d",
-      "metadata": {
-        "id": "b60a4443-a1a7-4bbf-b78e-9ccdf9895e0d"
-      },
-      "source": [
-        "4. Find the maximum, minimum, and median customer lifetime value by education level and gender. Write your conclusions."
-      ]
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Profitability Analysis:\n"
+     ]
     },
     {
-      "cell_type": "markdown",
-      "id": "b42999f9-311f-481e-ae63-40a5577072c5",
-      "metadata": {
-        "id": "b42999f9-311f-481e-ae63-40a5577072c5"
-      },
-      "source": [
-        "## Bonus"
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th>average_monthly_premium</th>\n",
+       "      <th>average_clv</th>\n",
+       "      <th>average_claim_amount</th>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>policy_type</th>\n",
+       "      <th>gender</th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th rowspan=\"2\" valign=\"top\">Corporate Auto</th>\n",
+       "      <th>F</th>\n",
+       "      <td>94.30</td>\n",
+       "      <td>7712.63</td>\n",
+       "      <td>433.74</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>M</th>\n",
+       "      <td>92.19</td>\n",
+       "      <td>7944.47</td>\n",
+       "      <td>408.58</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th rowspan=\"2\" valign=\"top\">Personal Auto</th>\n",
+       "      <th>F</th>\n",
+       "      <td>99.00</td>\n",
+       "      <td>8339.79</td>\n",
+       "      <td>452.97</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>M</th>\n",
+       "      <td>91.09</td>\n",
+       "      <td>7448.38</td>\n",
+       "      <td>457.01</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th rowspan=\"2\" valign=\"top\">Special Auto</th>\n",
+       "      <th>F</th>\n",
+       "      <td>92.31</td>\n",
+       "      <td>7691.58</td>\n",
+       "      <td>453.28</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>M</th>\n",
+       "      <td>86.34</td>\n",
+       "      <td>8247.09</td>\n",
+       "      <td>429.53</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "                       average_monthly_premium  average_clv  \\\n",
+       "policy_type    gender                                         \n",
+       "Corporate Auto F                         94.30      7712.63   \n",
+       "               M                         92.19      7944.47   \n",
+       "Personal Auto  F                         99.00      8339.79   \n",
+       "               M                         91.09      7448.38   \n",
+       "Special Auto   F                         92.31      7691.58   \n",
+       "               M                         86.34      8247.09   \n",
+       "\n",
+       "                       average_claim_amount  \n",
+       "policy_type    gender                        \n",
+       "Corporate Auto F                     433.74  \n",
+       "               M                     408.58  \n",
+       "Personal Auto  F                     452.97  \n",
+       "               M                     457.01  \n",
+       "Special Auto   F                     453.28  \n",
+       "               M                     429.53  "
       ]
-    },
+     },
+     "execution_count": 15,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "#2. Using the original Dataframe, analyze:\n",
+    "# the average monthly_premium and/or customer lifetime value by policy_type and gender for customers who responded \"Yes\", and\n",
+    "# compare these insights to total_claim_amount patterns, and discuss which segments appear most profitable or low-risk for the company.\n",
+    "\n",
+    "# First, filter for customers who responded \"Yes\"\n",
+    "yes_customers = df[df['response'] == 'Yes']\n",
+    "\n",
+    "# Now, group by policy type and gender, and aggregate the key metrics\n",
+    "profitability_analysis = yes_customers.groupby(['policy_type', 'gender']).agg(\n",
+    "    average_monthly_premium=('monthly_premium_auto', 'mean'),\n",
+    "    average_clv=('customer_lifetime_value', 'mean'),\n",
+    "    average_claim_amount=('total_claim_amount', 'mean')\n",
+    ")\n",
+    "\n",
+    "print(\"Profitability Analysis:\")\n",
+    "profitability_analysis.round(2) # Round for readability"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "id": "a54ef7b5-ae49-4b9a-a568-4c103b18ae76",
+   "metadata": {},
+   "outputs": [
     {
-      "cell_type": "markdown",
-      "id": "81ff02c5-6584-4f21-a358-b918697c6432",
-      "metadata": {
-        "id": "81ff02c5-6584-4f21-a358-b918697c6432"
-      },
-      "source": [
-        "5. The marketing team wants to analyze the number of policies sold by state and month. Present the data in a table where the months are arranged as columns and the states are arranged as rows."
-      ]
-    },
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "States with over 500 customers:\n",
+      "state\n",
+      "California    4183\n",
+      "Oregon        2909\n",
+      "Arizona       1937\n",
+      "Nevada         993\n",
+      "Washington     888\n",
+      "Name: count, dtype: int64\n"
+     ]
+    }
+   ],
+   "source": [
+    "#3. Analyze the total number of customers who have policies in each state, and then filter the results to only include states where there are more than 500 customers.\n",
+    "\n",
+    "# Get the number of customers in each state\n",
+    "state_counts = df['state'].value_counts()\n",
+    "\n",
+    "# Filter the results to only include states with more than 500 customers\n",
+    "states_over_500 = state_counts[state_counts > 500]\n",
+    "\n",
+    "print(\"States with over 500 customers:\")\n",
+    "print(states_over_500)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "id": "f17ca56f-50bf-4ae2-bc61-b418e1b77f1e",
+   "metadata": {},
+   "outputs": [
     {
-      "cell_type": "markdown",
-      "id": "b6aec097-c633-4017-a125-e77a97259cda",
-      "metadata": {
-        "id": "b6aec097-c633-4017-a125-e77a97259cda"
-      },
-      "source": [
-        "6.  Display a new DataFrame that contains the number of policies sold by month, by state, for the top 3 states with the highest number of policies sold.\n",
-        "\n",
-        "*Hint:*\n",
-        "- *To accomplish this, you will first need to group the data by state and month, then count the number of policies sold for each group. Afterwards, you will need to sort the data by the count of policies sold in descending order.*\n",
-        "- *Next, you will select the top 3 states with the highest number of policies sold.*\n",
-        "- *Finally, you will create a new DataFrame that contains the number of policies sold by month for each of the top 3 states.*"
-      ]
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "CLV Analysis by Education and Gender:\n"
+     ]
     },
     {
-      "cell_type": "markdown",
-      "id": "ba975b8a-a2cf-4fbf-9f59-ebc381767009",
-      "metadata": {
-        "id": "ba975b8a-a2cf-4fbf-9f59-ebc381767009"
-      },
-      "source": [
-        "7. The marketing team wants to analyze the effect of different marketing channels on the customer response rate.\n",
-        "\n",
-        "Hint: You can use melt to unpivot the data and create a table that shows the customer response rate (those who responded \"Yes\") by marketing channel."
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th>max</th>\n",
+       "      <th>min</th>\n",
+       "      <th>median</th>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>education</th>\n",
+       "      <th>gender</th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th rowspan=\"2\" valign=\"top\">Bachelor</th>\n",
+       "      <th>F</th>\n",
+       "      <td>73225.96</td>\n",
+       "      <td>1904.00</td>\n",
+       "      <td>5640.51</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>M</th>\n",
+       "      <td>67907.27</td>\n",
+       "      <td>1898.01</td>\n",
+       "      <td>5548.03</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th rowspan=\"2\" valign=\"top\">College</th>\n",
+       "      <th>F</th>\n",
+       "      <td>61850.19</td>\n",
+       "      <td>1898.68</td>\n",
+       "      <td>5623.61</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>M</th>\n",
+       "      <td>61134.68</td>\n",
+       "      <td>1918.12</td>\n",
+       "      <td>6005.85</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th rowspan=\"2\" valign=\"top\">Doctor</th>\n",
+       "      <th>F</th>\n",
+       "      <td>44856.11</td>\n",
+       "      <td>2395.57</td>\n",
+       "      <td>5332.46</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>M</th>\n",
+       "      <td>32677.34</td>\n",
+       "      <td>2267.60</td>\n",
+       "      <td>5577.67</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th rowspan=\"2\" valign=\"top\">High School or Below</th>\n",
+       "      <th>F</th>\n",
+       "      <td>55277.45</td>\n",
+       "      <td>2144.92</td>\n",
+       "      <td>6039.55</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>M</th>\n",
+       "      <td>83325.38</td>\n",
+       "      <td>1940.98</td>\n",
+       "      <td>6286.73</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th rowspan=\"2\" valign=\"top\">Master</th>\n",
+       "      <th>F</th>\n",
+       "      <td>51016.07</td>\n",
+       "      <td>2417.78</td>\n",
+       "      <td>5729.86</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>M</th>\n",
+       "      <td>50568.26</td>\n",
+       "      <td>2272.31</td>\n",
+       "      <td>5579.10</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "                                  max      min   median\n",
+       "education            gender                            \n",
+       "Bachelor             F       73225.96  1904.00  5640.51\n",
+       "                     M       67907.27  1898.01  5548.03\n",
+       "College              F       61850.19  1898.68  5623.61\n",
+       "                     M       61134.68  1918.12  6005.85\n",
+       "Doctor               F       44856.11  2395.57  5332.46\n",
+       "                     M       32677.34  2267.60  5577.67\n",
+       "High School or Below F       55277.45  2144.92  6039.55\n",
+       "                     M       83325.38  1940.98  6286.73\n",
+       "Master               F       51016.07  2417.78  5729.86\n",
+       "                     M       50568.26  2272.31  5579.10"
       ]
-    },
+     },
+     "execution_count": 17,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "#4. Find the maximum, minimum, and median customer lifetime value by education level and gender. Write your conclusions.\n",
+    "\n",
+    "clv_by_segment = df.groupby(['education', 'gender'])['customer_lifetime_value'].agg(['max', 'min', 'median'])\n",
+    "\n",
+    "print(\"CLV Analysis by Education and Gender:\")\n",
+    "clv_by_segment.round(2)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 20,
+   "id": "ff100757-8f5a-4285-bbce-a86d23a5e067",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Convert the column to datetime objects\n",
+    "df['effective_to_date'] = pd.to_datetime(df['effective_to_date'])\n",
+    "\n",
+    "# Extract the month number and create a new 'month' column\n",
+    "df['month'] = df['effective_to_date'].dt.month"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 25,
+   "id": "1ee8cb70-082c-4cf9-b76a-64733ec71e9b",
+   "metadata": {},
+   "outputs": [
     {
-      "cell_type": "markdown",
-      "id": "e4378d94-48fb-4850-a802-b1bc8f427b2d",
-      "metadata": {
-        "id": "e4378d94-48fb-4850-a802-b1bc8f427b2d"
-      },
-      "source": [
-        "External Resources for Data Filtering: https://towardsdatascience.com/filtering-data-frames-in-pandas-b570b1f834b9"
-      ]
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Policies Sold by State and Month:\n"
+     ]
     },
     {
-      "cell_type": "code",
-      "execution_count": null,
-      "id": "449513f4-0459-46a0-a18d-9398d974c9ad",
-      "metadata": {
-        "id": "449513f4-0459-46a0-a18d-9398d974c9ad"
-      },
-      "outputs": [],
-      "source": [
-        "# your code goes here"
+     "data": {
+      "text/plain": [
+       "month  state     \n",
+       "1      Arizona       3052\n",
+       "       California    6666\n",
+       "       Nevada        1493\n",
+       "       Oregon        4697\n",
+       "       Washington    1358\n",
+       "2      Arizona       2864\n",
+       "       California    5901\n",
+       "       Nevada        1278\n",
+       "       Oregon        3969\n",
+       "       Washington    1225\n",
+       "dtype: int64"
       ]
+     },
+     "execution_count": 25,
+     "metadata": {},
+     "output_type": "execute_result"
     }
-  ],
-  "metadata": {
-    "colab": {
-      "provenance": []
-    },
-    "kernelspec": {
-      "display_name": "Python 3 (ipykernel)",
-      "language": "python",
-      "name": "python3"
-    },
-    "language_info": {
-      "codemirror_mode": {
-        "name": "ipython",
-        "version": 3
-      },
-      "file_extension": ".py",
-      "mimetype": "text/x-python",
-      "name": "python",
-      "nbconvert_exporter": "python",
-      "pygments_lexer": "ipython3",
-      "version": "3.9.13"
+   ],
+   "source": [
+    "#5. The marketing team wants to analyze the number of policies sold by state and month. \n",
+    "# Present the data in a table where the months are arranged as columns and the states are arranged as rows.\n",
+    "\n",
+    "# Create the pivot table: one row per state, one column per month, summing number_of_policies\n",
+    "# We add fill_value=0 to make sure any state/month combo with no sales shows a 0 instead of NaN\n",
+    "policies_pivot = df.pivot_table(\n",
+    "    index='state',\n",
+    "    columns='month',\n",
+    "    values='number_of_policies',\n",
+    "    aggfunc='sum',\n",
+    "    fill_value=0\n",
+    ")\n",
+    "# Show the pivot table as-is so the states stay as rows and the months stay as columns\n",
+    "print(\"Policies Sold by State and Month:\")\n",
+    "policies_pivot"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 26,
+   "id": "67445039-fb08-43a9-977b-25433aaaae2f",
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n",
+      "Top 3 States by Number of Policies Sold:\n",
+      "month          1     2\n",
+      "state                 \n",
+      "California  6666  5901\n",
+      "Oregon      4697  3969\n",
+      "Arizona     3052  2864\n"
+     ]
     }
+   ],
+   "source": [
+    "#6. Display a new DataFrame that contains the number of policies sold by month, by state, for the top 3 states with the highest number of policies sold.\n",
+    "\n",
+    "# Total policies per state, summed across the months (the columns).\n",
+    "# This is kept in its own Series instead of being added to policies_pivot as a column,\n",
+    "# so the pivot table stays unchanged and this cell gives the same result when re-run.\n",
+    "state_totals = policies_pivot.sum(axis=1)\n",
+    "\n",
+    "# Sort the totals in descending order and get the index (the state names) of the top 3\n",
+    "top_3_states = state_totals.sort_values(ascending=False).head(3).index\n",
+    "\n",
+    "# Use .loc to select only the rows for the top 3 states from our pivot table.\n",
+    "# The rows come back in the order of top_3_states, i.e. highest total first.\n",
+    "top_3_df = policies_pivot.loc[top_3_states]\n",
+    "\n",
+    "\n",
+    "print(\"\\nTop 3 States by Number of Policies Sold:\")\n",
+    "print(top_3_df)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "98aaa755-4214-4d70-9335-3b953ece995f",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# 😮‍💨"
+   ]
+  }
+ ],
+ "metadata": {
+  "colab": {
+   "provenance": []
+  },
+  "kernelspec": {
+   "display_name": "Python [conda env:base] *",
+   "language": "python",
+   "name": "conda-base-py"
   },
-  "nbformat": 4,
-  "nbformat_minor": 5
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.13.5"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
 }

	customer	state	customer_lifetime_value	response	coverage	education	effective_to_date	employmentstatus	gender	income	...	number_of_open_complaints	number_of_policies	policy_type	policy	renew_offer_type	sales_channel	total_claim_amount	vehicle_class	vehicle_size	vehicle_type
0	DK49336	Arizona	4809.216960	No	Basic	College	2/18/11	Employed	M	48029	...	0.0	9	Corporate Auto	Corporate L3	Offer3	Agent	292.800000	Four-Door Car	Medsize	NaN
1	KX64629	California	2228.525238	No	Basic	College	1/18/11	Unemployed	F	0	...	0.0	1	Personal Auto	Personal L3	Offer4	Call Center	744.924331	Four-Door Car	Medsize	NaN
2	LZ68649	Washington	14947.917300	No	Basic	Bachelor	2/10/11	Employed	M	22139	...	0.0	2	Personal Auto	Personal L3	Offer3	Call Center	480.000000	SUV	Medsize	A
3	XL78013	Oregon	22332.439460	Yes	Extended	College	1/11/11	Employed	M	49078	...	0.0	2	Corporate Auto	Corporate L3	Offer2	Branch	484.013411	Four-Door Car	Medsize	A
4	QA50777	Oregon	9025.067525	No	Premium	Bachelor	1/17/11	Medical Leave	F	23675	...	NaN	7	Personal Auto	Personal L2	Offer1	Branch	707.925645	Four-Door Car	Medsize	NaN
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
10905	FE99816	Nevada	15563.369440	No	Premium	Bachelor	1/19/11	Unemployed	F	0	...	NaN	7	Personal Auto	Personal L1	Offer3	Web	1214.400000	Luxury Car	Medsize	A
10906	KX53892	Oregon	5259.444853	No	Basic	College	1/6/11	Employed	F	61146	...	0.0	6	Personal Auto	Personal L3	Offer2	Branch	273.018929	Four-Door Car	Medsize	A
10907	TL39050	Arizona	23893.304100	No	Extended	Bachelor	2/6/11	Employed	F	39837	...	0.0	2	Corporate Auto	Corporate L3	Offer1	Web	381.306996	Luxury SUV	Medsize	NaN
10908	WA60547	California	11971.977650	No	Premium	College	2/13/11	Employed	F	64195	...	4.0	6	Personal Auto	Personal L1	Offer1	Branch	618.288849	SUV	Medsize	A
10909	IV32877	NaN	6857.519928	NaN	Basic	Bachelor	1/8/11	Unemployed	M	0	...	0.0	3	Personal Auto	Personal L1	Offer4	Web	1021.719397	SUV	Medsize	NaN
		average_monthly_premium	average_clv	average_claim_amount
policy_type	gender
Corporate Auto	F	94.30	7712.63	433.74
M	92.19	7944.47	408.58
Personal Auto	F	99.00	8339.79	452.97
M	91.09	7448.38	457.01
Special Auto	F	92.31	7691.58	453.28
M	86.34	8247.09	429.53
		max	min	median
education	gender
Bachelor	F	73225.96	1904.00	5640.51
M	67907.27	1898.01	5548.03
College	F	61850.19	1898.68	5623.61
M	61134.68	1918.12	6005.85
Doctor	F	44856.11	2395.57	5332.46
M	32677.34	2267.60	5577.67
High School or Below	F	55277.45	2144.92	6039.55
M	83325.38	1940.98	6286.73
Master	F	51016.07	2417.78	5729.86
M	50568.26	2272.31	5579.10