diff --git a/lab-dw-aggregating.ipynb b/lab-dw-aggregating.ipynb
index fadd718..d9b14bc 100644
--- a/lab-dw-aggregating.ipynb
+++ b/lab-dw-aggregating.ipynb
@@ -36,6 +36,254 @@
" - have a response \"Yes\" to the last marketing campaign."
]
},
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "id": "7766cfde",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import pandas as pd\n",
+ "\n",
+ "url = \"https://raw.githubusercontent.com/data-bootcamp-v4/data/main/marketing_customer_analysis.csv\"\n",
+ "df = pd.read_csv(filepath_or_buffer=url)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "id": "18ddaef0",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df.columns = [name.lower().replace(\" \", \"_\") for name in df.columns]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "id": "1155c44c",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " unnamed:_0 | \n",
+ " customer | \n",
+ " state | \n",
+ " customer_lifetime_value | \n",
+ " response | \n",
+ " coverage | \n",
+ " education | \n",
+ " effective_to_date | \n",
+ " employmentstatus | \n",
+ " gender | \n",
+ " ... | \n",
+ " number_of_open_complaints | \n",
+ " number_of_policies | \n",
+ " policy_type | \n",
+ " policy | \n",
+ " renew_offer_type | \n",
+ " sales_channel | \n",
+ " total_claim_amount | \n",
+ " vehicle_class | \n",
+ " vehicle_size | \n",
+ " vehicle_type | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 3 | \n",
+ " 3 | \n",
+ " XL78013 | \n",
+ " Oregon | \n",
+ " 22332.439460 | \n",
+ " Yes | \n",
+ " Extended | \n",
+ " College | \n",
+ " 1/11/11 | \n",
+ " Employed | \n",
+ " M | \n",
+ " ... | \n",
+ " 0.0 | \n",
+ " 2 | \n",
+ " Corporate Auto | \n",
+ " Corporate L3 | \n",
+ " Offer2 | \n",
+ " Branch | \n",
+ " 484.013411 | \n",
+ " Four-Door Car | \n",
+ " Medsize | \n",
+ " A | \n",
+ "
\n",
+ " \n",
+ " 8 | \n",
+ " 8 | \n",
+ " FM55990 | \n",
+ " California | \n",
+ " 5989.773931 | \n",
+ " Yes | \n",
+ " Premium | \n",
+ " College | \n",
+ " 1/19/11 | \n",
+ " Employed | \n",
+ " M | \n",
+ " ... | \n",
+ " 0.0 | \n",
+ " 1 | \n",
+ " Personal Auto | \n",
+ " Personal L1 | \n",
+ " Offer2 | \n",
+ " Branch | \n",
+ " 739.200000 | \n",
+ " Sports Car | \n",
+ " Medsize | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 15 | \n",
+ " 15 | \n",
+ " CW49887 | \n",
+ " California | \n",
+ " 4626.801093 | \n",
+ " Yes | \n",
+ " Basic | \n",
+ " Master | \n",
+ " 1/16/11 | \n",
+ " Employed | \n",
+ " F | \n",
+ " ... | \n",
+ " 0.0 | \n",
+ " 1 | \n",
+ " Special Auto | \n",
+ " Special L1 | \n",
+ " Offer2 | \n",
+ " Branch | \n",
+ " 547.200000 | \n",
+ " SUV | \n",
+ " Medsize | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 19 | \n",
+ " 19 | \n",
+ " NJ54277 | \n",
+ " California | \n",
+ " 3746.751625 | \n",
+ " Yes | \n",
+ " Extended | \n",
+ " College | \n",
+ " 2/26/11 | \n",
+ " Employed | \n",
+ " F | \n",
+ " ... | \n",
+ " 1.0 | \n",
+ " 1 | \n",
+ " Personal Auto | \n",
+ " Personal L2 | \n",
+ " Offer2 | \n",
+ " Call Center | \n",
+ " 19.575683 | \n",
+ " Two-Door Car | \n",
+ " Large | \n",
+ " A | \n",
+ "
\n",
+ " \n",
+ " 27 | \n",
+ " 27 | \n",
+ " MQ68407 | \n",
+ " Oregon | \n",
+ " 4376.363592 | \n",
+ " Yes | \n",
+ " Premium | \n",
+ " Bachelor | \n",
+ " 2/28/11 | \n",
+ " Employed | \n",
+ " F | \n",
+ " ... | \n",
+ " 0.0 | \n",
+ " 1 | \n",
+ " Personal Auto | \n",
+ " Personal L3 | \n",
+ " Offer2 | \n",
+ " Agent | \n",
+ " 60.036683 | \n",
+ " Four-Door Car | \n",
+ " Medsize | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
5 rows × 26 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " unnamed:_0 customer state customer_lifetime_value response \\\n",
+ "3 3 XL78013 Oregon 22332.439460 Yes \n",
+ "8 8 FM55990 California 5989.773931 Yes \n",
+ "15 15 CW49887 California 4626.801093 Yes \n",
+ "19 19 NJ54277 California 3746.751625 Yes \n",
+ "27 27 MQ68407 Oregon 4376.363592 Yes \n",
+ "\n",
+ " coverage education effective_to_date employmentstatus gender ... \\\n",
+ "3 Extended College 1/11/11 Employed M ... \n",
+ "8 Premium College 1/19/11 Employed M ... \n",
+ "15 Basic Master 1/16/11 Employed F ... \n",
+ "19 Extended College 2/26/11 Employed F ... \n",
+ "27 Premium Bachelor 2/28/11 Employed F ... \n",
+ "\n",
+ " number_of_open_complaints number_of_policies policy_type \\\n",
+ "3 0.0 2 Corporate Auto \n",
+ "8 0.0 1 Personal Auto \n",
+ "15 0.0 1 Special Auto \n",
+ "19 1.0 1 Personal Auto \n",
+ "27 0.0 1 Personal Auto \n",
+ "\n",
+ " policy renew_offer_type sales_channel total_claim_amount \\\n",
+ "3 Corporate L3 Offer2 Branch 484.013411 \n",
+ "8 Personal L1 Offer2 Branch 739.200000 \n",
+ "15 Special L1 Offer2 Branch 547.200000 \n",
+ "19 Personal L2 Offer2 Call Center 19.575683 \n",
+ "27 Personal L3 Offer2 Agent 60.036683 \n",
+ "\n",
+ " vehicle_class vehicle_size vehicle_type \n",
+ "3 Four-Door Car Medsize A \n",
+ "8 Sports Car Medsize NaN \n",
+ "15 SUV Medsize NaN \n",
+ "19 Two-Door Car Large A \n",
+ "27 Four-Door Car Medsize NaN \n",
+ "\n",
+ "[5 rows x 26 columns]"
+ ]
+ },
+ "execution_count": 5,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "low_claim_yes = df.loc[df['total_claim_amount'].lt(1000) & df['response'].eq(\"Yes\")]\n",
+ "low_claim_yes.head()"
+ ]
+ },
{
"cell_type": "markdown",
"id": "b9be383e-5165-436e-80c8-57d4c757c8c3",
@@ -48,6 +296,112 @@
" - compare these insights to `total_claim_amount` patterns, and discuss which segments appear most profitable or low-risk for the company."
]
},
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "id": "bea60123",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "yes_responders = df.loc[df['response'].eq(\"Yes\")]\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "id": "e9102999",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "grouped = yes_responders.groupby(['policy_type', 'gender'], as_index=False)[['monthly_premium_auto', 'customer_lifetime_value']].mean()\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "id": "d2b66241",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " policy_type | \n",
+ " gender | \n",
+ " total_claim_amount | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " Corporate Auto | \n",
+ " F | \n",
+ " 433.738499 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " Corporate Auto | \n",
+ " M | \n",
+ " 408.582459 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " Personal Auto | \n",
+ " F | \n",
+ " 452.965929 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " Personal Auto | \n",
+ " M | \n",
+ " 457.010178 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " Special Auto | \n",
+ " F | \n",
+ " 453.280164 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " policy_type gender total_claim_amount\n",
+ "0 Corporate Auto F 433.738499\n",
+ "1 Corporate Auto M 408.582459\n",
+ "2 Personal Auto F 452.965929\n",
+ "3 Personal Auto M 457.010178\n",
+ "4 Special Auto F 453.280164"
+ ]
+ },
+ "execution_count": 9,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "claim_pattern = yes_responders.groupby(['policy_type', 'gender']).agg(total_claim_amount=('total_claim_amount', 'mean')).reset_index()\n",
+ "claim_pattern.head()"
+ ]
+ },
{
"cell_type": "markdown",
"id": "7050f4ac-53c5-4193-a3c0-8699b87196f0",
@@ -58,6 +412,17 @@
"3. Analyze the total number of customers who have policies in each state, and then filter the results to only include states where there are more than 500 customers."
]
},
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "id": "616bcc9a",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "state_counts = df.loc[:, 'state'].value_counts()\n",
+ "states_over_500 = state_counts.loc[state_counts.gt(500)]\n"
+ ]
+ },
{
"cell_type": "markdown",
"id": "b60a4443-a1a7-4bbf-b78e-9ccdf9895e0d",
@@ -68,6 +433,113 @@
"4. Find the maximum, minimum, and median customer lifetime value by education level and gender. Write your conclusions."
]
},
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "id": "4bcf8d06",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " education | \n",
+ " gender | \n",
+ " max | \n",
+ " min | \n",
+ " median | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " Bachelor | \n",
+ " F | \n",
+ " 73225.95652 | \n",
+ " 1904.000852 | \n",
+ " 5640.505303 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " Bachelor | \n",
+ " M | \n",
+ " 67907.27050 | \n",
+ " 1898.007675 | \n",
+ " 5548.031892 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " College | \n",
+ " F | \n",
+ " 61850.18803 | \n",
+ " 1898.683686 | \n",
+ " 5623.611187 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " College | \n",
+ " M | \n",
+ " 61134.68307 | \n",
+ " 1918.119700 | \n",
+ " 6005.847375 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " Doctor | \n",
+ " F | \n",
+ " 44856.11397 | \n",
+ " 2395.570000 | \n",
+ " 5332.462694 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " education gender max min median\n",
+ "0 Bachelor F 73225.95652 1904.000852 5640.505303\n",
+ "1 Bachelor M 67907.27050 1898.007675 5548.031892\n",
+ "2 College F 61850.18803 1898.683686 5623.611187\n",
+ "3 College M 61134.68307 1918.119700 6005.847375\n",
+ "4 Doctor F 44856.11397 2395.570000 5332.462694"
+ ]
+ },
+ "execution_count": 12,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "clv_stats = df.groupby(['education', 'gender'])['customer_lifetime_value'].agg(max='max', min='min', median='median').reset_index()\n",
+ "clv_stats.head()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "0d6e712c",
+ "metadata": {},
+ "source": [
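+ "- Doctorate-level customers show a lower maximum CLV: in the rows displayed above, female Doctors peak at about 44.9k, versus roughly 61k–73k for College and Bachelor customers.\n",
+ "- Customers with a Bachelor's degree tend to have the highest maximum CLV (about 73.2k for women and 67.9k for men in the rows displayed above)."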
+ ]
+ },
{
"cell_type": "markdown",
"id": "b42999f9-311f-481e-ae63-40a5577072c5",
@@ -88,6 +560,112 @@
"5. The marketing team wants to analyze the number of policies sold by state and month. Present the data in a table where the months are arranged as columns and the states are arranged as rows."
]
},
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "id": "2eb4fabe",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "C:\\Users\\levgi\\AppData\\Local\\Temp\\ipykernel_11876\\3086097951.py:1: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n",
+ " df['effective_to_date'] = pd.to_datetime(df['effective_to_date'])\n"
+ ]
+ }
+ ],
+ "source": [
+ "df['effective_to_date'] = pd.to_datetime(df['effective_to_date'], format='%m/%d/%y')  # dates look like 1/11/11 or 2/26/11: month/day/2-digit year; explicit format avoids the dateutil fallback warning\n",
+ "df['month'] = df['effective_to_date'].dt.strftime('%B')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 15,
+ "id": "ab84c1ca",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " month | \n",
+ " February | \n",
+ " January | \n",
+ "
\n",
+ " \n",
+ " state | \n",
+ " | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " Arizona | \n",
+ " 929 | \n",
+ " 1008 | \n",
+ "
\n",
+ " \n",
+ " California | \n",
+ " 1634 | \n",
+ " 1918 | \n",
+ "
\n",
+ " \n",
+ " Nevada | \n",
+ " 442 | \n",
+ " 551 | \n",
+ "
\n",
+ " \n",
+ " Oregon | \n",
+ " 1344 | \n",
+ " 1565 | \n",
+ "
\n",
+ " \n",
+ " Washington | \n",
+ " 425 | \n",
+ " 463 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ "month February January\n",
+ "state \n",
+ "Arizona 929 1008\n",
+ "California 1634 1918\n",
+ "Nevada 442 551\n",
+ "Oregon 1344 1565\n",
+ "Washington 425 463"
+ ]
+ },
+ "execution_count": 15,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "policies_by_state_month = pd.pivot_table(df, index='state', columns='month', values='policy', aggfunc='count', fill_value=0)\n",
+ "policies_by_state_month.head()"
+ ]
+ },
{
"cell_type": "markdown",
"id": "b6aec097-c633-4017-a125-e77a97259cda",
@@ -103,6 +681,93 @@
"- *Finally, you will create a new DataFrame that contains the number of policies sold by month for each of the top 3 states.*"
]
},
+ {
+ "cell_type": "code",
+ "execution_count": 19,
+ "id": "e43cdb63",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Rank states by number of rows (policies); value_counts() orders by count, highest first, so no re-sort of the state names is needed\n",
+ "top_states = df['state'].value_counts().nlargest(3).index\n",
+ "top_states_df = df[df['state'].isin(top_states)]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 20,
+ "id": "9713939d",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " month | \n",
+ " February | \n",
+ " January | \n",
+ "
\n",
+ " \n",
+ " state | \n",
+ " | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " Arizona | \n",
+ " 929 | \n",
+ " 1008 | \n",
+ "
\n",
+ " \n",
+ " California | \n",
+ " 1634 | \n",
+ " 1918 | \n",
+ "
\n",
+ " \n",
+ " Oregon | \n",
+ " 1344 | \n",
+ " 1565 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ "month February January\n",
+ "state \n",
+ "Arizona 929 1008\n",
+ "California 1634 1918\n",
+ "Oregon 1344 1565"
+ ]
+ },
+ "execution_count": 20,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "grouped_top = top_states_df.groupby(['state', 'month'], as_index=False)['policy'].count()\n",
+ "top3_pivot = pd.pivot(grouped_top, index='state', columns='month', values='policy').fillna(0)\n",
+ "top3_pivot.head()"
+ ]
+ },
{
"cell_type": "markdown",
"id": "ba975b8a-a2cf-4fbf-9f59-ebc381767009",
@@ -127,14 +792,31 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 22,
"id": "449513f4-0459-46a0-a18d-9398d974c9ad",
"metadata": {
"id": "449513f4-0459-46a0-a18d-9398d974c9ad"
},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "sales_channel\n",
+ "Agent 0.190746\n",
+ "Branch 0.113787\n",
+ "Call Center 0.109786\n",
+ "Web 0.117141\n",
+ "Name: Yes, dtype: float64"
+ ]
+ },
+ "execution_count": 22,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "# your code goes here"
+ "response_rate = df.groupby('sales_channel')['response'].value_counts(normalize=True).unstack(fill_value=0).loc[:, 'Yes']\n",
+ "response_rate"
]
}
],
@@ -143,7 +825,7 @@
"provenance": []
},
"kernelspec": {
- "display_name": "Python 3 (ipykernel)",
+ "display_name": "base",
"language": "python",
"name": "python3"
},
@@ -157,7 +839,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.9.13"
+ "version": "3.11.7"
}
},
"nbformat": 4,