diff --git a/lab-dw-aggregating.ipynb b/lab-dw-aggregating.ipynb
index fadd718..927829f 100644
--- a/lab-dw-aggregating.ipynb
+++ b/lab-dw-aggregating.ipynb
@@ -1,165 +1,4738 @@
{
- "cells": [
- {
- "cell_type": "markdown",
- "id": "31969215-2a90-4d8b-ac36-646a7ae13744",
- "metadata": {
- "id": "31969215-2a90-4d8b-ac36-646a7ae13744"
- },
- "source": [
- "# Lab | Data Aggregation and Filtering"
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "31969215-2a90-4d8b-ac36-646a7ae13744",
+ "metadata": {
+ "id": "31969215-2a90-4d8b-ac36-646a7ae13744"
+ },
+ "source": [
+ "# Lab | Data Aggregation and Filtering"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "a8f08a52-bec0-439b-99cc-11d3809d8b5d",
+ "metadata": {
+ "id": "a8f08a52-bec0-439b-99cc-11d3809d8b5d"
+ },
+ "source": [
+ "In this challenge, we will continue to work with customer data from an insurance company. We will use the dataset called marketing_customer_analysis.csv, which can be found at the following link:\n",
+ "\n",
+ "https://raw.githubusercontent.com/data-bootcamp-v4/data/main/marketing_customer_analysis.csv\n",
+ "\n",
+ "This dataset contains information such as customer demographics, policy details, vehicle information, and the customer's response to the last marketing campaign. Our goal is to explore and analyze this data by first performing data cleaning, formatting, and structuring."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "id": "27bcb0b8-d34b-43cb-a8e0-88a7b065158d",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import pandas as pd"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "id": "889be365-bc0a-4aa5-9078-7c5afd554453",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Read the dataset straight from the raw GitHub URL (this cell needs network access)\n",
+ "url = 'https://raw.githubusercontent.com/data-bootcamp-v4/data/main/marketing_customer_analysis.csv'\n",
+ "marketing_customer_analysis = pd.read_csv(url)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "id": "2b9db5e6-2c01-422a-8fee-49284adf1e59",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Unnamed: 0 | \n",
+ " Customer | \n",
+ " State | \n",
+ " Customer Lifetime Value | \n",
+ " Response | \n",
+ " Coverage | \n",
+ " Education | \n",
+ " Effective To Date | \n",
+ " EmploymentStatus | \n",
+ " Gender | \n",
+ " ... | \n",
+ " Number of Open Complaints | \n",
+ " Number of Policies | \n",
+ " Policy Type | \n",
+ " Policy | \n",
+ " Renew Offer Type | \n",
+ " Sales Channel | \n",
+ " Total Claim Amount | \n",
+ " Vehicle Class | \n",
+ " Vehicle Size | \n",
+ " Vehicle Type | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 0 | \n",
+ " DK49336 | \n",
+ " Arizona | \n",
+ " 4809.216960 | \n",
+ " No | \n",
+ " Basic | \n",
+ " College | \n",
+ " 2/18/11 | \n",
+ " Employed | \n",
+ " M | \n",
+ " ... | \n",
+ " 0.0 | \n",
+ " 9 | \n",
+ " Corporate Auto | \n",
+ " Corporate L3 | \n",
+ " Offer3 | \n",
+ " Agent | \n",
+ " 292.800000 | \n",
+ " Four-Door Car | \n",
+ " Medsize | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 1 | \n",
+ " KX64629 | \n",
+ " California | \n",
+ " 2228.525238 | \n",
+ " No | \n",
+ " Basic | \n",
+ " College | \n",
+ " 1/18/11 | \n",
+ " Unemployed | \n",
+ " F | \n",
+ " ... | \n",
+ " 0.0 | \n",
+ " 1 | \n",
+ " Personal Auto | \n",
+ " Personal L3 | \n",
+ " Offer4 | \n",
+ " Call Center | \n",
+ " 744.924331 | \n",
+ " Four-Door Car | \n",
+ " Medsize | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 2 | \n",
+ " LZ68649 | \n",
+ " Washington | \n",
+ " 14947.917300 | \n",
+ " No | \n",
+ " Basic | \n",
+ " Bachelor | \n",
+ " 2/10/11 | \n",
+ " Employed | \n",
+ " M | \n",
+ " ... | \n",
+ " 0.0 | \n",
+ " 2 | \n",
+ " Personal Auto | \n",
+ " Personal L3 | \n",
+ " Offer3 | \n",
+ " Call Center | \n",
+ " 480.000000 | \n",
+ " SUV | \n",
+ " Medsize | \n",
+ " A | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 3 | \n",
+ " XL78013 | \n",
+ " Oregon | \n",
+ " 22332.439460 | \n",
+ " Yes | \n",
+ " Extended | \n",
+ " College | \n",
+ " 1/11/11 | \n",
+ " Employed | \n",
+ " M | \n",
+ " ... | \n",
+ " 0.0 | \n",
+ " 2 | \n",
+ " Corporate Auto | \n",
+ " Corporate L3 | \n",
+ " Offer2 | \n",
+ " Branch | \n",
+ " 484.013411 | \n",
+ " Four-Door Car | \n",
+ " Medsize | \n",
+ " A | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 4 | \n",
+ " QA50777 | \n",
+ " Oregon | \n",
+ " 9025.067525 | \n",
+ " No | \n",
+ " Premium | \n",
+ " Bachelor | \n",
+ " 1/17/11 | \n",
+ " Medical Leave | \n",
+ " F | \n",
+ " ... | \n",
+ " NaN | \n",
+ " 7 | \n",
+ " Personal Auto | \n",
+ " Personal L2 | \n",
+ " Offer1 | \n",
+ " Branch | \n",
+ " 707.925645 | \n",
+ " Four-Door Car | \n",
+ " Medsize | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 10905 | \n",
+ " 10905 | \n",
+ " FE99816 | \n",
+ " Nevada | \n",
+ " 15563.369440 | \n",
+ " No | \n",
+ " Premium | \n",
+ " Bachelor | \n",
+ " 1/19/11 | \n",
+ " Unemployed | \n",
+ " F | \n",
+ " ... | \n",
+ " NaN | \n",
+ " 7 | \n",
+ " Personal Auto | \n",
+ " Personal L1 | \n",
+ " Offer3 | \n",
+ " Web | \n",
+ " 1214.400000 | \n",
+ " Luxury Car | \n",
+ " Medsize | \n",
+ " A | \n",
+ "
\n",
+ " \n",
+ " 10906 | \n",
+ " 10906 | \n",
+ " KX53892 | \n",
+ " Oregon | \n",
+ " 5259.444853 | \n",
+ " No | \n",
+ " Basic | \n",
+ " College | \n",
+ " 1/6/11 | \n",
+ " Employed | \n",
+ " F | \n",
+ " ... | \n",
+ " 0.0 | \n",
+ " 6 | \n",
+ " Personal Auto | \n",
+ " Personal L3 | \n",
+ " Offer2 | \n",
+ " Branch | \n",
+ " 273.018929 | \n",
+ " Four-Door Car | \n",
+ " Medsize | \n",
+ " A | \n",
+ "
\n",
+ " \n",
+ " 10907 | \n",
+ " 10907 | \n",
+ " TL39050 | \n",
+ " Arizona | \n",
+ " 23893.304100 | \n",
+ " No | \n",
+ " Extended | \n",
+ " Bachelor | \n",
+ " 2/6/11 | \n",
+ " Employed | \n",
+ " F | \n",
+ " ... | \n",
+ " 0.0 | \n",
+ " 2 | \n",
+ " Corporate Auto | \n",
+ " Corporate L3 | \n",
+ " Offer1 | \n",
+ " Web | \n",
+ " 381.306996 | \n",
+ " Luxury SUV | \n",
+ " Medsize | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 10908 | \n",
+ " 10908 | \n",
+ " WA60547 | \n",
+ " California | \n",
+ " 11971.977650 | \n",
+ " No | \n",
+ " Premium | \n",
+ " College | \n",
+ " 2/13/11 | \n",
+ " Employed | \n",
+ " F | \n",
+ " ... | \n",
+ " 4.0 | \n",
+ " 6 | \n",
+ " Personal Auto | \n",
+ " Personal L1 | \n",
+ " Offer1 | \n",
+ " Branch | \n",
+ " 618.288849 | \n",
+ " SUV | \n",
+ " Medsize | \n",
+ " A | \n",
+ "
\n",
+ " \n",
+ " 10909 | \n",
+ " 10909 | \n",
+ " IV32877 | \n",
+ " NaN | \n",
+ " 6857.519928 | \n",
+ " NaN | \n",
+ " Basic | \n",
+ " Bachelor | \n",
+ " 1/8/11 | \n",
+ " Unemployed | \n",
+ " M | \n",
+ " ... | \n",
+ " 0.0 | \n",
+ " 3 | \n",
+ " Personal Auto | \n",
+ " Personal L1 | \n",
+ " Offer4 | \n",
+ " Web | \n",
+ " 1021.719397 | \n",
+ " SUV | \n",
+ " Medsize | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
10910 rows × 26 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Unnamed: 0 Customer State Customer Lifetime Value Response \\\n",
+ "0 0 DK49336 Arizona 4809.216960 No \n",
+ "1 1 KX64629 California 2228.525238 No \n",
+ "2 2 LZ68649 Washington 14947.917300 No \n",
+ "3 3 XL78013 Oregon 22332.439460 Yes \n",
+ "4 4 QA50777 Oregon 9025.067525 No \n",
+ "... ... ... ... ... ... \n",
+ "10905 10905 FE99816 Nevada 15563.369440 No \n",
+ "10906 10906 KX53892 Oregon 5259.444853 No \n",
+ "10907 10907 TL39050 Arizona 23893.304100 No \n",
+ "10908 10908 WA60547 California 11971.977650 No \n",
+ "10909 10909 IV32877 NaN 6857.519928 NaN \n",
+ "\n",
+ " Coverage Education Effective To Date EmploymentStatus Gender ... \\\n",
+ "0 Basic College 2/18/11 Employed M ... \n",
+ "1 Basic College 1/18/11 Unemployed F ... \n",
+ "2 Basic Bachelor 2/10/11 Employed M ... \n",
+ "3 Extended College 1/11/11 Employed M ... \n",
+ "4 Premium Bachelor 1/17/11 Medical Leave F ... \n",
+ "... ... ... ... ... ... ... \n",
+ "10905 Premium Bachelor 1/19/11 Unemployed F ... \n",
+ "10906 Basic College 1/6/11 Employed F ... \n",
+ "10907 Extended Bachelor 2/6/11 Employed F ... \n",
+ "10908 Premium College 2/13/11 Employed F ... \n",
+ "10909 Basic Bachelor 1/8/11 Unemployed M ... \n",
+ "\n",
+ " Number of Open Complaints Number of Policies Policy Type \\\n",
+ "0 0.0 9 Corporate Auto \n",
+ "1 0.0 1 Personal Auto \n",
+ "2 0.0 2 Personal Auto \n",
+ "3 0.0 2 Corporate Auto \n",
+ "4 NaN 7 Personal Auto \n",
+ "... ... ... ... \n",
+ "10905 NaN 7 Personal Auto \n",
+ "10906 0.0 6 Personal Auto \n",
+ "10907 0.0 2 Corporate Auto \n",
+ "10908 4.0 6 Personal Auto \n",
+ "10909 0.0 3 Personal Auto \n",
+ "\n",
+ " Policy Renew Offer Type Sales Channel Total Claim Amount \\\n",
+ "0 Corporate L3 Offer3 Agent 292.800000 \n",
+ "1 Personal L3 Offer4 Call Center 744.924331 \n",
+ "2 Personal L3 Offer3 Call Center 480.000000 \n",
+ "3 Corporate L3 Offer2 Branch 484.013411 \n",
+ "4 Personal L2 Offer1 Branch 707.925645 \n",
+ "... ... ... ... ... \n",
+ "10905 Personal L1 Offer3 Web 1214.400000 \n",
+ "10906 Personal L3 Offer2 Branch 273.018929 \n",
+ "10907 Corporate L3 Offer1 Web 381.306996 \n",
+ "10908 Personal L1 Offer1 Branch 618.288849 \n",
+ "10909 Personal L1 Offer4 Web 1021.719397 \n",
+ "\n",
+ " Vehicle Class Vehicle Size Vehicle Type \n",
+ "0 Four-Door Car Medsize NaN \n",
+ "1 Four-Door Car Medsize NaN \n",
+ "2 SUV Medsize A \n",
+ "3 Four-Door Car Medsize A \n",
+ "4 Four-Door Car Medsize NaN \n",
+ "... ... ... ... \n",
+ "10905 Luxury Car Medsize A \n",
+ "10906 Four-Door Car Medsize A \n",
+ "10907 Luxury SUV Medsize NaN \n",
+ "10908 SUV Medsize A \n",
+ "10909 SUV Medsize NaN \n",
+ "\n",
+ "[10910 rows x 26 columns]"
+ ]
+ },
+ "execution_count": 3,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "marketing_customer_analysis"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "id": "948f4f56-f932-411c-93c4-9e7bfd4a8b7b",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Column-label normaliser: upper-case, trim, spaces -> underscores\n",
+ "def clean_column_names_1(columns):\n",
+ "    \"\"\"Return a new list with every label in `columns` cleaned up.\n",
+ "\n",
+ "    Each label is upper-cased, stripped of leading/trailing whitespace, and\n",
+ "    has every remaining space replaced by an underscore. Input order is kept.\n",
+ "    \"\"\"\n",
+ "    return [label.upper().strip().replace(\" \", \"_\") for label in columns]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "id": "e3330ced-45f9-48d3-a3bb-11ab811ad068",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Replace the frame's column labels with their cleaned versions (modifies the existing frame)\n",
+ "marketing_customer_analysis.columns = clean_column_names_1(marketing_customer_analysis.columns)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "id": "4cd82200-7653-4fb0-aeeb-0a4eed63d79b",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " UNNAMED:_0 | \n",
+ " CUSTOMER | \n",
+ " STATE | \n",
+ " CUSTOMER_LIFETIME_VALUE | \n",
+ " RESPONSE | \n",
+ " COVERAGE | \n",
+ " EDUCATION | \n",
+ " EFFECTIVE_TO_DATE | \n",
+ " EMPLOYMENTSTATUS | \n",
+ " GENDER | \n",
+ " ... | \n",
+ " NUMBER_OF_OPEN_COMPLAINTS | \n",
+ " NUMBER_OF_POLICIES | \n",
+ " POLICY_TYPE | \n",
+ " POLICY | \n",
+ " RENEW_OFFER_TYPE | \n",
+ " SALES_CHANNEL | \n",
+ " TOTAL_CLAIM_AMOUNT | \n",
+ " VEHICLE_CLASS | \n",
+ " VEHICLE_SIZE | \n",
+ " VEHICLE_TYPE | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 0 | \n",
+ " DK49336 | \n",
+ " Arizona | \n",
+ " 4809.216960 | \n",
+ " No | \n",
+ " Basic | \n",
+ " College | \n",
+ " 2/18/11 | \n",
+ " Employed | \n",
+ " M | \n",
+ " ... | \n",
+ " 0.0 | \n",
+ " 9 | \n",
+ " Corporate Auto | \n",
+ " Corporate L3 | \n",
+ " Offer3 | \n",
+ " Agent | \n",
+ " 292.800000 | \n",
+ " Four-Door Car | \n",
+ " Medsize | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 1 | \n",
+ " KX64629 | \n",
+ " California | \n",
+ " 2228.525238 | \n",
+ " No | \n",
+ " Basic | \n",
+ " College | \n",
+ " 1/18/11 | \n",
+ " Unemployed | \n",
+ " F | \n",
+ " ... | \n",
+ " 0.0 | \n",
+ " 1 | \n",
+ " Personal Auto | \n",
+ " Personal L3 | \n",
+ " Offer4 | \n",
+ " Call Center | \n",
+ " 744.924331 | \n",
+ " Four-Door Car | \n",
+ " Medsize | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 2 | \n",
+ " LZ68649 | \n",
+ " Washington | \n",
+ " 14947.917300 | \n",
+ " No | \n",
+ " Basic | \n",
+ " Bachelor | \n",
+ " 2/10/11 | \n",
+ " Employed | \n",
+ " M | \n",
+ " ... | \n",
+ " 0.0 | \n",
+ " 2 | \n",
+ " Personal Auto | \n",
+ " Personal L3 | \n",
+ " Offer3 | \n",
+ " Call Center | \n",
+ " 480.000000 | \n",
+ " SUV | \n",
+ " Medsize | \n",
+ " A | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 3 | \n",
+ " XL78013 | \n",
+ " Oregon | \n",
+ " 22332.439460 | \n",
+ " Yes | \n",
+ " Extended | \n",
+ " College | \n",
+ " 1/11/11 | \n",
+ " Employed | \n",
+ " M | \n",
+ " ... | \n",
+ " 0.0 | \n",
+ " 2 | \n",
+ " Corporate Auto | \n",
+ " Corporate L3 | \n",
+ " Offer2 | \n",
+ " Branch | \n",
+ " 484.013411 | \n",
+ " Four-Door Car | \n",
+ " Medsize | \n",
+ " A | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 4 | \n",
+ " QA50777 | \n",
+ " Oregon | \n",
+ " 9025.067525 | \n",
+ " No | \n",
+ " Premium | \n",
+ " Bachelor | \n",
+ " 1/17/11 | \n",
+ " Medical Leave | \n",
+ " F | \n",
+ " ... | \n",
+ " NaN | \n",
+ " 7 | \n",
+ " Personal Auto | \n",
+ " Personal L2 | \n",
+ " Offer1 | \n",
+ " Branch | \n",
+ " 707.925645 | \n",
+ " Four-Door Car | \n",
+ " Medsize | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 10905 | \n",
+ " 10905 | \n",
+ " FE99816 | \n",
+ " Nevada | \n",
+ " 15563.369440 | \n",
+ " No | \n",
+ " Premium | \n",
+ " Bachelor | \n",
+ " 1/19/11 | \n",
+ " Unemployed | \n",
+ " F | \n",
+ " ... | \n",
+ " NaN | \n",
+ " 7 | \n",
+ " Personal Auto | \n",
+ " Personal L1 | \n",
+ " Offer3 | \n",
+ " Web | \n",
+ " 1214.400000 | \n",
+ " Luxury Car | \n",
+ " Medsize | \n",
+ " A | \n",
+ "
\n",
+ " \n",
+ " 10906 | \n",
+ " 10906 | \n",
+ " KX53892 | \n",
+ " Oregon | \n",
+ " 5259.444853 | \n",
+ " No | \n",
+ " Basic | \n",
+ " College | \n",
+ " 1/6/11 | \n",
+ " Employed | \n",
+ " F | \n",
+ " ... | \n",
+ " 0.0 | \n",
+ " 6 | \n",
+ " Personal Auto | \n",
+ " Personal L3 | \n",
+ " Offer2 | \n",
+ " Branch | \n",
+ " 273.018929 | \n",
+ " Four-Door Car | \n",
+ " Medsize | \n",
+ " A | \n",
+ "
\n",
+ " \n",
+ " 10907 | \n",
+ " 10907 | \n",
+ " TL39050 | \n",
+ " Arizona | \n",
+ " 23893.304100 | \n",
+ " No | \n",
+ " Extended | \n",
+ " Bachelor | \n",
+ " 2/6/11 | \n",
+ " Employed | \n",
+ " F | \n",
+ " ... | \n",
+ " 0.0 | \n",
+ " 2 | \n",
+ " Corporate Auto | \n",
+ " Corporate L3 | \n",
+ " Offer1 | \n",
+ " Web | \n",
+ " 381.306996 | \n",
+ " Luxury SUV | \n",
+ " Medsize | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 10908 | \n",
+ " 10908 | \n",
+ " WA60547 | \n",
+ " California | \n",
+ " 11971.977650 | \n",
+ " No | \n",
+ " Premium | \n",
+ " College | \n",
+ " 2/13/11 | \n",
+ " Employed | \n",
+ " F | \n",
+ " ... | \n",
+ " 4.0 | \n",
+ " 6 | \n",
+ " Personal Auto | \n",
+ " Personal L1 | \n",
+ " Offer1 | \n",
+ " Branch | \n",
+ " 618.288849 | \n",
+ " SUV | \n",
+ " Medsize | \n",
+ " A | \n",
+ "
\n",
+ " \n",
+ " 10909 | \n",
+ " 10909 | \n",
+ " IV32877 | \n",
+ " NaN | \n",
+ " 6857.519928 | \n",
+ " NaN | \n",
+ " Basic | \n",
+ " Bachelor | \n",
+ " 1/8/11 | \n",
+ " Unemployed | \n",
+ " M | \n",
+ " ... | \n",
+ " 0.0 | \n",
+ " 3 | \n",
+ " Personal Auto | \n",
+ " Personal L1 | \n",
+ " Offer4 | \n",
+ " Web | \n",
+ " 1021.719397 | \n",
+ " SUV | \n",
+ " Medsize | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
10910 rows × 26 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " UNNAMED:_0 CUSTOMER STATE CUSTOMER_LIFETIME_VALUE RESPONSE \\\n",
+ "0 0 DK49336 Arizona 4809.216960 No \n",
+ "1 1 KX64629 California 2228.525238 No \n",
+ "2 2 LZ68649 Washington 14947.917300 No \n",
+ "3 3 XL78013 Oregon 22332.439460 Yes \n",
+ "4 4 QA50777 Oregon 9025.067525 No \n",
+ "... ... ... ... ... ... \n",
+ "10905 10905 FE99816 Nevada 15563.369440 No \n",
+ "10906 10906 KX53892 Oregon 5259.444853 No \n",
+ "10907 10907 TL39050 Arizona 23893.304100 No \n",
+ "10908 10908 WA60547 California 11971.977650 No \n",
+ "10909 10909 IV32877 NaN 6857.519928 NaN \n",
+ "\n",
+ " COVERAGE EDUCATION EFFECTIVE_TO_DATE EMPLOYMENTSTATUS GENDER ... \\\n",
+ "0 Basic College 2/18/11 Employed M ... \n",
+ "1 Basic College 1/18/11 Unemployed F ... \n",
+ "2 Basic Bachelor 2/10/11 Employed M ... \n",
+ "3 Extended College 1/11/11 Employed M ... \n",
+ "4 Premium Bachelor 1/17/11 Medical Leave F ... \n",
+ "... ... ... ... ... ... ... \n",
+ "10905 Premium Bachelor 1/19/11 Unemployed F ... \n",
+ "10906 Basic College 1/6/11 Employed F ... \n",
+ "10907 Extended Bachelor 2/6/11 Employed F ... \n",
+ "10908 Premium College 2/13/11 Employed F ... \n",
+ "10909 Basic Bachelor 1/8/11 Unemployed M ... \n",
+ "\n",
+ " NUMBER_OF_OPEN_COMPLAINTS NUMBER_OF_POLICIES POLICY_TYPE \\\n",
+ "0 0.0 9 Corporate Auto \n",
+ "1 0.0 1 Personal Auto \n",
+ "2 0.0 2 Personal Auto \n",
+ "3 0.0 2 Corporate Auto \n",
+ "4 NaN 7 Personal Auto \n",
+ "... ... ... ... \n",
+ "10905 NaN 7 Personal Auto \n",
+ "10906 0.0 6 Personal Auto \n",
+ "10907 0.0 2 Corporate Auto \n",
+ "10908 4.0 6 Personal Auto \n",
+ "10909 0.0 3 Personal Auto \n",
+ "\n",
+ " POLICY RENEW_OFFER_TYPE SALES_CHANNEL TOTAL_CLAIM_AMOUNT \\\n",
+ "0 Corporate L3 Offer3 Agent 292.800000 \n",
+ "1 Personal L3 Offer4 Call Center 744.924331 \n",
+ "2 Personal L3 Offer3 Call Center 480.000000 \n",
+ "3 Corporate L3 Offer2 Branch 484.013411 \n",
+ "4 Personal L2 Offer1 Branch 707.925645 \n",
+ "... ... ... ... ... \n",
+ "10905 Personal L1 Offer3 Web 1214.400000 \n",
+ "10906 Personal L3 Offer2 Branch 273.018929 \n",
+ "10907 Corporate L3 Offer1 Web 381.306996 \n",
+ "10908 Personal L1 Offer1 Branch 618.288849 \n",
+ "10909 Personal L1 Offer4 Web 1021.719397 \n",
+ "\n",
+ " VEHICLE_CLASS VEHICLE_SIZE VEHICLE_TYPE \n",
+ "0 Four-Door Car Medsize NaN \n",
+ "1 Four-Door Car Medsize NaN \n",
+ "2 SUV Medsize A \n",
+ "3 Four-Door Car Medsize A \n",
+ "4 Four-Door Car Medsize NaN \n",
+ "... ... ... ... \n",
+ "10905 Luxury Car Medsize A \n",
+ "10906 Four-Door Car Medsize A \n",
+ "10907 Luxury SUV Medsize NaN \n",
+ "10908 SUV Medsize A \n",
+ "10909 SUV Medsize NaN \n",
+ "\n",
+ "[10910 rows x 26 columns]"
+ ]
+ },
+ "execution_count": 6,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "marketing_customer_analysis"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "id": "c8063ce7-73b1-4d5e-b6ea-06cc6ac955e1",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " UNNAMED:_0 | \n",
+ " CUSTOMER | \n",
+ " STATE | \n",
+ " CUSTOMER_LIFETIME_VALUE | \n",
+ " RESPONSE | \n",
+ " COVERAGE | \n",
+ " EDUCATION | \n",
+ " EFFECTIVE_TO_DATE | \n",
+ " EMPLOYMENT_STATUS | \n",
+ " GENDER | \n",
+ " ... | \n",
+ " NUMBER_OF_OPEN_COMPLAINTS | \n",
+ " NUMBER_OF_POLICIES | \n",
+ " POLICY_TYPE | \n",
+ " POLICY | \n",
+ " RENEW_OFFER_TYPE | \n",
+ " SALES_CHANNEL | \n",
+ " TOTAL_CLAIM_AMOUNT | \n",
+ " VEHICLE_CLASS | \n",
+ " VEHICLE_SIZE | \n",
+ " VEHICLE_TYPE | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 0 | \n",
+ " DK49336 | \n",
+ " Arizona | \n",
+ " 4809.216960 | \n",
+ " No | \n",
+ " Basic | \n",
+ " College | \n",
+ " 2/18/11 | \n",
+ " Employed | \n",
+ " M | \n",
+ " ... | \n",
+ " 0.0 | \n",
+ " 9 | \n",
+ " Corporate Auto | \n",
+ " Corporate L3 | \n",
+ " Offer3 | \n",
+ " Agent | \n",
+ " 292.800000 | \n",
+ " Four-Door Car | \n",
+ " Medsize | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 1 | \n",
+ " KX64629 | \n",
+ " California | \n",
+ " 2228.525238 | \n",
+ " No | \n",
+ " Basic | \n",
+ " College | \n",
+ " 1/18/11 | \n",
+ " Unemployed | \n",
+ " F | \n",
+ " ... | \n",
+ " 0.0 | \n",
+ " 1 | \n",
+ " Personal Auto | \n",
+ " Personal L3 | \n",
+ " Offer4 | \n",
+ " Call Center | \n",
+ " 744.924331 | \n",
+ " Four-Door Car | \n",
+ " Medsize | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 2 | \n",
+ " LZ68649 | \n",
+ " Washington | \n",
+ " 14947.917300 | \n",
+ " No | \n",
+ " Basic | \n",
+ " Bachelor | \n",
+ " 2/10/11 | \n",
+ " Employed | \n",
+ " M | \n",
+ " ... | \n",
+ " 0.0 | \n",
+ " 2 | \n",
+ " Personal Auto | \n",
+ " Personal L3 | \n",
+ " Offer3 | \n",
+ " Call Center | \n",
+ " 480.000000 | \n",
+ " SUV | \n",
+ " Medsize | \n",
+ " A | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 3 | \n",
+ " XL78013 | \n",
+ " Oregon | \n",
+ " 22332.439460 | \n",
+ " Yes | \n",
+ " Extended | \n",
+ " College | \n",
+ " 1/11/11 | \n",
+ " Employed | \n",
+ " M | \n",
+ " ... | \n",
+ " 0.0 | \n",
+ " 2 | \n",
+ " Corporate Auto | \n",
+ " Corporate L3 | \n",
+ " Offer2 | \n",
+ " Branch | \n",
+ " 484.013411 | \n",
+ " Four-Door Car | \n",
+ " Medsize | \n",
+ " A | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 4 | \n",
+ " QA50777 | \n",
+ " Oregon | \n",
+ " 9025.067525 | \n",
+ " No | \n",
+ " Premium | \n",
+ " Bachelor | \n",
+ " 1/17/11 | \n",
+ " Medical Leave | \n",
+ " F | \n",
+ " ... | \n",
+ " NaN | \n",
+ " 7 | \n",
+ " Personal Auto | \n",
+ " Personal L2 | \n",
+ " Offer1 | \n",
+ " Branch | \n",
+ " 707.925645 | \n",
+ " Four-Door Car | \n",
+ " Medsize | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 10905 | \n",
+ " 10905 | \n",
+ " FE99816 | \n",
+ " Nevada | \n",
+ " 15563.369440 | \n",
+ " No | \n",
+ " Premium | \n",
+ " Bachelor | \n",
+ " 1/19/11 | \n",
+ " Unemployed | \n",
+ " F | \n",
+ " ... | \n",
+ " NaN | \n",
+ " 7 | \n",
+ " Personal Auto | \n",
+ " Personal L1 | \n",
+ " Offer3 | \n",
+ " Web | \n",
+ " 1214.400000 | \n",
+ " Luxury Car | \n",
+ " Medsize | \n",
+ " A | \n",
+ "
\n",
+ " \n",
+ " 10906 | \n",
+ " 10906 | \n",
+ " KX53892 | \n",
+ " Oregon | \n",
+ " 5259.444853 | \n",
+ " No | \n",
+ " Basic | \n",
+ " College | \n",
+ " 1/6/11 | \n",
+ " Employed | \n",
+ " F | \n",
+ " ... | \n",
+ " 0.0 | \n",
+ " 6 | \n",
+ " Personal Auto | \n",
+ " Personal L3 | \n",
+ " Offer2 | \n",
+ " Branch | \n",
+ " 273.018929 | \n",
+ " Four-Door Car | \n",
+ " Medsize | \n",
+ " A | \n",
+ "
\n",
+ " \n",
+ " 10907 | \n",
+ " 10907 | \n",
+ " TL39050 | \n",
+ " Arizona | \n",
+ " 23893.304100 | \n",
+ " No | \n",
+ " Extended | \n",
+ " Bachelor | \n",
+ " 2/6/11 | \n",
+ " Employed | \n",
+ " F | \n",
+ " ... | \n",
+ " 0.0 | \n",
+ " 2 | \n",
+ " Corporate Auto | \n",
+ " Corporate L3 | \n",
+ " Offer1 | \n",
+ " Web | \n",
+ " 381.306996 | \n",
+ " Luxury SUV | \n",
+ " Medsize | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 10908 | \n",
+ " 10908 | \n",
+ " WA60547 | \n",
+ " California | \n",
+ " 11971.977650 | \n",
+ " No | \n",
+ " Premium | \n",
+ " College | \n",
+ " 2/13/11 | \n",
+ " Employed | \n",
+ " F | \n",
+ " ... | \n",
+ " 4.0 | \n",
+ " 6 | \n",
+ " Personal Auto | \n",
+ " Personal L1 | \n",
+ " Offer1 | \n",
+ " Branch | \n",
+ " 618.288849 | \n",
+ " SUV | \n",
+ " Medsize | \n",
+ " A | \n",
+ "
\n",
+ " \n",
+ " 10909 | \n",
+ " 10909 | \n",
+ " IV32877 | \n",
+ " NaN | \n",
+ " 6857.519928 | \n",
+ " NaN | \n",
+ " Basic | \n",
+ " Bachelor | \n",
+ " 1/8/11 | \n",
+ " Unemployed | \n",
+ " M | \n",
+ " ... | \n",
+ " 0.0 | \n",
+ " 3 | \n",
+ " Personal Auto | \n",
+ " Personal L1 | \n",
+ " Offer4 | \n",
+ " Web | \n",
+ " 1021.719397 | \n",
+ " SUV | \n",
+ " Medsize | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
10910 rows × 26 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " UNNAMED:_0 CUSTOMER STATE CUSTOMER_LIFETIME_VALUE RESPONSE \\\n",
+ "0 0 DK49336 Arizona 4809.216960 No \n",
+ "1 1 KX64629 California 2228.525238 No \n",
+ "2 2 LZ68649 Washington 14947.917300 No \n",
+ "3 3 XL78013 Oregon 22332.439460 Yes \n",
+ "4 4 QA50777 Oregon 9025.067525 No \n",
+ "... ... ... ... ... ... \n",
+ "10905 10905 FE99816 Nevada 15563.369440 No \n",
+ "10906 10906 KX53892 Oregon 5259.444853 No \n",
+ "10907 10907 TL39050 Arizona 23893.304100 No \n",
+ "10908 10908 WA60547 California 11971.977650 No \n",
+ "10909 10909 IV32877 NaN 6857.519928 NaN \n",
+ "\n",
+ " COVERAGE EDUCATION EFFECTIVE_TO_DATE EMPLOYMENT_STATUS GENDER ... \\\n",
+ "0 Basic College 2/18/11 Employed M ... \n",
+ "1 Basic College 1/18/11 Unemployed F ... \n",
+ "2 Basic Bachelor 2/10/11 Employed M ... \n",
+ "3 Extended College 1/11/11 Employed M ... \n",
+ "4 Premium Bachelor 1/17/11 Medical Leave F ... \n",
+ "... ... ... ... ... ... ... \n",
+ "10905 Premium Bachelor 1/19/11 Unemployed F ... \n",
+ "10906 Basic College 1/6/11 Employed F ... \n",
+ "10907 Extended Bachelor 2/6/11 Employed F ... \n",
+ "10908 Premium College 2/13/11 Employed F ... \n",
+ "10909 Basic Bachelor 1/8/11 Unemployed M ... \n",
+ "\n",
+ " NUMBER_OF_OPEN_COMPLAINTS NUMBER_OF_POLICIES POLICY_TYPE \\\n",
+ "0 0.0 9 Corporate Auto \n",
+ "1 0.0 1 Personal Auto \n",
+ "2 0.0 2 Personal Auto \n",
+ "3 0.0 2 Corporate Auto \n",
+ "4 NaN 7 Personal Auto \n",
+ "... ... ... ... \n",
+ "10905 NaN 7 Personal Auto \n",
+ "10906 0.0 6 Personal Auto \n",
+ "10907 0.0 2 Corporate Auto \n",
+ "10908 4.0 6 Personal Auto \n",
+ "10909 0.0 3 Personal Auto \n",
+ "\n",
+ " POLICY RENEW_OFFER_TYPE SALES_CHANNEL TOTAL_CLAIM_AMOUNT \\\n",
+ "0 Corporate L3 Offer3 Agent 292.800000 \n",
+ "1 Personal L3 Offer4 Call Center 744.924331 \n",
+ "2 Personal L3 Offer3 Call Center 480.000000 \n",
+ "3 Corporate L3 Offer2 Branch 484.013411 \n",
+ "4 Personal L2 Offer1 Branch 707.925645 \n",
+ "... ... ... ... ... \n",
+ "10905 Personal L1 Offer3 Web 1214.400000 \n",
+ "10906 Personal L3 Offer2 Branch 273.018929 \n",
+ "10907 Corporate L3 Offer1 Web 381.306996 \n",
+ "10908 Personal L1 Offer1 Branch 618.288849 \n",
+ "10909 Personal L1 Offer4 Web 1021.719397 \n",
+ "\n",
+ " VEHICLE_CLASS VEHICLE_SIZE VEHICLE_TYPE \n",
+ "0 Four-Door Car Medsize NaN \n",
+ "1 Four-Door Car Medsize NaN \n",
+ "2 SUV Medsize A \n",
+ "3 Four-Door Car Medsize A \n",
+ "4 Four-Door Car Medsize NaN \n",
+ "... ... ... ... \n",
+ "10905 Luxury Car Medsize A \n",
+ "10906 Four-Door Car Medsize A \n",
+ "10907 Luxury SUV Medsize NaN \n",
+ "10908 SUV Medsize A \n",
+ "10909 SUV Medsize NaN \n",
+ "\n",
+ "[10910 rows x 26 columns]"
]
- },
- {
- "cell_type": "markdown",
- "id": "a8f08a52-bec0-439b-99cc-11d3809d8b5d",
- "metadata": {
- "id": "a8f08a52-bec0-439b-99cc-11d3809d8b5d"
- },
- "source": [
- "In this challenge, we will continue to work with customer data from an insurance company. We will use the dataset called marketing_customer_analysis.csv, which can be found at the following link:\n",
- "\n",
- "https://raw.githubusercontent.com/data-bootcamp-v4/data/main/marketing_customer_analysis.csv\n",
- "\n",
- "This dataset contains information such as customer demographics, policy details, vehicle information, and the customer's response to the last marketing campaign. Our goal is to explore and analyze this data by first performing data cleaning, formatting, and structuring."
+ },
+ "execution_count": 7,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# The raw label 'EmploymentStatus' contains no space, so the cleaning step left it as\n",
+ "# 'EMPLOYMENTSTATUS'; rename it by hand so it matches the other underscore-separated labels.\n",
+ "marketing_customer_analysis = marketing_customer_analysis.rename(columns={'EMPLOYMENTSTATUS': 'EMPLOYMENT_STATUS'})\n",
+ "marketing_customer_analysis"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "id": "b1ec69be-c1d8-4e62-845b-0e42dc415965",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " CUSTOMER | \n",
+ " STATE | \n",
+ " CUSTOMER_LIFETIME_VALUE | \n",
+ " RESPONSE | \n",
+ " COVERAGE | \n",
+ " EDUCATION | \n",
+ " EFFECTIVE_TO_DATE | \n",
+ " EMPLOYMENT_STATUS | \n",
+ " GENDER | \n",
+ " INCOME | \n",
+ " ... | \n",
+ " NUMBER_OF_OPEN_COMPLAINTS | \n",
+ " NUMBER_OF_POLICIES | \n",
+ " POLICY_TYPE | \n",
+ " POLICY | \n",
+ " RENEW_OFFER_TYPE | \n",
+ " SALES_CHANNEL | \n",
+ " TOTAL_CLAIM_AMOUNT | \n",
+ " VEHICLE_CLASS | \n",
+ " VEHICLE_SIZE | \n",
+ " VEHICLE_TYPE | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " DK49336 | \n",
+ " Arizona | \n",
+ " 4809.216960 | \n",
+ " No | \n",
+ " Basic | \n",
+ " College | \n",
+ " 2/18/11 | \n",
+ " Employed | \n",
+ " M | \n",
+ " 48029 | \n",
+ " ... | \n",
+ " 0.0 | \n",
+ " 9 | \n",
+ " Corporate Auto | \n",
+ " Corporate L3 | \n",
+ " Offer3 | \n",
+ " Agent | \n",
+ " 292.800000 | \n",
+ " Four-Door Car | \n",
+ " Medsize | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " KX64629 | \n",
+ " California | \n",
+ " 2228.525238 | \n",
+ " No | \n",
+ " Basic | \n",
+ " College | \n",
+ " 1/18/11 | \n",
+ " Unemployed | \n",
+ " F | \n",
+ " 0 | \n",
+ " ... | \n",
+ " 0.0 | \n",
+ " 1 | \n",
+ " Personal Auto | \n",
+ " Personal L3 | \n",
+ " Offer4 | \n",
+ " Call Center | \n",
+ " 744.924331 | \n",
+ " Four-Door Car | \n",
+ " Medsize | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " LZ68649 | \n",
+ " Washington | \n",
+ " 14947.917300 | \n",
+ " No | \n",
+ " Basic | \n",
+ " Bachelor | \n",
+ " 2/10/11 | \n",
+ " Employed | \n",
+ " M | \n",
+ " 22139 | \n",
+ " ... | \n",
+ " 0.0 | \n",
+ " 2 | \n",
+ " Personal Auto | \n",
+ " Personal L3 | \n",
+ " Offer3 | \n",
+ " Call Center | \n",
+ " 480.000000 | \n",
+ " SUV | \n",
+ " Medsize | \n",
+ " A | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " XL78013 | \n",
+ " Oregon | \n",
+ " 22332.439460 | \n",
+ " Yes | \n",
+ " Extended | \n",
+ " College | \n",
+ " 1/11/11 | \n",
+ " Employed | \n",
+ " M | \n",
+ " 49078 | \n",
+ " ... | \n",
+ " 0.0 | \n",
+ " 2 | \n",
+ " Corporate Auto | \n",
+ " Corporate L3 | \n",
+ " Offer2 | \n",
+ " Branch | \n",
+ " 484.013411 | \n",
+ " Four-Door Car | \n",
+ " Medsize | \n",
+ " A | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " QA50777 | \n",
+ " Oregon | \n",
+ " 9025.067525 | \n",
+ " No | \n",
+ " Premium | \n",
+ " Bachelor | \n",
+ " 1/17/11 | \n",
+ " Medical Leave | \n",
+ " F | \n",
+ " 23675 | \n",
+ " ... | \n",
+ " NaN | \n",
+ " 7 | \n",
+ " Personal Auto | \n",
+ " Personal L2 | \n",
+ " Offer1 | \n",
+ " Branch | \n",
+ " 707.925645 | \n",
+ " Four-Door Car | \n",
+ " Medsize | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 10905 | \n",
+ " FE99816 | \n",
+ " Nevada | \n",
+ " 15563.369440 | \n",
+ " No | \n",
+ " Premium | \n",
+ " Bachelor | \n",
+ " 1/19/11 | \n",
+ " Unemployed | \n",
+ " F | \n",
+ " 0 | \n",
+ " ... | \n",
+ " NaN | \n",
+ " 7 | \n",
+ " Personal Auto | \n",
+ " Personal L1 | \n",
+ " Offer3 | \n",
+ " Web | \n",
+ " 1214.400000 | \n",
+ " Luxury Car | \n",
+ " Medsize | \n",
+ " A | \n",
+ "
\n",
+ " \n",
+ " 10906 | \n",
+ " KX53892 | \n",
+ " Oregon | \n",
+ " 5259.444853 | \n",
+ " No | \n",
+ " Basic | \n",
+ " College | \n",
+ " 1/6/11 | \n",
+ " Employed | \n",
+ " F | \n",
+ " 61146 | \n",
+ " ... | \n",
+ " 0.0 | \n",
+ " 6 | \n",
+ " Personal Auto | \n",
+ " Personal L3 | \n",
+ " Offer2 | \n",
+ " Branch | \n",
+ " 273.018929 | \n",
+ " Four-Door Car | \n",
+ " Medsize | \n",
+ " A | \n",
+ "
\n",
+ " \n",
+ " 10907 | \n",
+ " TL39050 | \n",
+ " Arizona | \n",
+ " 23893.304100 | \n",
+ " No | \n",
+ " Extended | \n",
+ " Bachelor | \n",
+ " 2/6/11 | \n",
+ " Employed | \n",
+ " F | \n",
+ " 39837 | \n",
+ " ... | \n",
+ " 0.0 | \n",
+ " 2 | \n",
+ " Corporate Auto | \n",
+ " Corporate L3 | \n",
+ " Offer1 | \n",
+ " Web | \n",
+ " 381.306996 | \n",
+ " Luxury SUV | \n",
+ " Medsize | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 10908 | \n",
+ " WA60547 | \n",
+ " California | \n",
+ " 11971.977650 | \n",
+ " No | \n",
+ " Premium | \n",
+ " College | \n",
+ " 2/13/11 | \n",
+ " Employed | \n",
+ " F | \n",
+ " 64195 | \n",
+ " ... | \n",
+ " 4.0 | \n",
+ " 6 | \n",
+ " Personal Auto | \n",
+ " Personal L1 | \n",
+ " Offer1 | \n",
+ " Branch | \n",
+ " 618.288849 | \n",
+ " SUV | \n",
+ " Medsize | \n",
+ " A | \n",
+ "
\n",
+ " \n",
+ " 10909 | \n",
+ " IV32877 | \n",
+ " NaN | \n",
+ " 6857.519928 | \n",
+ " NaN | \n",
+ " Basic | \n",
+ " Bachelor | \n",
+ " 1/8/11 | \n",
+ " Unemployed | \n",
+ " M | \n",
+ " 0 | \n",
+ " ... | \n",
+ " 0.0 | \n",
+ " 3 | \n",
+ " Personal Auto | \n",
+ " Personal L1 | \n",
+ " Offer4 | \n",
+ " Web | \n",
+ " 1021.719397 | \n",
+ " SUV | \n",
+ " Medsize | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
10910 rows × 25 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " CUSTOMER STATE CUSTOMER_LIFETIME_VALUE RESPONSE COVERAGE \\\n",
+ "0 DK49336 Arizona 4809.216960 No Basic \n",
+ "1 KX64629 California 2228.525238 No Basic \n",
+ "2 LZ68649 Washington 14947.917300 No Basic \n",
+ "3 XL78013 Oregon 22332.439460 Yes Extended \n",
+ "4 QA50777 Oregon 9025.067525 No Premium \n",
+ "... ... ... ... ... ... \n",
+ "10905 FE99816 Nevada 15563.369440 No Premium \n",
+ "10906 KX53892 Oregon 5259.444853 No Basic \n",
+ "10907 TL39050 Arizona 23893.304100 No Extended \n",
+ "10908 WA60547 California 11971.977650 No Premium \n",
+ "10909 IV32877 NaN 6857.519928 NaN Basic \n",
+ "\n",
+ " EDUCATION EFFECTIVE_TO_DATE EMPLOYMENT_STATUS GENDER INCOME ... \\\n",
+ "0 College 2/18/11 Employed M 48029 ... \n",
+ "1 College 1/18/11 Unemployed F 0 ... \n",
+ "2 Bachelor 2/10/11 Employed M 22139 ... \n",
+ "3 College 1/11/11 Employed M 49078 ... \n",
+ "4 Bachelor 1/17/11 Medical Leave F 23675 ... \n",
+ "... ... ... ... ... ... ... \n",
+ "10905 Bachelor 1/19/11 Unemployed F 0 ... \n",
+ "10906 College 1/6/11 Employed F 61146 ... \n",
+ "10907 Bachelor 2/6/11 Employed F 39837 ... \n",
+ "10908 College 2/13/11 Employed F 64195 ... \n",
+ "10909 Bachelor 1/8/11 Unemployed M 0 ... \n",
+ "\n",
+ " NUMBER_OF_OPEN_COMPLAINTS NUMBER_OF_POLICIES POLICY_TYPE \\\n",
+ "0 0.0 9 Corporate Auto \n",
+ "1 0.0 1 Personal Auto \n",
+ "2 0.0 2 Personal Auto \n",
+ "3 0.0 2 Corporate Auto \n",
+ "4 NaN 7 Personal Auto \n",
+ "... ... ... ... \n",
+ "10905 NaN 7 Personal Auto \n",
+ "10906 0.0 6 Personal Auto \n",
+ "10907 0.0 2 Corporate Auto \n",
+ "10908 4.0 6 Personal Auto \n",
+ "10909 0.0 3 Personal Auto \n",
+ "\n",
+ " POLICY RENEW_OFFER_TYPE SALES_CHANNEL TOTAL_CLAIM_AMOUNT \\\n",
+ "0 Corporate L3 Offer3 Agent 292.800000 \n",
+ "1 Personal L3 Offer4 Call Center 744.924331 \n",
+ "2 Personal L3 Offer3 Call Center 480.000000 \n",
+ "3 Corporate L3 Offer2 Branch 484.013411 \n",
+ "4 Personal L2 Offer1 Branch 707.925645 \n",
+ "... ... ... ... ... \n",
+ "10905 Personal L1 Offer3 Web 1214.400000 \n",
+ "10906 Personal L3 Offer2 Branch 273.018929 \n",
+ "10907 Corporate L3 Offer1 Web 381.306996 \n",
+ "10908 Personal L1 Offer1 Branch 618.288849 \n",
+ "10909 Personal L1 Offer4 Web 1021.719397 \n",
+ "\n",
+ " VEHICLE_CLASS VEHICLE_SIZE VEHICLE_TYPE \n",
+ "0 Four-Door Car Medsize NaN \n",
+ "1 Four-Door Car Medsize NaN \n",
+ "2 SUV Medsize A \n",
+ "3 Four-Door Car Medsize A \n",
+ "4 Four-Door Car Medsize NaN \n",
+ "... ... ... ... \n",
+ "10905 Luxury Car Medsize A \n",
+ "10906 Four-Door Car Medsize A \n",
+ "10907 Luxury SUV Medsize NaN \n",
+ "10908 SUV Medsize A \n",
+ "10909 SUV Medsize NaN \n",
+ "\n",
+ "[10910 rows x 25 columns]"
]
- },
- {
- "cell_type": "markdown",
- "id": "9c98ddc5-b041-4c94-ada1-4dfee5c98e50",
- "metadata": {
- "id": "9c98ddc5-b041-4c94-ada1-4dfee5c98e50"
- },
- "source": [
- "1. Create a new DataFrame that only includes customers who:\n",
- " - have a **low total_claim_amount** (e.g., below $1,000),\n",
- " - have a response \"Yes\" to the last marketing campaign."
+ },
+ "execution_count": 8,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# errors=\"ignore\" keeps this cell re-runnable: without it, executing the cell a\n",
+ "# second time raises KeyError because the column has already been dropped.\n",
+ "marketing_customer_analysis = marketing_customer_analysis.drop(columns=[\"UNNAMED:_0\"], errors=\"ignore\")\n",
+ "marketing_customer_analysis"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "id": "ca8b902a-73cf-4525-b89b-9fd7ed9df50d",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# data_cleaning.py\n",
+ "\n",
+ "def standardize_gender(marketing_customer_analysis, column=\"GENDER\"):\n",
+ "    \"\"\"Collapse the gender spellings in ``column`` to 'M' and 'F'.\n",
+ "\n",
+ "    Values are upper-cased first; 'FEMALE' and 'FEMAL' then become 'F' and\n",
+ "    'MALE' becomes 'M'. The frame is modified in place and also returned.\n",
+ "    If ``column`` is not present, the frame is returned untouched.\n",
+ "    \"\"\"\n",
+ "    # Bug fix: the guard used to read `file_combined.columns`, a name that is not\n",
+ "    # defined anywhere in this cell, so every call raised NameError.\n",
+ "    if column in marketing_customer_analysis.columns:\n",
+ "        marketing_customer_analysis[column] = marketing_customer_analysis[column].str.upper().replace({\n",
+ "            \"FEMALE\": \"F\",\n",
+ "            \"FEMAL\": \"F\",\n",
+ "            \"MALE\": \"M\"\n",
+ "        })\n",
+ "    return marketing_customer_analysis\n",
+ " \n",
+ "def standardize_states(marketing_customer_analysis, column=\"STATE\"):\n",
+ "    \"\"\"Replace state abbreviations in ``column`` with full state names.\n",
+ "\n",
+ "    'AZ', 'Cali' and 'WA' are expanded; every other value is left as is.\n",
+ "    The frame is modified in place and also returned. If the column is not\n",
+ "    present, the frame is returned untouched.\n",
+ "    \"\"\"\n",
+ "    state_mapping = {\n",
+ "        \"AZ\": \"Arizona\",\n",
+ "        \"Cali\": \"California\",\n",
+ "        \"WA\": \"Washington\"\n",
+ "    }\n",
+ "    columns = marketing_customer_analysis.columns\n",
+ "    # Bug fix: the default used to be \"STATES\", but the frame displayed in this\n",
+ "    # notebook names the column \"STATE\", so the default call silently did nothing.\n",
+ "    # Frames that really carry the old \"STATES\" name are still honoured.\n",
+ "    if column not in columns and column == \"STATE\" and \"STATES\" in columns:\n",
+ "        column = \"STATES\"\n",
+ "    if column in columns:\n",
+ "        marketing_customer_analysis[column] = marketing_customer_analysis[column].replace(state_mapping)\n",
+ "    return marketing_customer_analysis\n",
+ "\n",
+ "def standardize_education(marketing_customer_analysis, column=\"EDUCATION\"):\n",
+ "    \"\"\"Normalise the education label 'Bachelors' to 'Bachelor'.\n",
+ "\n",
+ "    Works in place on ``column`` and hands the same frame back; a frame\n",
+ "    that lacks ``column`` is returned unchanged.\n",
+ "    \"\"\"\n",
+ "    if column not in marketing_customer_analysis.columns:\n",
+ "        return marketing_customer_analysis\n",
+ "    relabel = {\"Bachelors\": \"Bachelor\"}\n",
+ "    marketing_customer_analysis[column] = marketing_customer_analysis[column].replace(relabel)\n",
+ "    return marketing_customer_analysis\n",
+ "\n",
+ "def clean_customer_lifetime_value(marketing_customer_analysis, column=\"CUSTOMER_LIFETIME_VALUE\"):\n",
+ "    \"\"\"Strip '%' and '$' from ``column`` and convert it to numeric.\n",
+ "\n",
+ "    Values are stringified, both symbols are removed, surrounding whitespace\n",
+ "    is trimmed and the result is parsed with ``pd.to_numeric(errors='coerce')``,\n",
+ "    so anything that cannot be parsed (including missing values) becomes NaN.\n",
+ "    The frame is modified in place and also returned; a frame without\n",
+ "    ``column`` is returned untouched.\n",
+ "    \"\"\"\n",
+ "    if column in marketing_customer_analysis.columns:\n",
+ "        cleaned = marketing_customer_analysis[column].astype(str)\n",
+ "        # regex=False is explicit because the default of `regex` differs between\n",
+ "        # pandas versions and \"$\" is an end-of-string anchor when read as a regex.\n",
+ "        for symbol in (\"%\", \"$\"):\n",
+ "            cleaned = cleaned.str.replace(symbol, \"\", regex=False)\n",
+ "        marketing_customer_analysis[column] = pd.to_numeric(cleaned.str.strip(), errors=\"coerce\")\n",
+ "    return marketing_customer_analysis\n",
+ "\n",
+ "def standardize_vehicle_class(marketing_customer_analysis, column=\"VEHICLE_CLASS\"):\n",
+ "    \"\"\"Fold the sports and luxury vehicle classes into a single 'Luxury' label.\n",
+ "\n",
+ "    'Sports Car', 'Luxury SUV' and 'Luxury Car' become 'Luxury'; every other\n",
+ "    value is left as is. The frame is modified in place and also returned.\n",
+ "    If ``column`` is not present, the frame is returned untouched.\n",
+ "    \"\"\"\n",
+ "    # Bug fix: the guard used to read `file_combined.columns`, a name that is not\n",
+ "    # defined anywhere in this cell, so every call raised NameError.\n",
+ "    if column in marketing_customer_analysis.columns:\n",
+ "        marketing_customer_analysis[column] = marketing_customer_analysis[column].replace({\n",
+ "            \"Sports Car\": \"Luxury\",\n",
+ "            \"Luxury SUV\": \"Luxury\",\n",
+ "            \"Luxury Car\": \"Luxury\"\n",
+ "        })\n",
+ "    return marketing_customer_analysis\n",
+ "\n",
+ "def clean_number_of_open_complaints(marketing_customer_analysis, column=\"NUMBER_OF_OPEN_COMPLAINTS\"):\n",
+ "    \"\"\"Convert ``column`` to integer complaint counts.\n",
+ "\n",
+ "    Strings shaped like '1/5/00' contribute their middle field (5); plain\n",
+ "    numbers such as 4, 4.0 or '4' are converted directly; missing values\n",
+ "    become 0. The frame is modified in place and also returned. A value\n",
+ "    that is not numeric at all raises ValueError.\n",
+ "    \"\"\"\n",
+ "    def extract_middle(value):\n",
+ "        if pd.isna(value):\n",
+ "            return 0\n",
+ "        parts = str(value).split(\"/\")\n",
+ "        token = parts[1] if len(parts) >= 2 else parts[0]\n",
+ "        # Bug fix: float cells stringify as '0.0' / '4.0', which int() rejects\n",
+ "        # with ValueError, so parse through float first.\n",
+ "        return int(float(token))\n",
+ "\n",
+ "    if column in marketing_customer_analysis.columns:\n",
+ "        marketing_customer_analysis[column] = marketing_customer_analysis[column].apply(extract_middle)\n",
+ "    return marketing_customer_analysis\n",
+ "\n",
+ "def fill_missing_values(marketing_customer_analysis):\n",
+ "    \"\"\"Fill missing numeric values with the column mean and categorical ones with the mode.\n",
+ "\n",
+ "    Numeric columns are those of dtype int64 / float64; categorical columns\n",
+ "    are those of dtype object / category. The frame is modified in place and\n",
+ "    also returned. A categorical column with no non-missing value has no mode\n",
+ "    and is left as is.\n",
+ "    \"\"\"\n",
+ "    num_cols = marketing_customer_analysis.select_dtypes(include=['int64', 'float64']).columns\n",
+ "    cat_cols = marketing_customer_analysis.select_dtypes(include=['object', 'category']).columns\n",
+ "\n",
+ "    # Assign the filled column back instead of `df[col].fillna(..., inplace=True)`:\n",
+ "    # that chained in-place form acts on a temporary and does not update the\n",
+ "    # frame when pandas Copy-on-Write is active.\n",
+ "    for col in num_cols:\n",
+ "        marketing_customer_analysis[col] = marketing_customer_analysis[col].fillna(marketing_customer_analysis[col].mean())\n",
+ "    for col in cat_cols:\n",
+ "        modes = marketing_customer_analysis[col].mode()\n",
+ "        # mode() is empty for an all-missing column; indexing it used to raise.\n",
+ "        if not modes.empty:\n",
+ "            marketing_customer_analysis[col] = marketing_customer_analysis[col].fillna(modes.iloc[0])\n",
+ "    return marketing_customer_analysis\n",
+ "\n",
+ "def drop_duplicates(marketing_customer_analysis, subset=None):\n",
+ "    \"\"\"Remove repeated rows, keeping the first occurrence, then renumber the index.\n",
+ "\n",
+ "    ``subset`` optionally restricts the comparison to the given columns.\n",
+ "    Both steps act on the passed frame in place; that same frame is returned.\n",
+ "    \"\"\"\n",
+ "    frame = marketing_customer_analysis\n",
+ "    frame.drop_duplicates(keep='first', subset=subset, inplace=True)\n",
+ "    # Discard the old labels so the index runs 0..n-1 again.\n",
+ "    frame.reset_index(inplace=True, drop=True)\n",
+ "    return frame\n",
+ "\n",
+ "def main(marketing_customer_analysis):\n",
+ "    \"\"\"Run every cleaning and formatting step in order and return the resulting frame.\n",
+ "\n",
+ "    Each step receives the frame returned by the previous one.\n",
+ "    \"\"\"\n",
+ "    pipeline = (\n",
+ "        standardize_gender,\n",
+ "        standardize_states,\n",
+ "        standardize_education,\n",
+ "        clean_customer_lifetime_value,\n",
+ "        standardize_vehicle_class,\n",
+ "        clean_number_of_open_complaints,\n",
+ "        fill_missing_values,\n",
+ "        drop_duplicates,\n",
+ "    )\n",
+ "    for step in pipeline:\n",
+ "        marketing_customer_analysis = step(marketing_customer_analysis)\n",
+ "    return marketing_customer_analysis"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "id": "c76bbd3e-c2a9-4818-aaa0-b257156b410a",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " CUSTOMER | \n",
+ " STATE | \n",
+ " CUSTOMER_LIFETIME_VALUE | \n",
+ " RESPONSE | \n",
+ " COVERAGE | \n",
+ " EDUCATION | \n",
+ " EFFECTIVE_TO_DATE | \n",
+ " EMPLOYMENT_STATUS | \n",
+ " GENDER | \n",
+ " INCOME | \n",
+ " ... | \n",
+ " NUMBER_OF_OPEN_COMPLAINTS | \n",
+ " NUMBER_OF_POLICIES | \n",
+ " POLICY_TYPE | \n",
+ " POLICY | \n",
+ " RENEW_OFFER_TYPE | \n",
+ " SALES_CHANNEL | \n",
+ " TOTAL_CLAIM_AMOUNT | \n",
+ " VEHICLE_CLASS | \n",
+ " VEHICLE_SIZE | \n",
+ " VEHICLE_TYPE | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " DK49336 | \n",
+ " Arizona | \n",
+ " 4809.216960 | \n",
+ " No | \n",
+ " Basic | \n",
+ " College | \n",
+ " 2/18/11 | \n",
+ " Employed | \n",
+ " M | \n",
+ " 48029 | \n",
+ " ... | \n",
+ " 0.0 | \n",
+ " 9 | \n",
+ " Corporate Auto | \n",
+ " Corporate L3 | \n",
+ " Offer3 | \n",
+ " Agent | \n",
+ " 292.800000 | \n",
+ " Four-Door Car | \n",
+ " Medsize | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " KX64629 | \n",
+ " California | \n",
+ " 2228.525238 | \n",
+ " No | \n",
+ " Basic | \n",
+ " College | \n",
+ " 1/18/11 | \n",
+ " Unemployed | \n",
+ " F | \n",
+ " 0 | \n",
+ " ... | \n",
+ " 0.0 | \n",
+ " 1 | \n",
+ " Personal Auto | \n",
+ " Personal L3 | \n",
+ " Offer4 | \n",
+ " Call Center | \n",
+ " 744.924331 | \n",
+ " Four-Door Car | \n",
+ " Medsize | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " LZ68649 | \n",
+ " Washington | \n",
+ " 14947.917300 | \n",
+ " No | \n",
+ " Basic | \n",
+ " Bachelor | \n",
+ " 2/10/11 | \n",
+ " Employed | \n",
+ " M | \n",
+ " 22139 | \n",
+ " ... | \n",
+ " 0.0 | \n",
+ " 2 | \n",
+ " Personal Auto | \n",
+ " Personal L3 | \n",
+ " Offer3 | \n",
+ " Call Center | \n",
+ " 480.000000 | \n",
+ " SUV | \n",
+ " Medsize | \n",
+ " A | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " XL78013 | \n",
+ " Oregon | \n",
+ " 22332.439460 | \n",
+ " Yes | \n",
+ " Extended | \n",
+ " College | \n",
+ " 1/11/11 | \n",
+ " Employed | \n",
+ " M | \n",
+ " 49078 | \n",
+ " ... | \n",
+ " 0.0 | \n",
+ " 2 | \n",
+ " Corporate Auto | \n",
+ " Corporate L3 | \n",
+ " Offer2 | \n",
+ " Branch | \n",
+ " 484.013411 | \n",
+ " Four-Door Car | \n",
+ " Medsize | \n",
+ " A | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " QA50777 | \n",
+ " Oregon | \n",
+ " 9025.067525 | \n",
+ " No | \n",
+ " Premium | \n",
+ " Bachelor | \n",
+ " 1/17/11 | \n",
+ " Medical Leave | \n",
+ " F | \n",
+ " 23675 | \n",
+ " ... | \n",
+ " NaN | \n",
+ " 7 | \n",
+ " Personal Auto | \n",
+ " Personal L2 | \n",
+ " Offer1 | \n",
+ " Branch | \n",
+ " 707.925645 | \n",
+ " Four-Door Car | \n",
+ " Medsize | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 10905 | \n",
+ " FE99816 | \n",
+ " Nevada | \n",
+ " 15563.369440 | \n",
+ " No | \n",
+ " Premium | \n",
+ " Bachelor | \n",
+ " 1/19/11 | \n",
+ " Unemployed | \n",
+ " F | \n",
+ " 0 | \n",
+ " ... | \n",
+ " NaN | \n",
+ " 7 | \n",
+ " Personal Auto | \n",
+ " Personal L1 | \n",
+ " Offer3 | \n",
+ " Web | \n",
+ " 1214.400000 | \n",
+ " Luxury Car | \n",
+ " Medsize | \n",
+ " A | \n",
+ "
\n",
+ " \n",
+ " 10906 | \n",
+ " KX53892 | \n",
+ " Oregon | \n",
+ " 5259.444853 | \n",
+ " No | \n",
+ " Basic | \n",
+ " College | \n",
+ " 1/6/11 | \n",
+ " Employed | \n",
+ " F | \n",
+ " 61146 | \n",
+ " ... | \n",
+ " 0.0 | \n",
+ " 6 | \n",
+ " Personal Auto | \n",
+ " Personal L3 | \n",
+ " Offer2 | \n",
+ " Branch | \n",
+ " 273.018929 | \n",
+ " Four-Door Car | \n",
+ " Medsize | \n",
+ " A | \n",
+ "
\n",
+ " \n",
+ " 10907 | \n",
+ " TL39050 | \n",
+ " Arizona | \n",
+ " 23893.304100 | \n",
+ " No | \n",
+ " Extended | \n",
+ " Bachelor | \n",
+ " 2/6/11 | \n",
+ " Employed | \n",
+ " F | \n",
+ " 39837 | \n",
+ " ... | \n",
+ " 0.0 | \n",
+ " 2 | \n",
+ " Corporate Auto | \n",
+ " Corporate L3 | \n",
+ " Offer1 | \n",
+ " Web | \n",
+ " 381.306996 | \n",
+ " Luxury SUV | \n",
+ " Medsize | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 10908 | \n",
+ " WA60547 | \n",
+ " California | \n",
+ " 11971.977650 | \n",
+ " No | \n",
+ " Premium | \n",
+ " College | \n",
+ " 2/13/11 | \n",
+ " Employed | \n",
+ " F | \n",
+ " 64195 | \n",
+ " ... | \n",
+ " 4.0 | \n",
+ " 6 | \n",
+ " Personal Auto | \n",
+ " Personal L1 | \n",
+ " Offer1 | \n",
+ " Branch | \n",
+ " 618.288849 | \n",
+ " SUV | \n",
+ " Medsize | \n",
+ " A | \n",
+ "
\n",
+ " \n",
+ " 10909 | \n",
+ " IV32877 | \n",
+ " NaN | \n",
+ " 6857.519928 | \n",
+ " NaN | \n",
+ " Basic | \n",
+ " Bachelor | \n",
+ " 1/8/11 | \n",
+ " Unemployed | \n",
+ " M | \n",
+ " 0 | \n",
+ " ... | \n",
+ " 0.0 | \n",
+ " 3 | \n",
+ " Personal Auto | \n",
+ " Personal L1 | \n",
+ " Offer4 | \n",
+ " Web | \n",
+ " 1021.719397 | \n",
+ " SUV | \n",
+ " Medsize | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
10910 rows × 25 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " CUSTOMER STATE CUSTOMER_LIFETIME_VALUE RESPONSE COVERAGE \\\n",
+ "0 DK49336 Arizona 4809.216960 No Basic \n",
+ "1 KX64629 California 2228.525238 No Basic \n",
+ "2 LZ68649 Washington 14947.917300 No Basic \n",
+ "3 XL78013 Oregon 22332.439460 Yes Extended \n",
+ "4 QA50777 Oregon 9025.067525 No Premium \n",
+ "... ... ... ... ... ... \n",
+ "10905 FE99816 Nevada 15563.369440 No Premium \n",
+ "10906 KX53892 Oregon 5259.444853 No Basic \n",
+ "10907 TL39050 Arizona 23893.304100 No Extended \n",
+ "10908 WA60547 California 11971.977650 No Premium \n",
+ "10909 IV32877 NaN 6857.519928 NaN Basic \n",
+ "\n",
+ " EDUCATION EFFECTIVE_TO_DATE EMPLOYMENT_STATUS GENDER INCOME ... \\\n",
+ "0 College 2/18/11 Employed M 48029 ... \n",
+ "1 College 1/18/11 Unemployed F 0 ... \n",
+ "2 Bachelor 2/10/11 Employed M 22139 ... \n",
+ "3 College 1/11/11 Employed M 49078 ... \n",
+ "4 Bachelor 1/17/11 Medical Leave F 23675 ... \n",
+ "... ... ... ... ... ... ... \n",
+ "10905 Bachelor 1/19/11 Unemployed F 0 ... \n",
+ "10906 College 1/6/11 Employed F 61146 ... \n",
+ "10907 Bachelor 2/6/11 Employed F 39837 ... \n",
+ "10908 College 2/13/11 Employed F 64195 ... \n",
+ "10909 Bachelor 1/8/11 Unemployed M 0 ... \n",
+ "\n",
+ " NUMBER_OF_OPEN_COMPLAINTS NUMBER_OF_POLICIES POLICY_TYPE \\\n",
+ "0 0.0 9 Corporate Auto \n",
+ "1 0.0 1 Personal Auto \n",
+ "2 0.0 2 Personal Auto \n",
+ "3 0.0 2 Corporate Auto \n",
+ "4 NaN 7 Personal Auto \n",
+ "... ... ... ... \n",
+ "10905 NaN 7 Personal Auto \n",
+ "10906 0.0 6 Personal Auto \n",
+ "10907 0.0 2 Corporate Auto \n",
+ "10908 4.0 6 Personal Auto \n",
+ "10909 0.0 3 Personal Auto \n",
+ "\n",
+ " POLICY RENEW_OFFER_TYPE SALES_CHANNEL TOTAL_CLAIM_AMOUNT \\\n",
+ "0 Corporate L3 Offer3 Agent 292.800000 \n",
+ "1 Personal L3 Offer4 Call Center 744.924331 \n",
+ "2 Personal L3 Offer3 Call Center 480.000000 \n",
+ "3 Corporate L3 Offer2 Branch 484.013411 \n",
+ "4 Personal L2 Offer1 Branch 707.925645 \n",
+ "... ... ... ... ... \n",
+ "10905 Personal L1 Offer3 Web 1214.400000 \n",
+ "10906 Personal L3 Offer2 Branch 273.018929 \n",
+ "10907 Corporate L3 Offer1 Web 381.306996 \n",
+ "10908 Personal L1 Offer1 Branch 618.288849 \n",
+ "10909 Personal L1 Offer4 Web 1021.719397 \n",
+ "\n",
+ " VEHICLE_CLASS VEHICLE_SIZE VEHICLE_TYPE \n",
+ "0 Four-Door Car Medsize NaN \n",
+ "1 Four-Door Car Medsize NaN \n",
+ "2 SUV Medsize A \n",
+ "3 Four-Door Car Medsize A \n",
+ "4 Four-Door Car Medsize NaN \n",
+ "... ... ... ... \n",
+ "10905 Luxury Car Medsize A \n",
+ "10906 Four-Door Car Medsize A \n",
+ "10907 Luxury SUV Medsize NaN \n",
+ "10908 SUV Medsize A \n",
+ "10909 SUV Medsize NaN \n",
+ "\n",
+ "[10910 rows x 25 columns]"
]
- },
- {
- "cell_type": "markdown",
- "id": "b9be383e-5165-436e-80c8-57d4c757c8c3",
- "metadata": {
- "id": "b9be383e-5165-436e-80c8-57d4c757c8c3"
- },
- "source": [
- "2. Using the original Dataframe, analyze:\n",
- " - the average `monthly_premium` and/or customer lifetime value by `policy_type` and `gender` for customers who responded \"Yes\", and\n",
- " - compare these insights to `total_claim_amount` patterns, and discuss which segments appear most profitable or low-risk for the company."
+ },
+ "execution_count": 10,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "marketing_customer_analysis"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "id": "36e76ceb-fc44-439b-962c-38529b75fd43",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "['No' 'Yes' nan]\n"
+ ]
+ }
+ ],
+ "source": [
+ "print(marketing_customer_analysis['RESPONSE'].unique())"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "id": "78253743-b066-4f2b-9dc9-924967b2be8c",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "['Basic' 'Extended' 'Premium']\n"
+ ]
+ }
+ ],
+ "source": [
+ "print(marketing_customer_analysis['COVERAGE'].unique())"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "id": "a2f126dd-3050-4a69-bf24-58b25cc225f9",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "['2/18/11' '1/18/11' '2/10/11' '1/11/11' '1/17/11' '2/14/11' '2/24/11'\n",
+ " '1/19/11' '1/4/11' '1/2/11' '2/7/11' '1/31/11' '1/26/11' '2/28/11'\n",
+ " '1/16/11' '2/26/11' '2/23/11' '1/15/11' '2/2/11' '2/15/11' '1/24/11'\n",
+ " '2/21/11' '2/22/11' '1/7/11' '1/28/11' '2/8/11' '2/12/11' '2/20/11'\n",
+ " '1/5/11' '2/19/11' '1/3/11' '2/3/11' '1/22/11' '1/23/11' '2/5/11'\n",
+ " '2/13/11' '1/25/11' '2/16/11' '2/1/11' '1/27/11' '1/12/11' '1/20/11'\n",
+ " '2/6/11' '2/11/11' '1/21/11' '1/29/11' '1/9/11' '2/9/11' '2/27/11'\n",
+ " '1/1/11' '2/17/11' '2/25/11' '1/13/11' '1/6/11' '2/4/11' '1/14/11'\n",
+ " '1/10/11' '1/8/11' '1/30/11']\n"
+ ]
+ }
+ ],
+ "source": [
+ "print(marketing_customer_analysis['EFFECTIVE_TO_DATE'].unique())"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 19,
+ "id": "d7af64b3-4c40-4871-a5c1-6c0f9d5f0124",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "object\n"
+ ]
+ }
+ ],
+ "source": [
+ "print(marketing_customer_analysis['EFFECTIVE_TO_DATE'].dtype) "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 15,
+ "id": "468a09f2-0319-4ac5-a864-8e7be8688211",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "['Employed' 'Unemployed' 'Medical Leave' 'Disabled' 'Retired']\n"
+ ]
+ }
+ ],
+ "source": [
+ "print(marketing_customer_analysis['EMPLOYMENT_STATUS'].unique())"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 16,
+ "id": "4c37adb5-4df9-4241-89fb-7c4f1cfbae7c",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "['Suburban' 'Urban' 'Rural']\n"
+ ]
+ }
+ ],
+ "source": [
+ "print(marketing_customer_analysis['LOCATION_CODE'].unique())"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 17,
+ "id": "34446b61-e818-4099-8e58-7cfe58122b32",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "['Married' 'Single' 'Divorced']\n"
+ ]
+ }
+ ],
+ "source": [
+ "print(marketing_customer_analysis['MARITAL_STATUS'].unique())"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 18,
+ "id": "2b4e6637-6bbc-458e-9340-82135b0dda0b",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "[ 7. 3. 34. 10. nan 2. 8. 35. 33. 19. 13. 5. 24. 25. 6. 20. 26. 14.\n",
+ " 9. 29. 11. 1. 18. 16. 30. 12. 4. 22. 31. 21. 28. 17. 15. 32. 23. 27.\n",
+ " 0.]\n"
+ ]
+ }
+ ],
+ "source": [
+ "print(marketing_customer_analysis['MONTHS_SINCE_LAST_CLAIM'].unique())"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 20,
+ "id": "149416d0-010b-4e64-b263-fa12e4ca7b21",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "[52 26 31 3 73 99 45 24 8 29 32 25 28 87 10 74 1 38 58 37 7 80 95 78\n",
+ " 63 27 97 39 11 59 46 62 13 54 51 22 82 91 44 43 76 48 84 6 92 12 61 4\n",
+ " 18 66 70 16 75 34 35 64 9 89 0 60 71 23 55 93 2 67 81 40 57 86 19 72\n",
+ " 69 33 47 42 17 49 21 83 94 30 15 50 53 77 41 90 5 79 56 98 20 88 65 14\n",
+ " 85 96 36 68]\n"
+ ]
+ }
+ ],
+ "source": [
+ "print(marketing_customer_analysis['MONTHS_SINCE_POLICY_INCEPTION'].unique())"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 21,
+ "id": "46dc903f-9aed-4061-8567-9d3e7005b346",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "[9 1 2 7 4 3 6 8 5]\n"
+ ]
+ }
+ ],
+ "source": [
+ "print(marketing_customer_analysis['NUMBER_OF_POLICIES'].unique())"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 22,
+ "id": "6e4bd483-7df2-4719-add2-cb5196157af6",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "['Corporate L3' 'Personal L3' 'Personal L2' 'Corporate L2' 'Personal L1'\n",
+ " 'Special L1' 'Corporate L1' 'Special L3' 'Special L2']\n"
+ ]
+ }
+ ],
+ "source": [
+ "print(marketing_customer_analysis['POLICY'].unique())"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 23,
+ "id": "af0db43c-e26c-41cc-b4ba-f034ee57b087",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "['Offer3' 'Offer4' 'Offer2' 'Offer1']\n"
+ ]
+ }
+ ],
+ "source": [
+ "print(marketing_customer_analysis['RENEW_OFFER_TYPE'].unique())"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 24,
+ "id": "3300c175-2a1b-4b47-a068-70169c78e147",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "['Agent' 'Call Center' 'Branch' 'Web']\n"
+ ]
+ }
+ ],
+ "source": [
+ "print(marketing_customer_analysis['SALES_CHANNEL'].unique())"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 25,
+ "id": "1f56247a-8a50-4ddc-832e-2a8faa8c89da",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "['Medsize' 'Small' 'Large' nan]\n"
+ ]
+ }
+ ],
+ "source": [
+ "print(marketing_customer_analysis['VEHICLE_SIZE'].unique())"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 26,
+ "id": "a4ab4a51-c52d-4a48-83fc-13d2d018343f",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "[nan 'A']\n"
+ ]
+ }
+ ],
+ "source": [
+ "print(marketing_customer_analysis['VEHICLE_TYPE'].unique())"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 28,
+ "id": "6152cda9-d2f7-45f9-8e8a-50de3992f84c",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "['Four-Door Car' 'SUV' 'Two-Door Car' 'Sports Car' 'Luxury Car'\n",
+ " 'Luxury SUV' nan]\n"
+ ]
+ }
+ ],
+ "source": [
+ "print(marketing_customer_analysis['VEHICLE_CLASS'].unique())"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "9c98ddc5-b041-4c94-ada1-4dfee5c98e50",
+ "metadata": {
+ "id": "9c98ddc5-b041-4c94-ada1-4dfee5c98e50"
+ },
+ "source": [
+ "1. Create a new DataFrame that only includes customers who:\n",
+ " - have a **low total_claim_amount** (e.g., below $1,000),\n",
+ " - have a response \"Yes\" to the last marketing campaign."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 30,
+ "id": "c0f2f450-aad1-42d4-b3e7-35862b1c02bf",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " CUSTOMER | \n",
+ " STATE | \n",
+ " CUSTOMER_LIFETIME_VALUE | \n",
+ " RESPONSE | \n",
+ " COVERAGE | \n",
+ " EDUCATION | \n",
+ " EFFECTIVE_TO_DATE | \n",
+ " EMPLOYMENT_STATUS | \n",
+ " GENDER | \n",
+ " INCOME | \n",
+ " ... | \n",
+ " NUMBER_OF_OPEN_COMPLAINTS | \n",
+ " NUMBER_OF_POLICIES | \n",
+ " POLICY_TYPE | \n",
+ " POLICY | \n",
+ " RENEW_OFFER_TYPE | \n",
+ " SALES_CHANNEL | \n",
+ " TOTAL_CLAIM_AMOUNT | \n",
+ " VEHICLE_CLASS | \n",
+ " VEHICLE_SIZE | \n",
+ " VEHICLE_TYPE | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " DK49336 | \n",
+ " Arizona | \n",
+ " 4809.216960 | \n",
+ " No | \n",
+ " Basic | \n",
+ " College | \n",
+ " 2/18/11 | \n",
+ " Employed | \n",
+ " M | \n",
+ " 48029 | \n",
+ " ... | \n",
+ " 0.0 | \n",
+ " 9 | \n",
+ " Corporate Auto | \n",
+ " Corporate L3 | \n",
+ " Offer3 | \n",
+ " Agent | \n",
+ " 292.800000 | \n",
+ " Four-Door Car | \n",
+ " Medsize | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " KX64629 | \n",
+ " California | \n",
+ " 2228.525238 | \n",
+ " No | \n",
+ " Basic | \n",
+ " College | \n",
+ " 1/18/11 | \n",
+ " Unemployed | \n",
+ " F | \n",
+ " 0 | \n",
+ " ... | \n",
+ " 0.0 | \n",
+ " 1 | \n",
+ " Personal Auto | \n",
+ " Personal L3 | \n",
+ " Offer4 | \n",
+ " Call Center | \n",
+ " 744.924331 | \n",
+ " Four-Door Car | \n",
+ " Medsize | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " LZ68649 | \n",
+ " Washington | \n",
+ " 14947.917300 | \n",
+ " No | \n",
+ " Basic | \n",
+ " Bachelor | \n",
+ " 2/10/11 | \n",
+ " Employed | \n",
+ " M | \n",
+ " 22139 | \n",
+ " ... | \n",
+ " 0.0 | \n",
+ " 2 | \n",
+ " Personal Auto | \n",
+ " Personal L3 | \n",
+ " Offer3 | \n",
+ " Call Center | \n",
+ " 480.000000 | \n",
+ " SUV | \n",
+ " Medsize | \n",
+ " A | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " XL78013 | \n",
+ " Oregon | \n",
+ " 22332.439460 | \n",
+ " Yes | \n",
+ " Extended | \n",
+ " College | \n",
+ " 1/11/11 | \n",
+ " Employed | \n",
+ " M | \n",
+ " 49078 | \n",
+ " ... | \n",
+ " 0.0 | \n",
+ " 2 | \n",
+ " Corporate Auto | \n",
+ " Corporate L3 | \n",
+ " Offer2 | \n",
+ " Branch | \n",
+ " 484.013411 | \n",
+ " Four-Door Car | \n",
+ " Medsize | \n",
+ " A | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " QA50777 | \n",
+ " Oregon | \n",
+ " 9025.067525 | \n",
+ " No | \n",
+ " Premium | \n",
+ " Bachelor | \n",
+ " 1/17/11 | \n",
+ " Medical Leave | \n",
+ " F | \n",
+ " 23675 | \n",
+ " ... | \n",
+ " NaN | \n",
+ " 7 | \n",
+ " Personal Auto | \n",
+ " Personal L2 | \n",
+ " Offer1 | \n",
+ " Branch | \n",
+ " 707.925645 | \n",
+ " Four-Door Car | \n",
+ " Medsize | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 10903 | \n",
+ " SU71163 | \n",
+ " Arizona | \n",
+ " 2771.663013 | \n",
+ " No | \n",
+ " Basic | \n",
+ " College | \n",
+ " 1/7/11 | \n",
+ " Employed | \n",
+ " M | \n",
+ " 59855 | \n",
+ " ... | \n",
+ " 4.0 | \n",
+ " 1 | \n",
+ " Personal Auto | \n",
+ " Personal L2 | \n",
+ " Offer2 | \n",
+ " Branch | \n",
+ " 355.200000 | \n",
+ " Two-Door Car | \n",
+ " Medsize | \n",
+ " A | \n",
+ "
\n",
+ " \n",
+ " 10904 | \n",
+ " QI63521 | \n",
+ " Nevada | \n",
+ " 19228.463620 | \n",
+ " No | \n",
+ " Basic | \n",
+ " High School or Below | \n",
+ " 2/24/11 | \n",
+ " Unemployed | \n",
+ " M | \n",
+ " 0 | \n",
+ " ... | \n",
+ " 0.0 | \n",
+ " 2 | \n",
+ " Personal Auto | \n",
+ " Personal L2 | \n",
+ " Offer1 | \n",
+ " Branch | \n",
+ " 897.600000 | \n",
+ " Luxury SUV | \n",
+ " Medsize | \n",
+ " A | \n",
+ "
\n",
+ " \n",
+ " 10906 | \n",
+ " KX53892 | \n",
+ " Oregon | \n",
+ " 5259.444853 | \n",
+ " No | \n",
+ " Basic | \n",
+ " College | \n",
+ " 1/6/11 | \n",
+ " Employed | \n",
+ " F | \n",
+ " 61146 | \n",
+ " ... | \n",
+ " 0.0 | \n",
+ " 6 | \n",
+ " Personal Auto | \n",
+ " Personal L3 | \n",
+ " Offer2 | \n",
+ " Branch | \n",
+ " 273.018929 | \n",
+ " Four-Door Car | \n",
+ " Medsize | \n",
+ " A | \n",
+ "
\n",
+ " \n",
+ " 10907 | \n",
+ " TL39050 | \n",
+ " Arizona | \n",
+ " 23893.304100 | \n",
+ " No | \n",
+ " Extended | \n",
+ " Bachelor | \n",
+ " 2/6/11 | \n",
+ " Employed | \n",
+ " F | \n",
+ " 39837 | \n",
+ " ... | \n",
+ " 0.0 | \n",
+ " 2 | \n",
+ " Corporate Auto | \n",
+ " Corporate L3 | \n",
+ " Offer1 | \n",
+ " Web | \n",
+ " 381.306996 | \n",
+ " Luxury SUV | \n",
+ " Medsize | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 10908 | \n",
+ " WA60547 | \n",
+ " California | \n",
+ " 11971.977650 | \n",
+ " No | \n",
+ " Premium | \n",
+ " College | \n",
+ " 2/13/11 | \n",
+ " Employed | \n",
+ " F | \n",
+ " 64195 | \n",
+ " ... | \n",
+ " 4.0 | \n",
+ " 6 | \n",
+ " Personal Auto | \n",
+ " Personal L1 | \n",
+ " Offer1 | \n",
+ " Branch | \n",
+ " 618.288849 | \n",
+ " SUV | \n",
+ " Medsize | \n",
+ " A | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
10419 rows × 25 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " CUSTOMER STATE CUSTOMER_LIFETIME_VALUE RESPONSE COVERAGE \\\n",
+ "0 DK49336 Arizona 4809.216960 No Basic \n",
+ "1 KX64629 California 2228.525238 No Basic \n",
+ "2 LZ68649 Washington 14947.917300 No Basic \n",
+ "3 XL78013 Oregon 22332.439460 Yes Extended \n",
+ "4 QA50777 Oregon 9025.067525 No Premium \n",
+ "... ... ... ... ... ... \n",
+ "10903 SU71163 Arizona 2771.663013 No Basic \n",
+ "10904 QI63521 Nevada 19228.463620 No Basic \n",
+ "10906 KX53892 Oregon 5259.444853 No Basic \n",
+ "10907 TL39050 Arizona 23893.304100 No Extended \n",
+ "10908 WA60547 California 11971.977650 No Premium \n",
+ "\n",
+ " EDUCATION EFFECTIVE_TO_DATE EMPLOYMENT_STATUS GENDER \\\n",
+ "0 College 2/18/11 Employed M \n",
+ "1 College 1/18/11 Unemployed F \n",
+ "2 Bachelor 2/10/11 Employed M \n",
+ "3 College 1/11/11 Employed M \n",
+ "4 Bachelor 1/17/11 Medical Leave F \n",
+ "... ... ... ... ... \n",
+ "10903 College 1/7/11 Employed M \n",
+ "10904 High School or Below 2/24/11 Unemployed M \n",
+ "10906 College 1/6/11 Employed F \n",
+ "10907 Bachelor 2/6/11 Employed F \n",
+ "10908 College 2/13/11 Employed F \n",
+ "\n",
+ " INCOME ... NUMBER_OF_OPEN_COMPLAINTS NUMBER_OF_POLICIES \\\n",
+ "0 48029 ... 0.0 9 \n",
+ "1 0 ... 0.0 1 \n",
+ "2 22139 ... 0.0 2 \n",
+ "3 49078 ... 0.0 2 \n",
+ "4 23675 ... NaN 7 \n",
+ "... ... ... ... ... \n",
+ "10903 59855 ... 4.0 1 \n",
+ "10904 0 ... 0.0 2 \n",
+ "10906 61146 ... 0.0 6 \n",
+ "10907 39837 ... 0.0 2 \n",
+ "10908 64195 ... 4.0 6 \n",
+ "\n",
+ " POLICY_TYPE POLICY RENEW_OFFER_TYPE SALES_CHANNEL \\\n",
+ "0 Corporate Auto Corporate L3 Offer3 Agent \n",
+ "1 Personal Auto Personal L3 Offer4 Call Center \n",
+ "2 Personal Auto Personal L3 Offer3 Call Center \n",
+ "3 Corporate Auto Corporate L3 Offer2 Branch \n",
+ "4 Personal Auto Personal L2 Offer1 Branch \n",
+ "... ... ... ... ... \n",
+ "10903 Personal Auto Personal L2 Offer2 Branch \n",
+ "10904 Personal Auto Personal L2 Offer1 Branch \n",
+ "10906 Personal Auto Personal L3 Offer2 Branch \n",
+ "10907 Corporate Auto Corporate L3 Offer1 Web \n",
+ "10908 Personal Auto Personal L1 Offer1 Branch \n",
+ "\n",
+ " TOTAL_CLAIM_AMOUNT VEHICLE_CLASS VEHICLE_SIZE VEHICLE_TYPE \n",
+ "0 292.800000 Four-Door Car Medsize NaN \n",
+ "1 744.924331 Four-Door Car Medsize NaN \n",
+ "2 480.000000 SUV Medsize A \n",
+ "3 484.013411 Four-Door Car Medsize A \n",
+ "4 707.925645 Four-Door Car Medsize NaN \n",
+ "... ... ... ... ... \n",
+ "10903 355.200000 Two-Door Car Medsize A \n",
+ "10904 897.600000 Luxury SUV Medsize A \n",
+ "10906 273.018929 Four-Door Car Medsize A \n",
+ "10907 381.306996 Luxury SUV Medsize NaN \n",
+ "10908 618.288849 SUV Medsize A \n",
+ "\n",
+ "[10419 rows x 25 columns]"
]
- },
- {
- "cell_type": "markdown",
- "id": "7050f4ac-53c5-4193-a3c0-8699b87196f0",
- "metadata": {
- "id": "7050f4ac-53c5-4193-a3c0-8699b87196f0"
- },
- "source": [
- "3. Analyze the total number of customers who have policies in each state, and then filter the results to only include states where there are more than 500 customers."
+ },
+ "execution_count": 30,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "low_claim_customers = marketing_customer_analysis.loc[marketing_customer_analysis['TOTAL_CLAIM_AMOUNT'].lt(1000)]\n",
+ "low_claim_customers  # display the rows whose TOTAL_CLAIM_AMOUNT is below 1000"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 31,
+ "id": "8826d026-c2fa-421d-bd04-046db6f8b8fc",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " CUSTOMER | \n",
+ " STATE | \n",
+ " CUSTOMER_LIFETIME_VALUE | \n",
+ " RESPONSE | \n",
+ " COVERAGE | \n",
+ " EDUCATION | \n",
+ " EFFECTIVE_TO_DATE | \n",
+ " EMPLOYMENT_STATUS | \n",
+ " GENDER | \n",
+ " INCOME | \n",
+ " ... | \n",
+ " NUMBER_OF_OPEN_COMPLAINTS | \n",
+ " NUMBER_OF_POLICIES | \n",
+ " POLICY_TYPE | \n",
+ " POLICY | \n",
+ " RENEW_OFFER_TYPE | \n",
+ " SALES_CHANNEL | \n",
+ " TOTAL_CLAIM_AMOUNT | \n",
+ " VEHICLE_CLASS | \n",
+ " VEHICLE_SIZE | \n",
+ " VEHICLE_TYPE | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 3 | \n",
+ " XL78013 | \n",
+ " Oregon | \n",
+ " 22332.439460 | \n",
+ " Yes | \n",
+ " Extended | \n",
+ " College | \n",
+ " 1/11/11 | \n",
+ " Employed | \n",
+ " M | \n",
+ " 49078 | \n",
+ " ... | \n",
+ " 0.0 | \n",
+ " 2 | \n",
+ " Corporate Auto | \n",
+ " Corporate L3 | \n",
+ " Offer2 | \n",
+ " Branch | \n",
+ " 484.013411 | \n",
+ " Four-Door Car | \n",
+ " Medsize | \n",
+ " A | \n",
+ "
\n",
+ " \n",
+ " 8 | \n",
+ " FM55990 | \n",
+ " California | \n",
+ " 5989.773931 | \n",
+ " Yes | \n",
+ " Premium | \n",
+ " College | \n",
+ " 1/19/11 | \n",
+ " Employed | \n",
+ " M | \n",
+ " 66839 | \n",
+ " ... | \n",
+ " 0.0 | \n",
+ " 1 | \n",
+ " Personal Auto | \n",
+ " Personal L1 | \n",
+ " Offer2 | \n",
+ " Branch | \n",
+ " 739.200000 | \n",
+ " Sports Car | \n",
+ " Medsize | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 15 | \n",
+ " CW49887 | \n",
+ " California | \n",
+ " 4626.801093 | \n",
+ " Yes | \n",
+ " Basic | \n",
+ " Master | \n",
+ " 1/16/11 | \n",
+ " Employed | \n",
+ " F | \n",
+ " 79487 | \n",
+ " ... | \n",
+ " 0.0 | \n",
+ " 1 | \n",
+ " Special Auto | \n",
+ " Special L1 | \n",
+ " Offer2 | \n",
+ " Branch | \n",
+ " 547.200000 | \n",
+ " SUV | \n",
+ " Medsize | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 19 | \n",
+ " NJ54277 | \n",
+ " California | \n",
+ " 3746.751625 | \n",
+ " Yes | \n",
+ " Extended | \n",
+ " College | \n",
+ " 2/26/11 | \n",
+ " Employed | \n",
+ " F | \n",
+ " 41479 | \n",
+ " ... | \n",
+ " 1.0 | \n",
+ " 1 | \n",
+ " Personal Auto | \n",
+ " Personal L2 | \n",
+ " Offer2 | \n",
+ " Call Center | \n",
+ " 19.575683 | \n",
+ " Two-Door Car | \n",
+ " Large | \n",
+ " A | \n",
+ "
\n",
+ " \n",
+ " 27 | \n",
+ " MQ68407 | \n",
+ " Oregon | \n",
+ " 4376.363592 | \n",
+ " Yes | \n",
+ " Premium | \n",
+ " Bachelor | \n",
+ " 2/28/11 | \n",
+ " Employed | \n",
+ " F | \n",
+ " 63774 | \n",
+ " ... | \n",
+ " 0.0 | \n",
+ " 1 | \n",
+ " Personal Auto | \n",
+ " Personal L3 | \n",
+ " Offer2 | \n",
+ " Agent | \n",
+ " 60.036683 | \n",
+ " Four-Door Car | \n",
+ " Medsize | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 10844 | \n",
+ " FM31768 | \n",
+ " Arizona | \n",
+ " 5979.724161 | \n",
+ " Yes | \n",
+ " Extended | \n",
+ " High School or Below | \n",
+ " 2/7/11 | \n",
+ " Employed | \n",
+ " F | \n",
+ " 57693 | \n",
+ " ... | \n",
+ " 0.0 | \n",
+ " 3 | \n",
+ " Personal Auto | \n",
+ " Personal L1 | \n",
+ " Offer2 | \n",
+ " Agent | \n",
+ " 547.200000 | \n",
+ " Four-Door Car | \n",
+ " Medsize | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 10852 | \n",
+ " KZ80424 | \n",
+ " Washington | \n",
+ " 8382.478392 | \n",
+ " Yes | \n",
+ " Basic | \n",
+ " Bachelor | \n",
+ " 1/27/11 | \n",
+ " Employed | \n",
+ " M | \n",
+ " 25998 | \n",
+ " ... | \n",
+ " 0.0 | \n",
+ " 2 | \n",
+ " Personal Auto | \n",
+ " Personal L2 | \n",
+ " Offer2 | \n",
+ " Call Center | \n",
+ " 791.878042 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " A | \n",
+ "
\n",
+ " \n",
+ " 10872 | \n",
+ " XT67997 | \n",
+ " California | \n",
+ " 5979.724161 | \n",
+ " Yes | \n",
+ " Extended | \n",
+ " High School or Below | \n",
+ " 2/7/11 | \n",
+ " Employed | \n",
+ " F | \n",
+ " 57693 | \n",
+ " ... | \n",
+ " 0.0 | \n",
+ " 3 | \n",
+ " Personal Auto | \n",
+ " Personal L3 | \n",
+ " Offer2 | \n",
+ " Agent | \n",
+ " 547.200000 | \n",
+ " Four-Door Car | \n",
+ " Medsize | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 10887 | \n",
+ " BY78730 | \n",
+ " Oregon | \n",
+ " 8879.790017 | \n",
+ " Yes | \n",
+ " Basic | \n",
+ " High School or Below | \n",
+ " 2/3/11 | \n",
+ " Employed | \n",
+ " F | \n",
+ " 52583 | \n",
+ " ... | \n",
+ " 0.0 | \n",
+ " 7 | \n",
+ " Special Auto | \n",
+ " Special L2 | \n",
+ " Offer1 | \n",
+ " Agent | \n",
+ " 528.200860 | \n",
+ " SUV | \n",
+ " Small | \n",
+ " A | \n",
+ "
\n",
+ " \n",
+ " 10897 | \n",
+ " MM70762 | \n",
+ " Arizona | \n",
+ " 9075.768214 | \n",
+ " Yes | \n",
+ " Basic | \n",
+ " Master | \n",
+ " 1/26/11 | \n",
+ " Employed | \n",
+ " M | \n",
+ " 37722 | \n",
+ " ... | \n",
+ " 0.0 | \n",
+ " 8 | \n",
+ " Personal Auto | \n",
+ " Personal L1 | \n",
+ " Offer1 | \n",
+ " Agent | \n",
+ " 158.077504 | \n",
+ " Sports Car | \n",
+ " Medsize | \n",
+ " A | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
1399 rows × 25 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " CUSTOMER STATE CUSTOMER_LIFETIME_VALUE RESPONSE COVERAGE \\\n",
+ "3 XL78013 Oregon 22332.439460 Yes Extended \n",
+ "8 FM55990 California 5989.773931 Yes Premium \n",
+ "15 CW49887 California 4626.801093 Yes Basic \n",
+ "19 NJ54277 California 3746.751625 Yes Extended \n",
+ "27 MQ68407 Oregon 4376.363592 Yes Premium \n",
+ "... ... ... ... ... ... \n",
+ "10844 FM31768 Arizona 5979.724161 Yes Extended \n",
+ "10852 KZ80424 Washington 8382.478392 Yes Basic \n",
+ "10872 XT67997 California 5979.724161 Yes Extended \n",
+ "10887 BY78730 Oregon 8879.790017 Yes Basic \n",
+ "10897 MM70762 Arizona 9075.768214 Yes Basic \n",
+ "\n",
+ " EDUCATION EFFECTIVE_TO_DATE EMPLOYMENT_STATUS GENDER \\\n",
+ "3 College 1/11/11 Employed M \n",
+ "8 College 1/19/11 Employed M \n",
+ "15 Master 1/16/11 Employed F \n",
+ "19 College 2/26/11 Employed F \n",
+ "27 Bachelor 2/28/11 Employed F \n",
+ "... ... ... ... ... \n",
+ "10844 High School or Below 2/7/11 Employed F \n",
+ "10852 Bachelor 1/27/11 Employed M \n",
+ "10872 High School or Below 2/7/11 Employed F \n",
+ "10887 High School or Below 2/3/11 Employed F \n",
+ "10897 Master 1/26/11 Employed M \n",
+ "\n",
+ " INCOME ... NUMBER_OF_OPEN_COMPLAINTS NUMBER_OF_POLICIES \\\n",
+ "3 49078 ... 0.0 2 \n",
+ "8 66839 ... 0.0 1 \n",
+ "15 79487 ... 0.0 1 \n",
+ "19 41479 ... 1.0 1 \n",
+ "27 63774 ... 0.0 1 \n",
+ "... ... ... ... ... \n",
+ "10844 57693 ... 0.0 3 \n",
+ "10852 25998 ... 0.0 2 \n",
+ "10872 57693 ... 0.0 3 \n",
+ "10887 52583 ... 0.0 7 \n",
+ "10897 37722 ... 0.0 8 \n",
+ "\n",
+ " POLICY_TYPE POLICY RENEW_OFFER_TYPE SALES_CHANNEL \\\n",
+ "3 Corporate Auto Corporate L3 Offer2 Branch \n",
+ "8 Personal Auto Personal L1 Offer2 Branch \n",
+ "15 Special Auto Special L1 Offer2 Branch \n",
+ "19 Personal Auto Personal L2 Offer2 Call Center \n",
+ "27 Personal Auto Personal L3 Offer2 Agent \n",
+ "... ... ... ... ... \n",
+ "10844 Personal Auto Personal L1 Offer2 Agent \n",
+ "10852 Personal Auto Personal L2 Offer2 Call Center \n",
+ "10872 Personal Auto Personal L3 Offer2 Agent \n",
+ "10887 Special Auto Special L2 Offer1 Agent \n",
+ "10897 Personal Auto Personal L1 Offer1 Agent \n",
+ "\n",
+ " TOTAL_CLAIM_AMOUNT VEHICLE_CLASS VEHICLE_SIZE VEHICLE_TYPE \n",
+ "3 484.013411 Four-Door Car Medsize A \n",
+ "8 739.200000 Sports Car Medsize NaN \n",
+ "15 547.200000 SUV Medsize NaN \n",
+ "19 19.575683 Two-Door Car Large A \n",
+ "27 60.036683 Four-Door Car Medsize NaN \n",
+ "... ... ... ... ... \n",
+ "10844 547.200000 Four-Door Car Medsize NaN \n",
+ "10852 791.878042 NaN NaN A \n",
+ "10872 547.200000 Four-Door Car Medsize NaN \n",
+ "10887 528.200860 SUV Small A \n",
+ "10897 158.077504 Sports Car Medsize A \n",
+ "\n",
+ "[1399 rows x 25 columns]"
]
- },
- {
- "cell_type": "markdown",
- "id": "b60a4443-a1a7-4bbf-b78e-9ccdf9895e0d",
- "metadata": {
- "id": "b60a4443-a1a7-4bbf-b78e-9ccdf9895e0d"
- },
- "source": [
- "4. Find the maximum, minimum, and median customer lifetime value by education level and gender. Write your conclusions."
+ },
+ "execution_count": 31,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "filtered_customers = marketing_customer_analysis.loc[marketing_customer_analysis['RESPONSE'].eq('Yes') & marketing_customer_analysis['TOTAL_CLAIM_AMOUNT'].lt(1000)]\n",
+ "filtered_customers  # \"Yes\" responders whose TOTAL_CLAIM_AMOUNT is below 1000"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "b9be383e-5165-436e-80c8-57d4c757c8c3",
+ "metadata": {
+ "id": "b9be383e-5165-436e-80c8-57d4c757c8c3"
+ },
+ "source": [
+ "2. Using the original Dataframe, analyze:\n",
+ " - the average `monthly_premium` and/or customer lifetime value by `policy_type` and `gender` for customers who responded \"Yes\", and\n",
+ " - compare these insights to `total_claim_amount` patterns, and discuss which segments appear most profitable or low-risk for the company."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 32,
+ "id": "30fb6dd3-cbea-4595-82ed-381e87bb37bd",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " CUSTOMER | \n",
+ " STATE | \n",
+ " CUSTOMER_LIFETIME_VALUE | \n",
+ " RESPONSE | \n",
+ " COVERAGE | \n",
+ " EDUCATION | \n",
+ " EFFECTIVE_TO_DATE | \n",
+ " EMPLOYMENT_STATUS | \n",
+ " GENDER | \n",
+ " INCOME | \n",
+ " ... | \n",
+ " NUMBER_OF_OPEN_COMPLAINTS | \n",
+ " NUMBER_OF_POLICIES | \n",
+ " POLICY_TYPE | \n",
+ " POLICY | \n",
+ " RENEW_OFFER_TYPE | \n",
+ " SALES_CHANNEL | \n",
+ " TOTAL_CLAIM_AMOUNT | \n",
+ " VEHICLE_CLASS | \n",
+ " VEHICLE_SIZE | \n",
+ " VEHICLE_TYPE | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 3 | \n",
+ " XL78013 | \n",
+ " Oregon | \n",
+ " 22332.439460 | \n",
+ " Yes | \n",
+ " Extended | \n",
+ " College | \n",
+ " 1/11/11 | \n",
+ " Employed | \n",
+ " M | \n",
+ " 49078 | \n",
+ " ... | \n",
+ " 0.0 | \n",
+ " 2 | \n",
+ " Corporate Auto | \n",
+ " Corporate L3 | \n",
+ " Offer2 | \n",
+ " Branch | \n",
+ " 484.013411 | \n",
+ " Four-Door Car | \n",
+ " Medsize | \n",
+ " A | \n",
+ "
\n",
+ " \n",
+ " 8 | \n",
+ " FM55990 | \n",
+ " California | \n",
+ " 5989.773931 | \n",
+ " Yes | \n",
+ " Premium | \n",
+ " College | \n",
+ " 1/19/11 | \n",
+ " Employed | \n",
+ " M | \n",
+ " 66839 | \n",
+ " ... | \n",
+ " 0.0 | \n",
+ " 1 | \n",
+ " Personal Auto | \n",
+ " Personal L1 | \n",
+ " Offer2 | \n",
+ " Branch | \n",
+ " 739.200000 | \n",
+ " Sports Car | \n",
+ " Medsize | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 15 | \n",
+ " CW49887 | \n",
+ " California | \n",
+ " 4626.801093 | \n",
+ " Yes | \n",
+ " Basic | \n",
+ " Master | \n",
+ " 1/16/11 | \n",
+ " Employed | \n",
+ " F | \n",
+ " 79487 | \n",
+ " ... | \n",
+ " 0.0 | \n",
+ " 1 | \n",
+ " Special Auto | \n",
+ " Special L1 | \n",
+ " Offer2 | \n",
+ " Branch | \n",
+ " 547.200000 | \n",
+ " SUV | \n",
+ " Medsize | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 19 | \n",
+ " NJ54277 | \n",
+ " California | \n",
+ " 3746.751625 | \n",
+ " Yes | \n",
+ " Extended | \n",
+ " College | \n",
+ " 2/26/11 | \n",
+ " Employed | \n",
+ " F | \n",
+ " 41479 | \n",
+ " ... | \n",
+ " 1.0 | \n",
+ " 1 | \n",
+ " Personal Auto | \n",
+ " Personal L2 | \n",
+ " Offer2 | \n",
+ " Call Center | \n",
+ " 19.575683 | \n",
+ " Two-Door Car | \n",
+ " Large | \n",
+ " A | \n",
+ "
\n",
+ " \n",
+ " 27 | \n",
+ " MQ68407 | \n",
+ " Oregon | \n",
+ " 4376.363592 | \n",
+ " Yes | \n",
+ " Premium | \n",
+ " Bachelor | \n",
+ " 2/28/11 | \n",
+ " Employed | \n",
+ " F | \n",
+ " 63774 | \n",
+ " ... | \n",
+ " 0.0 | \n",
+ " 1 | \n",
+ " Personal Auto | \n",
+ " Personal L3 | \n",
+ " Offer2 | \n",
+ " Agent | \n",
+ " 60.036683 | \n",
+ " Four-Door Car | \n",
+ " Medsize | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 10844 | \n",
+ " FM31768 | \n",
+ " Arizona | \n",
+ " 5979.724161 | \n",
+ " Yes | \n",
+ " Extended | \n",
+ " High School or Below | \n",
+ " 2/7/11 | \n",
+ " Employed | \n",
+ " F | \n",
+ " 57693 | \n",
+ " ... | \n",
+ " 0.0 | \n",
+ " 3 | \n",
+ " Personal Auto | \n",
+ " Personal L1 | \n",
+ " Offer2 | \n",
+ " Agent | \n",
+ " 547.200000 | \n",
+ " Four-Door Car | \n",
+ " Medsize | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 10852 | \n",
+ " KZ80424 | \n",
+ " Washington | \n",
+ " 8382.478392 | \n",
+ " Yes | \n",
+ " Basic | \n",
+ " Bachelor | \n",
+ " 1/27/11 | \n",
+ " Employed | \n",
+ " M | \n",
+ " 25998 | \n",
+ " ... | \n",
+ " 0.0 | \n",
+ " 2 | \n",
+ " Personal Auto | \n",
+ " Personal L2 | \n",
+ " Offer2 | \n",
+ " Call Center | \n",
+ " 791.878042 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " A | \n",
+ "
\n",
+ " \n",
+ " 10872 | \n",
+ " XT67997 | \n",
+ " California | \n",
+ " 5979.724161 | \n",
+ " Yes | \n",
+ " Extended | \n",
+ " High School or Below | \n",
+ " 2/7/11 | \n",
+ " Employed | \n",
+ " F | \n",
+ " 57693 | \n",
+ " ... | \n",
+ " 0.0 | \n",
+ " 3 | \n",
+ " Personal Auto | \n",
+ " Personal L3 | \n",
+ " Offer2 | \n",
+ " Agent | \n",
+ " 547.200000 | \n",
+ " Four-Door Car | \n",
+ " Medsize | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 10887 | \n",
+ " BY78730 | \n",
+ " Oregon | \n",
+ " 8879.790017 | \n",
+ " Yes | \n",
+ " Basic | \n",
+ " High School or Below | \n",
+ " 2/3/11 | \n",
+ " Employed | \n",
+ " F | \n",
+ " 52583 | \n",
+ " ... | \n",
+ " 0.0 | \n",
+ " 7 | \n",
+ " Special Auto | \n",
+ " Special L2 | \n",
+ " Offer1 | \n",
+ " Agent | \n",
+ " 528.200860 | \n",
+ " SUV | \n",
+ " Small | \n",
+ " A | \n",
+ "
\n",
+ " \n",
+ " 10897 | \n",
+ " MM70762 | \n",
+ " Arizona | \n",
+ " 9075.768214 | \n",
+ " Yes | \n",
+ " Basic | \n",
+ " Master | \n",
+ " 1/26/11 | \n",
+ " Employed | \n",
+ " M | \n",
+ " 37722 | \n",
+ " ... | \n",
+ " 0.0 | \n",
+ " 8 | \n",
+ " Personal Auto | \n",
+ " Personal L1 | \n",
+ " Offer1 | \n",
+ " Agent | \n",
+ " 158.077504 | \n",
+ " Sports Car | \n",
+ " Medsize | \n",
+ " A | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
1466 rows × 25 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " CUSTOMER STATE CUSTOMER_LIFETIME_VALUE RESPONSE COVERAGE \\\n",
+ "3 XL78013 Oregon 22332.439460 Yes Extended \n",
+ "8 FM55990 California 5989.773931 Yes Premium \n",
+ "15 CW49887 California 4626.801093 Yes Basic \n",
+ "19 NJ54277 California 3746.751625 Yes Extended \n",
+ "27 MQ68407 Oregon 4376.363592 Yes Premium \n",
+ "... ... ... ... ... ... \n",
+ "10844 FM31768 Arizona 5979.724161 Yes Extended \n",
+ "10852 KZ80424 Washington 8382.478392 Yes Basic \n",
+ "10872 XT67997 California 5979.724161 Yes Extended \n",
+ "10887 BY78730 Oregon 8879.790017 Yes Basic \n",
+ "10897 MM70762 Arizona 9075.768214 Yes Basic \n",
+ "\n",
+ " EDUCATION EFFECTIVE_TO_DATE EMPLOYMENT_STATUS GENDER \\\n",
+ "3 College 1/11/11 Employed M \n",
+ "8 College 1/19/11 Employed M \n",
+ "15 Master 1/16/11 Employed F \n",
+ "19 College 2/26/11 Employed F \n",
+ "27 Bachelor 2/28/11 Employed F \n",
+ "... ... ... ... ... \n",
+ "10844 High School or Below 2/7/11 Employed F \n",
+ "10852 Bachelor 1/27/11 Employed M \n",
+ "10872 High School or Below 2/7/11 Employed F \n",
+ "10887 High School or Below 2/3/11 Employed F \n",
+ "10897 Master 1/26/11 Employed M \n",
+ "\n",
+ " INCOME ... NUMBER_OF_OPEN_COMPLAINTS NUMBER_OF_POLICIES \\\n",
+ "3 49078 ... 0.0 2 \n",
+ "8 66839 ... 0.0 1 \n",
+ "15 79487 ... 0.0 1 \n",
+ "19 41479 ... 1.0 1 \n",
+ "27 63774 ... 0.0 1 \n",
+ "... ... ... ... ... \n",
+ "10844 57693 ... 0.0 3 \n",
+ "10852 25998 ... 0.0 2 \n",
+ "10872 57693 ... 0.0 3 \n",
+ "10887 52583 ... 0.0 7 \n",
+ "10897 37722 ... 0.0 8 \n",
+ "\n",
+ " POLICY_TYPE POLICY RENEW_OFFER_TYPE SALES_CHANNEL \\\n",
+ "3 Corporate Auto Corporate L3 Offer2 Branch \n",
+ "8 Personal Auto Personal L1 Offer2 Branch \n",
+ "15 Special Auto Special L1 Offer2 Branch \n",
+ "19 Personal Auto Personal L2 Offer2 Call Center \n",
+ "27 Personal Auto Personal L3 Offer2 Agent \n",
+ "... ... ... ... ... \n",
+ "10844 Personal Auto Personal L1 Offer2 Agent \n",
+ "10852 Personal Auto Personal L2 Offer2 Call Center \n",
+ "10872 Personal Auto Personal L3 Offer2 Agent \n",
+ "10887 Special Auto Special L2 Offer1 Agent \n",
+ "10897 Personal Auto Personal L1 Offer1 Agent \n",
+ "\n",
+ " TOTAL_CLAIM_AMOUNT VEHICLE_CLASS VEHICLE_SIZE VEHICLE_TYPE \n",
+ "3 484.013411 Four-Door Car Medsize A \n",
+ "8 739.200000 Sports Car Medsize NaN \n",
+ "15 547.200000 SUV Medsize NaN \n",
+ "19 19.575683 Two-Door Car Large A \n",
+ "27 60.036683 Four-Door Car Medsize NaN \n",
+ "... ... ... ... ... \n",
+ "10844 547.200000 Four-Door Car Medsize NaN \n",
+ "10852 791.878042 NaN NaN A \n",
+ "10872 547.200000 Four-Door Car Medsize NaN \n",
+ "10887 528.200860 SUV Small A \n",
+ "10897 158.077504 Sports Car Medsize A \n",
+ "\n",
+ "[1466 rows x 25 columns]"
]
- },
- {
- "cell_type": "markdown",
- "id": "b42999f9-311f-481e-ae63-40a5577072c5",
- "metadata": {
- "id": "b42999f9-311f-481e-ae63-40a5577072c5"
- },
- "source": [
- "## Bonus"
+ },
+ "execution_count": 32,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Keep only the rows whose RESPONSE column equals \"Yes\"\n",
+ "yes_customers = marketing_customer_analysis.loc[marketing_customer_analysis['RESPONSE'].eq('Yes')]\n",
+ "yes_customers"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 35,
+ "id": "8f2d8967-b2cb-4daf-8365-0512f54431b6",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " POLICY_TYPE | \n",
+ " GENDER | \n",
+ " avg_monthly_premium | \n",
+ " avg_lifetime_value | \n",
+ " avg_total_claim | \n",
+ " count | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " Corporate Auto | \n",
+ " F | \n",
+ " 94.301775 | \n",
+ " 7712.628736 | \n",
+ " 433.738499 | \n",
+ " 169 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " Corporate Auto | \n",
+ " M | \n",
+ " 92.188312 | \n",
+ " 7944.465414 | \n",
+ " 408.582459 | \n",
+ " 154 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " Personal Auto | \n",
+ " F | \n",
+ " 98.998148 | \n",
+ " 8339.791842 | \n",
+ " 452.965929 | \n",
+ " 540 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " Personal Auto | \n",
+ " M | \n",
+ " 91.085821 | \n",
+ " 7448.383281 | \n",
+ " 457.010178 | \n",
+ " 536 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " Special Auto | \n",
+ " F | \n",
+ " 92.314286 | \n",
+ " 7691.584111 | \n",
+ " 453.280164 | \n",
+ " 35 | \n",
+ "
\n",
+ " \n",
+ " 5 | \n",
+ " Special Auto | \n",
+ " M | \n",
+ " 86.343750 | \n",
+ " 8247.088702 | \n",
+ " 429.527942 | \n",
+ " 32 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " POLICY_TYPE GENDER avg_monthly_premium avg_lifetime_value \\\n",
+ "0 Corporate Auto F 94.301775 7712.628736 \n",
+ "1 Corporate Auto M 92.188312 7944.465414 \n",
+ "2 Personal Auto F 98.998148 8339.791842 \n",
+ "3 Personal Auto M 91.085821 7448.383281 \n",
+ "4 Special Auto F 92.314286 7691.584111 \n",
+ "5 Special Auto M 86.343750 8247.088702 \n",
+ "\n",
+ " avg_total_claim count \n",
+ "0 433.738499 169 \n",
+ "1 408.582459 154 \n",
+ "2 452.965929 540 \n",
+ "3 457.010178 536 \n",
+ "4 453.280164 35 \n",
+ "5 429.527942 32 "
]
- },
- {
- "cell_type": "markdown",
- "id": "81ff02c5-6584-4f21-a358-b918697c6432",
- "metadata": {
- "id": "81ff02c5-6584-4f21-a358-b918697c6432"
- },
- "source": [
- "5. The marketing team wants to analyze the number of policies sold by state and month. Present the data in a table where the months are arranged as columns and the states are arranged as rows."
+ },
+ "execution_count": 35,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Average premium, CLV and claim amount, plus the customer count, for each\n",
+ "# (POLICY_TYPE, GENDER) segment of the customers who answered \"Yes\"\n",
+ "grouped = yes_customers.groupby(['POLICY_TYPE', 'GENDER'], as_index=False).agg(\n",
+ "    avg_monthly_premium=pd.NamedAgg(column='MONTHLY_PREMIUM_AUTO', aggfunc='mean'),\n",
+ "    avg_lifetime_value=pd.NamedAgg(column='CUSTOMER_LIFETIME_VALUE', aggfunc='mean'),\n",
+ "    avg_total_claim=pd.NamedAgg(column='TOTAL_CLAIM_AMOUNT', aggfunc='mean'),\n",
+ "    count=pd.NamedAgg(column='CUSTOMER', aggfunc='count'),\n",
+ ")\n",
+ "grouped"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "7050f4ac-53c5-4193-a3c0-8699b87196f0",
+ "metadata": {
+ "id": "7050f4ac-53c5-4193-a3c0-8699b87196f0"
+ },
+ "source": [
+ "3. Analyze the total number of customers who have policies in each state, and then filter the results to only include states where there are more than 500 customers."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 36,
+ "id": "93ea874d-d818-45a7-9096-e9909c028ca5",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Number of non-null CUSTOMER ids per STATE, as a two-column frame\n",
+ "state_counts = (\n",
+ "    marketing_customer_analysis.groupby('STATE')['CUSTOMER'].count()\n",
+ "    .reset_index(name='NUM_CUSTOMERS')  # name the count column in the same step\n",
+ ")\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 37,
+ "id": "b05d6093-67b8-4ac8-92f4-d03cdd582f0f",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " STATE | \n",
+ " NUM_CUSTOMERS | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " Arizona | \n",
+ " 1937 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " California | \n",
+ " 3552 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " Nevada | \n",
+ " 993 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " Oregon | \n",
+ " 2909 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " Washington | \n",
+ " 888 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " STATE NUM_CUSTOMERS\n",
+ "0 Arizona 1937\n",
+ "1 California 3552\n",
+ "2 Nevada 993\n",
+ "3 Oregon 2909\n",
+ "4 Washington 888"
]
- },
- {
- "cell_type": "markdown",
- "id": "b6aec097-c633-4017-a125-e77a97259cda",
- "metadata": {
- "id": "b6aec097-c633-4017-a125-e77a97259cda"
- },
- "source": [
- "6. Display a new DataFrame that contains the number of policies sold by month, by state, for the top 3 states with the highest number of policies sold.\n",
- "\n",
- "*Hint:*\n",
- "- *To accomplish this, you will first need to group the data by state and month, then count the number of policies sold for each group. Afterwards, you will need to sort the data by the count of policies sold in descending order.*\n",
- "- *Next, you will select the top 3 states with the highest number of policies sold.*\n",
- "- *Finally, you will create a new DataFrame that contains the number of policies sold by month for each of the top 3 states.*"
+ },
+ "execution_count": 37,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# States with strictly more than 500 customers\n",
+ "large_states = state_counts.query('NUM_CUSTOMERS > 500')\n",
+ "\n",
+ "large_states"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "b60a4443-a1a7-4bbf-b78e-9ccdf9895e0d",
+ "metadata": {
+ "id": "b60a4443-a1a7-4bbf-b78e-9ccdf9895e0d"
+ },
+ "source": [
+ "4. Find the maximum, minimum, and median customer lifetime value by education level and gender. Write your conclusions."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 39,
+ "id": "47fa2f3e-9cd0-4098-963a-a90e3e8dda9f",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "clv_stats = marketing_customer_analysis.groupby(['EDUCATION', 'GENDER'], as_index=False).agg(  # CLV extremes and median per segment\n",
+ "    MAX_CLV=('CUSTOMER_LIFETIME_VALUE', 'max'),\n",
+ "    MIN_CLV=('CUSTOMER_LIFETIME_VALUE', 'min'),\n",
+ "    MEDIAN_CLV=('CUSTOMER_LIFETIME_VALUE', 'median'),\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 40,
+ "id": "ba796a7a-254c-41dd-9394-dc494c91fe44",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " EDUCATION | \n",
+ " GENDER | \n",
+ " MAX_CLV | \n",
+ " MIN_CLV | \n",
+ " MEDIAN_CLV | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " Bachelor | \n",
+ " F | \n",
+ " 73225.95652 | \n",
+ " 1904.000852 | \n",
+ " 5640.505303 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " Bachelor | \n",
+ " M | \n",
+ " 67907.27050 | \n",
+ " 1898.007675 | \n",
+ " 5548.031892 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " College | \n",
+ " F | \n",
+ " 61850.18803 | \n",
+ " 1898.683686 | \n",
+ " 5623.611187 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " College | \n",
+ " M | \n",
+ " 61134.68307 | \n",
+ " 1918.119700 | \n",
+ " 6005.847375 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " Doctor | \n",
+ " F | \n",
+ " 44856.11397 | \n",
+ " 2395.570000 | \n",
+ " 5332.462694 | \n",
+ "
\n",
+ " \n",
+ " 5 | \n",
+ " Doctor | \n",
+ " M | \n",
+ " 32677.34284 | \n",
+ " 2267.604038 | \n",
+ " 5577.669457 | \n",
+ "
\n",
+ " \n",
+ " 6 | \n",
+ " High School or Below | \n",
+ " F | \n",
+ " 55277.44589 | \n",
+ " 2144.921535 | \n",
+ " 6039.553187 | \n",
+ "
\n",
+ " \n",
+ " 7 | \n",
+ " High School or Below | \n",
+ " M | \n",
+ " 83325.38119 | \n",
+ " 1940.981221 | \n",
+ " 6286.731006 | \n",
+ "
\n",
+ " \n",
+ " 8 | \n",
+ " Master | \n",
+ " F | \n",
+ " 51016.06704 | \n",
+ " 2417.777032 | \n",
+ " 5729.855012 | \n",
+ "
\n",
+ " \n",
+ " 9 | \n",
+ " Master | \n",
+ " M | \n",
+ " 50568.25912 | \n",
+ " 2272.307310 | \n",
+ " 5579.099207 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " EDUCATION GENDER MAX_CLV MIN_CLV MEDIAN_CLV\n",
+ "0 Bachelor F 73225.95652 1904.000852 5640.505303\n",
+ "1 Bachelor M 67907.27050 1898.007675 5548.031892\n",
+ "2 College F 61850.18803 1898.683686 5623.611187\n",
+ "3 College M 61134.68307 1918.119700 6005.847375\n",
+ "4 Doctor F 44856.11397 2395.570000 5332.462694\n",
+ "5 Doctor M 32677.34284 2267.604038 5577.669457\n",
+ "6 High School or Below F 55277.44589 2144.921535 6039.553187\n",
+ "7 High School or Below M 83325.38119 1940.981221 6286.731006\n",
+ "8 Master F 51016.06704 2417.777032 5729.855012\n",
+ "9 Master M 50568.25912 2272.307310 5579.099207"
]
- },
- {
- "cell_type": "markdown",
- "id": "ba975b8a-a2cf-4fbf-9f59-ebc381767009",
- "metadata": {
- "id": "ba975b8a-a2cf-4fbf-9f59-ebc381767009"
- },
- "source": [
- "7. The marketing team wants to analyze the effect of different marketing channels on the customer response rate.\n",
- "\n",
- "Hint: You can use melt to unpivot the data and create a table that shows the customer response rate (those who responded \"Yes\") by marketing channel."
+ },
+ "execution_count": 40,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "clv_stats"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "08ef9663-972f-4862-aa61-1a5a42781d1d",
+ "metadata": {},
+ "source": [
+ "The analysis of customer lifetime value (CLV) by education and gender shows that median CLV is relatively consistent across all education levels, ranging roughly from 5,300 to 6,300, indicating that the typical customer contributes similarly regardless of education. \n",
+ "\n",
+ "However, the maximum CLV reveals high-value outliers, with Bachelor-educated females (~73k) and High School or Below males (~83k) representing the most profitable individual customers. \n",
+ "\n",
+ "Gender differences in median CLV are small, but max CLV varies significantly, highlighting the importance of targeting both steady revenue segments and high-value outliers. \n",
+ "\n",
+ "Overall, Bachelor and High School or Below segments appear most profitable, while Doctor, College, and Master holders provide reliable, moderate-risk revenue. \n",
+ "\n",
+ "These insights can guide marketing and retention strategies to focus on both high-potential and consistent-value customers."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "b42999f9-311f-481e-ae63-40a5577072c5",
+ "metadata": {
+ "id": "b42999f9-311f-481e-ae63-40a5577072c5"
+ },
+ "source": [
+ "## Bonus"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "81ff02c5-6584-4f21-a358-b918697c6432",
+ "metadata": {
+ "id": "81ff02c5-6584-4f21-a358-b918697c6432"
+ },
+ "source": [
+ "5. The marketing team wants to analyze the number of policies sold by state and month. Present the data in a table where the months are arranged as columns and the states are arranged as rows."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 41,
+ "id": "14492ced-6e95-444f-ac81-d6261c49b9b0",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "C:\\Users\\Kinga\\AppData\\Local\\Temp\\ipykernel_15836\\577112004.py:2: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n",
+ " marketing_customer_analysis['EFFECTIVE_TO_DATE'] = pd.to_datetime(marketing_customer_analysis['EFFECTIVE_TO_DATE'])\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Dates look like 2/18/11 (month/day/2-digit year); an explicit format avoids the per-element dateutil fallback warning\n",
+ "marketing_customer_analysis['EFFECTIVE_TO_DATE'] = pd.to_datetime(marketing_customer_analysis['EFFECTIVE_TO_DATE'], format='%m/%d/%y')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 42,
+ "id": "1b057f04-a8e3-4fd9-b923-55718393d17b",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Extract the month name from the date\n",
+ "marketing_customer_analysis['MONTH'] = marketing_customer_analysis['EFFECTIVE_TO_DATE'].dt.month_name()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 44,
+ "id": "13420fd7-f874-4bc0-b803-8c664332fd1b",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Count policies for every (state, month) pair: one row per state, one column per month\n",
+ "policies_by_state_month = pd.pivot_table(\n",
+ "    marketing_customer_analysis,\n",
+ "    values='POLICY',\n",
+ "    index='STATE',\n",
+ "    columns='MONTH',\n",
+ "    aggfunc='count',\n",
+ "    fill_value=0,  # state/month pairs with no rows show 0 instead of NaN\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 45,
+ "id": "b2714417-b755-41b7-957c-ae8cb111b6e6",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " MONTH | \n",
+ " January | \n",
+ " February | \n",
+ " March | \n",
+ " April | \n",
+ " May | \n",
+ " June | \n",
+ " July | \n",
+ " August | \n",
+ " September | \n",
+ " October | \n",
+ " November | \n",
+ " December | \n",
+ "
\n",
+ " \n",
+ " STATE | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " Arizona | \n",
+ " 1008 | \n",
+ " 929 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " California | \n",
+ " 1918 | \n",
+ " 1634 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " Nevada | \n",
+ " 551 | \n",
+ " 442 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " Oregon | \n",
+ " 1565 | \n",
+ " 1344 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " Washington | \n",
+ " 463 | \n",
+ " 425 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ "MONTH January February March April May June July August \\\n",
+ "STATE \n",
+ "Arizona 1008 929 0 0 0 0 0 0 \n",
+ "California 1918 1634 0 0 0 0 0 0 \n",
+ "Nevada 551 442 0 0 0 0 0 0 \n",
+ "Oregon 1565 1344 0 0 0 0 0 0 \n",
+ "Washington 463 425 0 0 0 0 0 0 \n",
+ "\n",
+ "MONTH September October November December \n",
+ "STATE \n",
+ "Arizona 0 0 0 0 \n",
+ "California 0 0 0 0 \n",
+ "Nevada 0 0 0 0 \n",
+ "Oregon 0 0 0 0 \n",
+ "Washington 0 0 0 0 "
]
- },
- {
- "cell_type": "markdown",
- "id": "e4378d94-48fb-4850-a802-b1bc8f427b2d",
- "metadata": {
- "id": "e4378d94-48fb-4850-a802-b1bc8f427b2d"
- },
- "source": [
- "External Resources for Data Filtering: https://towardsdatascience.com/filtering-data-frames-in-pandas-b570b1f834b9"
+ },
+ "execution_count": 45,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Put the month columns in calendar order; months absent from the data become all-zero columns\n",
+ "month_order = [\n",
+ "    'January', 'February', 'March', 'April', 'May', 'June',\n",
+ "    'July', 'August', 'September', 'October', 'November', 'December',\n",
+ "]\n",
+ "policies_by_state_month = policies_by_state_month.reindex(\n",
+ "    month_order, axis='columns', fill_value=0\n",
+ ")\n",
+ "\n",
+ "policies_by_state_month"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "b6aec097-c633-4017-a125-e77a97259cda",
+ "metadata": {
+ "id": "b6aec097-c633-4017-a125-e77a97259cda"
+ },
+ "source": [
+ "6. Display a new DataFrame that contains the number of policies sold by month, by state, for the top 3 states with the highest number of policies sold.\n",
+ "\n",
+ "*Hint:*\n",
+ "- *To accomplish this, you will first need to group the data by state and month, then count the number of policies sold for each group. Afterwards, you will need to sort the data by the count of policies sold in descending order.*\n",
+ "- *Next, you will select the top 3 states with the highest number of policies sold.*\n",
+ "- *Finally, you will create a new DataFrame that contains the number of policies sold by month for each of the top 3 states.*"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 47,
+ "id": "6ee33ba6-0f1d-4461-850f-071994231e54",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Total policy count per state, used to rank the states and pick the top 3\n",
+ "state_policy_counts = (\n",
+ "    marketing_customer_analysis\n",
+ "    .groupby('STATE')['POLICY']\n",
+ "    .count()\n",
+ "    .reset_index(name='TOTAL_POLICIES')\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 48,
+ "id": "d971221b-8358-473c-a016-49cc81628ddb",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Get top 3 states\n",
+ "# Names of the 3 states with the largest total policy counts\n",
+ "top_3_states = (\n",
+ "    state_policy_counts\n",
+ "    .nlargest(n=3, columns='TOTAL_POLICIES')\n",
+ "    .loc[:, 'STATE']\n",
+ "    .tolist()\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 49,
+ "id": "c3b1497f-8a26-451b-9b51-fa2267eb0afe",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Filter original data for top 3 states\n",
+ "# Keep only the rows whose STATE is one of the top 3 states\n",
+ "is_top_state = marketing_customer_analysis['STATE'].isin(top_3_states)\n",
+ "top_states_data = marketing_customer_analysis.loc[is_top_state]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 50,
+ "id": "f44a5c15-79e7-47f0-a0a5-34546aa4a31b",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Count policies per (state, month) for the top 3 states: states as rows, months as columns\n",
+ "policies_by_month_top_states = pd.pivot_table(\n",
+ "    top_states_data,\n",
+ "    values='POLICY',\n",
+ "    index='STATE',\n",
+ "    columns='MONTH',\n",
+ "    aggfunc='count',\n",
+ "    fill_value=0,  # state/month pairs with no rows show 0 instead of NaN\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 51,
+ "id": "f96b2878-7f30-4596-b2c9-5f339ce1271d",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Put the month columns in calendar order; months absent from the data become all-zero columns\n",
+ "month_order = [\n",
+ "    'January', 'February', 'March', 'April', 'May', 'June',\n",
+ "    'July', 'August', 'September', 'October', 'November', 'December',\n",
+ "]\n",
+ "policies_by_month_top_states = policies_by_month_top_states.reindex(\n",
+ "    month_order, axis='columns', fill_value=0\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 52,
+ "id": "1ea1f4d0-7bc6-4028-ae73-3c731cfaa20a",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " MONTH | \n",
+ " January | \n",
+ " February | \n",
+ " March | \n",
+ " April | \n",
+ " May | \n",
+ " June | \n",
+ " July | \n",
+ " August | \n",
+ " September | \n",
+ " October | \n",
+ " November | \n",
+ " December | \n",
+ "
\n",
+ " \n",
+ " STATE | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " Arizona | \n",
+ " 1008 | \n",
+ " 929 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " California | \n",
+ " 1918 | \n",
+ " 1634 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " Oregon | \n",
+ " 1565 | \n",
+ " 1344 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ "MONTH January February March April May June July August \\\n",
+ "STATE \n",
+ "Arizona 1008 929 0 0 0 0 0 0 \n",
+ "California 1918 1634 0 0 0 0 0 0 \n",
+ "Oregon 1565 1344 0 0 0 0 0 0 \n",
+ "\n",
+ "MONTH September October November December \n",
+ "STATE \n",
+ "Arizona 0 0 0 0 \n",
+ "California 0 0 0 0 \n",
+ "Oregon 0 0 0 0 "
]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "449513f4-0459-46a0-a18d-9398d974c9ad",
- "metadata": {
- "id": "449513f4-0459-46a0-a18d-9398d974c9ad"
- },
- "outputs": [],
- "source": [
- "# your code goes here"
+ },
+ "execution_count": 52,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "policies_by_month_top_states"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "ba975b8a-a2cf-4fbf-9f59-ebc381767009",
+ "metadata": {
+ "id": "ba975b8a-a2cf-4fbf-9f59-ebc381767009"
+ },
+ "source": [
+ "7. The marketing team wants to analyze the effect of different marketing channels on the customer response rate.\n",
+ "\n",
+ "Hint: You can use melt to unpivot the data and create a table that shows the customer response rate (those who responded \"Yes\") by marketing channel."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "e4378d94-48fb-4850-a802-b1bc8f427b2d",
+ "metadata": {
+ "id": "e4378d94-48fb-4850-a802-b1bc8f427b2d"
+ },
+ "source": [
+ "External Resources for Data Filtering: https://towardsdatascience.com/filtering-data-frames-in-pandas-b570b1f834b9"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 53,
+ "id": "449513f4-0459-46a0-a18d-9398d974c9ad",
+ "metadata": {
+ "id": "449513f4-0459-46a0-a18d-9398d974c9ad"
+ },
+ "outputs": [],
+ "source": [
+ "# Encode RESPONSE as 1 ('Yes') / 0 ('No') so responders can be summed; any other value becomes NaN\n",
+ "response_codes = {'Yes': 1, 'No': 0}\n",
+ "marketing_customer_analysis['RESPONSE_NUM'] = marketing_customer_analysis['RESPONSE'].map(response_codes)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 54,
+ "id": "2a67d763-4a5c-4a5d-9e75-ccb49713f880",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Per sales channel: number of customers and number of 'Yes' responses.\n",
+ "# NOTE(review): RESPONSE values other than 'Yes'/'No' are NaN in RESPONSE_NUM; 'sum' skips them,\n",
+ "# but 'count' on CUSTOMER still includes those rows -- confirm that is the intended denominator.\n",
+ "channel_groups = marketing_customer_analysis.groupby('SALES_CHANNEL')\n",
+ "response_by_channel = channel_groups.agg(\n",
+ "    TOTAL_CUSTOMERS=('CUSTOMER', 'count'),\n",
+ "    RESPONSES=('RESPONSE_NUM', 'sum'),\n",
+ ").reset_index()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 55,
+ "id": "f3b9ad30-2a8a-442e-970e-1d1b19b29c1f",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Share of each channel's customers who responded 'Yes', expressed as a percentage\n",
+ "response_by_channel['RESPONSE_RATE'] = (\n",
+ "    response_by_channel['RESPONSES']\n",
+ "    .div(response_by_channel['TOTAL_CUSTOMERS'])\n",
+ "    .mul(100)\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 56,
+ "id": "d1faf9e8-a7d5-4924-821f-def126996c19",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Rank the channels from highest to lowest response rate\n",
+ "response_by_channel = response_by_channel.sort_values(\n",
+ "    'RESPONSE_RATE',\n",
+ "    ascending=False,\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 57,
+ "id": "abaa9add-a6e7-4fb3-892d-2ca24a4b9635",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " SALES_CHANNEL | \n",
+ " TOTAL_CUSTOMERS | \n",
+ " RESPONSES | \n",
+ " RESPONSE_RATE | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " Agent | \n",
+ " 4121 | \n",
+ " 742.0 | \n",
+ " 18.005339 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " Web | \n",
+ " 1626 | \n",
+ " 177.0 | \n",
+ " 10.885609 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " Branch | \n",
+ " 3022 | \n",
+ " 326.0 | \n",
+ " 10.787558 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " Call Center | \n",
+ " 2141 | \n",
+ " 221.0 | \n",
+ " 10.322279 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " SALES_CHANNEL TOTAL_CUSTOMERS RESPONSES RESPONSE_RATE\n",
+ "0 Agent 4121 742.0 18.005339\n",
+ "3 Web 1626 177.0 10.885609\n",
+ "1 Branch 3022 326.0 10.787558\n",
+ "2 Call Center 2141 221.0 10.322279"
]
+ },
+ "execution_count": 57,
+ "metadata": {},
+ "output_type": "execute_result"
}
- ],
- "metadata": {
- "colab": {
- "provenance": []
- },
- "kernelspec": {
- "display_name": "Python 3 (ipykernel)",
- "language": "python",
- "name": "python3"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.9.13"
- }
- },
- "nbformat": 4,
- "nbformat_minor": 5
+ ],
+ "source": [
+ "response_by_channel"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "009b536d-5755-4214-a01d-ebed69c2d08f",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "colab": {
+ "provenance": []
+ },
+ "kernelspec": {
+ "display_name": "Python [conda env:base] *",
+ "language": "python",
+ "name": "conda-base-py"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.11.5"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
}