diff --git a/lab-dw-aggregating.ipynb b/lab-dw-aggregating.ipynb index fadd718..bfe6bab 100644 --- a/lab-dw-aggregating.ipynb +++ b/lab-dw-aggregating.ipynb @@ -1,165 +1,2769 @@ { - "cells": [ + "cells": [ + { + "cell_type": "markdown", + "id": "31969215-2a90-4d8b-ac36-646a7ae13744", + "metadata": { + "id": "31969215-2a90-4d8b-ac36-646a7ae13744" + }, + "source": [ + "# Lab | Data Aggregation and Filtering" + ] + }, + { + "cell_type": "markdown", + "id": "a8f08a52-bec0-439b-99cc-11d3809d8b5d", + "metadata": { + "id": "a8f08a52-bec0-439b-99cc-11d3809d8b5d" + }, + "source": [ + "In this challenge, we will continue to work with customer data from an insurance company. We will use the dataset called marketing_customer_analysis.csv, which can be found at the following link:\n", + "\n", + "https://raw.githubusercontent.com/data-bootcamp-v4/data/main/marketing_customer_analysis.csv\n", + "\n", + "This dataset contains information such as customer demographics, policy details, vehicle information, and the customer's response to the last marketing campaign. Our goal is to explore and analyze this data by first performing data cleaning, formatting, and structuring." + ] + }, + { + "cell_type": "markdown", + "id": "9c98ddc5-b041-4c94-ada1-4dfee5c98e50", + "metadata": { + "id": "9c98ddc5-b041-4c94-ada1-4dfee5c98e50" + }, + "source": [ + "1. Create a new DataFrame that only includes customers who:\n", + " - have a **low total_claim_amount** (e.g., below $1,000),\n", + " - have a response \"Yes\" to the last marketing campaign." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "8f1f3dea", + "metadata": {}, + "outputs": [ { - "cell_type": "markdown", - "id": "31969215-2a90-4d8b-ac36-646a7ae13744", - "metadata": { - "id": "31969215-2a90-4d8b-ac36-646a7ae13744" + "data": { + "application/vnd.microsoft.datawrangler.viewer.v0+json": { + "columns": [ + { + "name": "index", + "rawType": "int64", + "type": "integer" + }, + { + "name": "Unnamed: 0", + "rawType": "int64", + "type": "integer" + }, + { + "name": "Customer", + "rawType": "object", + "type": "string" + }, + { + "name": "State", + "rawType": "object", + "type": "string" + }, + { + "name": "Customer Lifetime Value", + "rawType": "float64", + "type": "float" + }, + { + "name": "Response", + "rawType": "object", + "type": "string" + }, + { + "name": "Coverage", + "rawType": "object", + "type": "string" + }, + { + "name": "Education", + "rawType": "object", + "type": "string" + }, + { + "name": "Effective To Date", + "rawType": "object", + "type": "string" + }, + { + "name": "EmploymentStatus", + "rawType": "object", + "type": "string" + }, + { + "name": "Gender", + "rawType": "object", + "type": "string" + }, + { + "name": "Income", + "rawType": "int64", + "type": "integer" + }, + { + "name": "Location Code", + "rawType": "object", + "type": "string" + }, + { + "name": "Marital Status", + "rawType": "object", + "type": "string" + }, + { + "name": "Monthly Premium Auto", + "rawType": "int64", + "type": "integer" + }, + { + "name": "Months Since Last Claim", + "rawType": "float64", + "type": "float" + }, + { + "name": "Months Since Policy Inception", + "rawType": "int64", + "type": "integer" + }, + { + "name": "Number of Open Complaints", + "rawType": "float64", + "type": "float" + }, + { + "name": "Number of Policies", + "rawType": "int64", + "type": "integer" + }, + { + "name": "Policy Type", + "rawType": "object", + "type": "string" + }, + { + "name": "Policy", + "rawType": "object", + "type": "string" + }, + { + "name": 
"Renew Offer Type", + "rawType": "object", + "type": "string" + }, + { + "name": "Sales Channel", + "rawType": "object", + "type": "string" + }, + { + "name": "Total Claim Amount", + "rawType": "float64", + "type": "float" + }, + { + "name": "Vehicle Class", + "rawType": "object", + "type": "string" + }, + { + "name": "Vehicle Size", + "rawType": "object", + "type": "string" + }, + { + "name": "Vehicle Type", + "rawType": "object", + "type": "unknown" + } + ], + "ref": "0183b8b4-b53c-4959-93ef-3679d8daee96", + "rows": [ + [ + "0", + "0", + "DK49336", + "Arizona", + "4809.21696", + "No", + "Basic", + "College", + "2/18/11", + "Employed", + "M", + "48029", + "Suburban", + "Married", + "61", + "7.0", + "52", + "0.0", + "9", + "Corporate Auto", + "Corporate L3", + "Offer3", + "Agent", + "292.8", + "Four-Door Car", + "Medsize", + null + ], + [ + "1", + "1", + "KX64629", + "California", + "2228.525238", + "No", + "Basic", + "College", + "1/18/11", + "Unemployed", + "F", + "0", + "Suburban", + "Single", + "64", + "3.0", + "26", + "0.0", + "1", + "Personal Auto", + "Personal L3", + "Offer4", + "Call Center", + "744.924331", + "Four-Door Car", + "Medsize", + null + ], + [ + "2", + "2", + "LZ68649", + "Washington", + "14947.9173", + "No", + "Basic", + "Bachelor", + "2/10/11", + "Employed", + "M", + "22139", + "Suburban", + "Single", + "100", + "34.0", + "31", + "0.0", + "2", + "Personal Auto", + "Personal L3", + "Offer3", + "Call Center", + "480.0", + "SUV", + "Medsize", + "A" + ], + [ + "3", + "3", + "XL78013", + "Oregon", + "22332.43946", + "Yes", + "Extended", + "College", + "1/11/11", + "Employed", + "M", + "49078", + "Suburban", + "Single", + "97", + "10.0", + "3", + "0.0", + "2", + "Corporate Auto", + "Corporate L3", + "Offer2", + "Branch", + "484.013411", + "Four-Door Car", + "Medsize", + "A" + ], + [ + "4", + "4", + "QA50777", + "Oregon", + "9025.067525", + "No", + "Premium", + "Bachelor", + "1/17/11", + "Medical Leave", + "F", + "23675", + "Suburban", + "Married", + 
"117", + null, + "31", + null, + "7", + "Personal Auto", + "Personal L2", + "Offer1", + "Branch", + "707.925645", + "Four-Door Car", + "Medsize", + null + ] + ], + "shape": { + "columns": 26, + "rows": 5 + } }, - "source": [ - "# Lab | Data Aggregation and Filtering" + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Unnamed: 0CustomerStateCustomer Lifetime ValueResponseCoverageEducationEffective To DateEmploymentStatusGender...Number of Open ComplaintsNumber of PoliciesPolicy TypePolicyRenew Offer TypeSales ChannelTotal Claim AmountVehicle ClassVehicle SizeVehicle Type
00DK49336Arizona4809.216960NoBasicCollege2/18/11EmployedM...0.09Corporate AutoCorporate L3Offer3Agent292.800000Four-Door CarMedsizeNaN
11KX64629California2228.525238NoBasicCollege1/18/11UnemployedF...0.01Personal AutoPersonal L3Offer4Call Center744.924331Four-Door CarMedsizeNaN
22LZ68649Washington14947.917300NoBasicBachelor2/10/11EmployedM...0.02Personal AutoPersonal L3Offer3Call Center480.000000SUVMedsizeA
33XL78013Oregon22332.439460YesExtendedCollege1/11/11EmployedM...0.02Corporate AutoCorporate L3Offer2Branch484.013411Four-Door CarMedsizeA
44QA50777Oregon9025.067525NoPremiumBachelor1/17/11Medical LeaveF...NaN7Personal AutoPersonal L2Offer1Branch707.925645Four-Door CarMedsizeNaN
\n", + "

5 rows × 26 columns

\n", + "
" + ], + "text/plain": [ + " Unnamed: 0 Customer State Customer Lifetime Value Response \\\n", + "0 0 DK49336 Arizona 4809.216960 No \n", + "1 1 KX64629 California 2228.525238 No \n", + "2 2 LZ68649 Washington 14947.917300 No \n", + "3 3 XL78013 Oregon 22332.439460 Yes \n", + "4 4 QA50777 Oregon 9025.067525 No \n", + "\n", + " Coverage Education Effective To Date EmploymentStatus Gender ... \\\n", + "0 Basic College 2/18/11 Employed M ... \n", + "1 Basic College 1/18/11 Unemployed F ... \n", + "2 Basic Bachelor 2/10/11 Employed M ... \n", + "3 Extended College 1/11/11 Employed M ... \n", + "4 Premium Bachelor 1/17/11 Medical Leave F ... \n", + "\n", + " Number of Open Complaints Number of Policies Policy Type Policy \\\n", + "0 0.0 9 Corporate Auto Corporate L3 \n", + "1 0.0 1 Personal Auto Personal L3 \n", + "2 0.0 2 Personal Auto Personal L3 \n", + "3 0.0 2 Corporate Auto Corporate L3 \n", + "4 NaN 7 Personal Auto Personal L2 \n", + "\n", + " Renew Offer Type Sales Channel Total Claim Amount Vehicle Class \\\n", + "0 Offer3 Agent 292.800000 Four-Door Car \n", + "1 Offer4 Call Center 744.924331 Four-Door Car \n", + "2 Offer3 Call Center 480.000000 SUV \n", + "3 Offer2 Branch 484.013411 Four-Door Car \n", + "4 Offer1 Branch 707.925645 Four-Door Car \n", + "\n", + " Vehicle Size Vehicle Type \n", + "0 Medsize NaN \n", + "1 Medsize NaN \n", + "2 Medsize A \n", + "3 Medsize A \n", + "4 Medsize NaN \n", + "\n", + "[5 rows x 26 columns]" ] - }, + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import pandas as pd\n", + "\n", + "url = \"https://raw.githubusercontent.com/data-bootcamp-v4/data/main/marketing_customer_analysis.csv\"\n", + "df = pd.read_csv(url)\n", + "\n", + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "ec7b25d6", + "metadata": {}, + "outputs": [ { - "cell_type": "markdown", - "id": "a8f08a52-bec0-439b-99cc-11d3809d8b5d", - "metadata": { - "id": 
"a8f08a52-bec0-439b-99cc-11d3809d8b5d" - }, - "source": [ - "In this challenge, we will continue to work with customer data from an insurance company. We will use the dataset called marketing_customer_analysis.csv, which can be found at the following link:\n", - "\n", - "https://raw.githubusercontent.com/data-bootcamp-v4/data/main/marketing_customer_analysis.csv\n", - "\n", - "This dataset contains information such as customer demographics, policy details, vehicle information, and the customer's response to the last marketing campaign. Our goal is to explore and analyze this data by first performing data cleaning, formatting, and structuring." - ] - }, + "name": "stdout", + "output_type": "stream", + "text": [ + "0 292.800000\n", + "1 744.924331\n", + "2 480.000000\n", + "3 484.013411\n", + "4 707.925645\n", + " ... \n", + "10905 1214.400000\n", + "10906 273.018929\n", + "10907 381.306996\n", + "10908 618.288849\n", + "10909 1021.719397\n", + "Name: Total Claim Amount, Length: 10910, dtype: float64\n", + "0 292\n", + "1 744\n", + "2 480\n", + "3 484\n", + "4 707\n", + " ... \n", + "10905 1214\n", + "10906 273\n", + "10907 381\n", + "10908 618\n", + "10909 1021\n", + "Name: Total Claim Amount, Length: 10910, dtype: int64\n", + "Los nulos son: 0\n" + ] + } + ], + "source": [ + "print(df['Total Claim Amount'])\n", + "\n", + "# Convert 'Total Claim Amount' to integer (astype(int) truncates the decimals, e.g. 744.924331 -> 744, and overwrites the original column)\n", + "df['Total Claim Amount'] = df['Total Claim Amount'].astype(int)\n", + "\n", + "print(df['Total Claim Amount'])\n", + "\n", + "print('Los nulos son:', df['Total Claim Amount'].isnull().sum()) # Check for null values" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "c2f8ba03", + "metadata": {}, + "outputs": [ { - "cell_type": "markdown", - "id": "9c98ddc5-b041-4c94-ada1-4dfee5c98e50", - "metadata": { - "id": "9c98ddc5-b041-4c94-ada1-4dfee5c98e50" - }, - "source": [ - "1. 
Create a new DataFrame that only includes customers who:\n", - " - have a **low total_claim_amount** (e.g., below $1,000),\n", - " - have a response \"Yes\" to the last marketing campaign." + "data": { + "text/plain": [ + "array(['No', 'Yes', nan], dtype=object)" ] - }, + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df['Response'].unique() # Check unique values in 'Response' column" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "831526e7", + "metadata": {}, + "outputs": [ { - "cell_type": "markdown", - "id": "b9be383e-5165-436e-80c8-57d4c757c8c3", - "metadata": { - "id": "b9be383e-5165-436e-80c8-57d4c757c8c3" + "data": { + "application/vnd.microsoft.datawrangler.viewer.v0+json": { + "columns": [ + { + "name": "index", + "rawType": "int64", + "type": "integer" + }, + { + "name": "Unnamed: 0", + "rawType": "int64", + "type": "integer" + }, + { + "name": "Customer", + "rawType": "object", + "type": "string" + }, + { + "name": "State", + "rawType": "object", + "type": "string" + }, + { + "name": "Customer Lifetime Value", + "rawType": "float64", + "type": "float" + }, + { + "name": "Response", + "rawType": "object", + "type": "string" + }, + { + "name": "Coverage", + "rawType": "object", + "type": "string" + }, + { + "name": "Education", + "rawType": "object", + "type": "string" + }, + { + "name": "Effective To Date", + "rawType": "object", + "type": "string" + }, + { + "name": "EmploymentStatus", + "rawType": "object", + "type": "string" + }, + { + "name": "Gender", + "rawType": "object", + "type": "string" + }, + { + "name": "Income", + "rawType": "int64", + "type": "integer" + }, + { + "name": "Location Code", + "rawType": "object", + "type": "string" + }, + { + "name": "Marital Status", + "rawType": "object", + "type": "string" + }, + { + "name": "Monthly Premium Auto", + "rawType": "int64", + "type": "integer" + }, + { + "name": "Months Since Last Claim", + "rawType": "float64", + 
"type": "float" + }, + { + "name": "Months Since Policy Inception", + "rawType": "int64", + "type": "integer" + }, + { + "name": "Number of Open Complaints", + "rawType": "float64", + "type": "float" + }, + { + "name": "Number of Policies", + "rawType": "int64", + "type": "integer" + }, + { + "name": "Policy Type", + "rawType": "object", + "type": "string" + }, + { + "name": "Policy", + "rawType": "object", + "type": "string" + }, + { + "name": "Renew Offer Type", + "rawType": "object", + "type": "string" + }, + { + "name": "Sales Channel", + "rawType": "object", + "type": "string" + }, + { + "name": "Total Claim Amount", + "rawType": "int64", + "type": "integer" + }, + { + "name": "Vehicle Class", + "rawType": "object", + "type": "string" + }, + { + "name": "Vehicle Size", + "rawType": "object", + "type": "string" + }, + { + "name": "Vehicle Type", + "rawType": "object", + "type": "unknown" + } + ], + "ref": "96eeb177-898a-4a19-8f7e-bafc78499a42", + "rows": [ + [ + "3", + "3", + "XL78013", + "Oregon", + "22332.43946", + "Yes", + "Extended", + "College", + "1/11/11", + "Employed", + "M", + "49078", + "Suburban", + "Single", + "97", + "10.0", + "3", + "0.0", + "2", + "Corporate Auto", + "Corporate L3", + "Offer2", + "Branch", + "484", + "Four-Door Car", + "Medsize", + "A" + ], + [ + "8", + "8", + "FM55990", + "California", + "5989.773931", + "Yes", + "Premium", + "College", + "1/19/11", + "Employed", + "M", + "66839", + "Suburban", + "Single", + "154", + "33.0", + "24", + "0.0", + "1", + "Personal Auto", + "Personal L1", + "Offer2", + "Branch", + "739", + "Sports Car", + "Medsize", + null + ], + [ + "15", + "15", + "CW49887", + "California", + "4626.801093", + "Yes", + "Basic", + "Master", + "1/16/11", + "Employed", + "F", + "79487", + "Suburban", + "Divorced", + "114", + "20.0", + "87", + "0.0", + "1", + "Special Auto", + "Special L1", + "Offer2", + "Branch", + "547", + "SUV", + "Medsize", + null + ], + [ + "19", + "19", + "NJ54277", + "California", + 
"3746.751625", + "Yes", + "Extended", + "College", + "2/26/11", + "Employed", + "F", + "41479", + "Rural", + "Married", + "94", + "14.0", + "38", + "1.0", + "1", + "Personal Auto", + "Personal L2", + "Offer2", + "Call Center", + "19", + "Two-Door Car", + "Large", + "A" + ], + [ + "27", + "27", + "MQ68407", + "Oregon", + "4376.363592", + "Yes", + "Premium", + "Bachelor", + "2/28/11", + "Employed", + "F", + "63774", + "Rural", + "Divorced", + "111", + "18.0", + "63", + "0.0", + "1", + "Personal Auto", + "Personal L3", + "Offer2", + "Agent", + "60", + "Four-Door Car", + "Medsize", + null + ] + ], + "shape": { + "columns": 26, + "rows": 5 + } }, - "source": [ - "2. Using the original Dataframe, analyze:\n", - " - the average `monthly_premium` and/or customer lifetime value by `policy_type` and `gender` for customers who responded \"Yes\", and\n", - " - compare these insights to `total_claim_amount` patterns, and discuss which segments appear most profitable or low-risk for the company." + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Unnamed: 0CustomerStateCustomer Lifetime ValueResponseCoverageEducationEffective To DateEmploymentStatusGender...Number of Open ComplaintsNumber of PoliciesPolicy TypePolicyRenew Offer TypeSales ChannelTotal Claim AmountVehicle ClassVehicle SizeVehicle Type
33XL78013Oregon22332.439460YesExtendedCollege1/11/11EmployedM...0.02Corporate AutoCorporate L3Offer2Branch484Four-Door CarMedsizeA
88FM55990California5989.773931YesPremiumCollege1/19/11EmployedM...0.01Personal AutoPersonal L1Offer2Branch739Sports CarMedsizeNaN
1515CW49887California4626.801093YesBasicMaster1/16/11EmployedF...0.01Special AutoSpecial L1Offer2Branch547SUVMedsizeNaN
1919NJ54277California3746.751625YesExtendedCollege2/26/11EmployedF...1.01Personal AutoPersonal L2Offer2Call Center19Two-Door CarLargeA
2727MQ68407Oregon4376.363592YesPremiumBachelor2/28/11EmployedF...0.01Personal AutoPersonal L3Offer2Agent60Four-Door CarMedsizeNaN
\n", + "

5 rows × 26 columns

\n", + "
" + ], + "text/plain": [ + " Unnamed: 0 Customer State Customer Lifetime Value Response \\\n", + "3 3 XL78013 Oregon 22332.439460 Yes \n", + "8 8 FM55990 California 5989.773931 Yes \n", + "15 15 CW49887 California 4626.801093 Yes \n", + "19 19 NJ54277 California 3746.751625 Yes \n", + "27 27 MQ68407 Oregon 4376.363592 Yes \n", + "\n", + " Coverage Education Effective To Date EmploymentStatus Gender ... \\\n", + "3 Extended College 1/11/11 Employed M ... \n", + "8 Premium College 1/19/11 Employed M ... \n", + "15 Basic Master 1/16/11 Employed F ... \n", + "19 Extended College 2/26/11 Employed F ... \n", + "27 Premium Bachelor 2/28/11 Employed F ... \n", + "\n", + " Number of Open Complaints Number of Policies Policy Type \\\n", + "3 0.0 2 Corporate Auto \n", + "8 0.0 1 Personal Auto \n", + "15 0.0 1 Special Auto \n", + "19 1.0 1 Personal Auto \n", + "27 0.0 1 Personal Auto \n", + "\n", + " Policy Renew Offer Type Sales Channel Total Claim Amount \\\n", + "3 Corporate L3 Offer2 Branch 484 \n", + "8 Personal L1 Offer2 Branch 739 \n", + "15 Special L1 Offer2 Branch 547 \n", + "19 Personal L2 Offer2 Call Center 19 \n", + "27 Personal L3 Offer2 Agent 60 \n", + "\n", + " Vehicle Class Vehicle Size Vehicle Type \n", + "3 Four-Door Car Medsize A \n", + "8 Sports Car Medsize NaN \n", + "15 SUV Medsize NaN \n", + "19 Two-Door Car Large A \n", + "27 Four-Door Car Medsize NaN \n", + "\n", + "[5 rows x 26 columns]" ] - }, + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#1. 
Create a new DataFrame that only includes customers who:\n", + " #- have a **low total_claim_amount** (e.g., below $1,000),\n", + " #- have a response \"Yes\" to the last marketing campaign.\n", + "\n", + "low_claim_df = df[(df['Total Claim Amount'] < 1000) & (df['Response'] == 'Yes')]\n", + "\n", + "low_claim_df.head()" + ] + }, + { + "cell_type": "markdown", + "id": "b9be383e-5165-436e-80c8-57d4c757c8c3", + "metadata": { + "id": "b9be383e-5165-436e-80c8-57d4c757c8c3" + }, + "source": [ + "2. Using the original Dataframe, analyze:\n", + " - the average `monthly_premium` and/or customer lifetime value by `policy_type` and `gender` for customers who responded \"Yes\", and\n", + " - compare these insights to `total_claim_amount` patterns, and discuss which segments appear most profitable or low-risk for the company." + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "09db4b9e", + "metadata": {}, + "outputs": [ { - "cell_type": "markdown", - "id": "7050f4ac-53c5-4193-a3c0-8699b87196f0", - "metadata": { - "id": "7050f4ac-53c5-4193-a3c0-8699b87196f0" + "data": { + "application/vnd.microsoft.datawrangler.viewer.v0+json": { + "columns": [ + { + "name": "index", + "rawType": "int64", + "type": "integer" + }, + { + "name": "Policy Type", + "rawType": "object", + "type": "string" + }, + { + "name": "Gender", + "rawType": "object", + "type": "string" + }, + { + "name": "Monthly Premium Auto", + "rawType": "float64", + "type": "float" + }, + { + "name": "Customer Lifetime Value", + "rawType": "float64", + "type": "float" + }, + { + "name": "Total Claim Amount", + "rawType": "float64", + "type": "float" + } + ], + "ref": "bdec9b37-0675-418e-a28a-f8d72f9acb89", + "rows": [ + [ + "0", + "Corporate Auto", + "F", + "94.30177514792899", + "7712.62873610651", + "433.29585798816566" + ], + [ + "1", + "Corporate Auto", + "M", + "92.18831168831169", + "7944.465413844156", + "408.1233766233766" + ], + [ + "2", + "Personal Auto", + "F", + "98.99814814814815", + 
"8339.791842237037", + "452.4981481481482" + ], + [ + "3", + "Personal Auto", + "M", + "91.08582089552239", + "7448.383280707089", + "456.5764925373134" + ], + [ + "4", + "Special Auto", + "F", + "92.31428571428572", + "7691.584111285713", + "452.85714285714283" + ], + [ + "5", + "Special Auto", + "M", + "86.34375", + "8247.08870234375", + "429.125" + ] + ], + "shape": { + "columns": 5, + "rows": 6 + } }, - "source": [ - "3. Analyze the total number of customers who have policies in each state, and then filter the results to only include states where there are more than 500 customers." + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Policy TypeGenderMonthly Premium AutoCustomer Lifetime ValueTotal Claim Amount
0Corporate AutoF94.3017757712.628736433.295858
1Corporate AutoM92.1883127944.465414408.123377
2Personal AutoF98.9981488339.791842452.498148
3Personal AutoM91.0858217448.383281456.576493
4Special AutoF92.3142867691.584111452.857143
5Special AutoM86.3437508247.088702429.125000
\n", + "
" + ], + "text/plain": [ + " Policy Type Gender Monthly Premium Auto Customer Lifetime Value \\\n", + "0 Corporate Auto F 94.301775 7712.628736 \n", + "1 Corporate Auto M 92.188312 7944.465414 \n", + "2 Personal Auto F 98.998148 8339.791842 \n", + "3 Personal Auto M 91.085821 7448.383281 \n", + "4 Special Auto F 92.314286 7691.584111 \n", + "5 Special Auto M 86.343750 8247.088702 \n", + "\n", + " Total Claim Amount \n", + "0 433.295858 \n", + "1 408.123377 \n", + "2 452.498148 \n", + "3 456.576493 \n", + "4 452.857143 \n", + "5 429.125000 " ] - }, + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#the average `monthly_premium` and/or customer lifetime value by `policy_type` and `gender` for customers who responded \"Yes\"\n", + "\n", + "subset = ['Policy Type','Gender']\n", + "\n", + "avg_values = df[df['Response'] == 'Yes'].groupby(subset)[['Monthly Premium Auto', 'Customer Lifetime Value', 'Total Claim Amount']].mean().reset_index()\n", + "\n", + "avg_values\n", + "\n", + "\n", + "#INSIGHTS:\n", + "\n", + "# 1. Policy Type: \n", + "# Customers with 'Corporate Auto' policies have lower average total claim amounts (about 433 for F and 408 for M) than those with 'Personal Auto' policies (about 452 for F and 457 for M). \n", + "# This suggests that 'Corporate Auto' customers may represent a lower-risk segment for the company than 'Personal Auto' customers.\n", + "\n", + "# 2. Gender: \n", + "# Male customers have lower average total claim amounts in 'Corporate Auto' and 'Special Auto'; female customers are only marginally lower in 'Personal Auto', so female customers are not consistently the lower-claim segment, although they pay higher average monthly premiums in every policy type.\n" + ] + }, + { + "cell_type": "markdown", + "id": "7050f4ac-53c5-4193-a3c0-8699b87196f0", + "metadata": { + "id": "7050f4ac-53c5-4193-a3c0-8699b87196f0" + }, + "source": [ + "3. 
Analyze the total number of customers who have policies in each state, and then filter the results to only include states where there are more than 500 customers." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "8b2e63ef", + "metadata": {}, + "outputs": [ { - "cell_type": "markdown", - "id": "b60a4443-a1a7-4bbf-b78e-9ccdf9895e0d", - "metadata": { - "id": "b60a4443-a1a7-4bbf-b78e-9ccdf9895e0d" + "data": { + "application/vnd.microsoft.datawrangler.viewer.v0+json": { + "columns": [ + { + "name": "State", + "rawType": "object", + "type": "string" + }, + { + "name": "count", + "rawType": "int64", + "type": "integer" + } + ], + "ref": "64818959-add1-4e88-9b03-7c17a0a23527", + "rows": [ + [ + "California", + "3552" + ], + [ + "Oregon", + "2909" + ], + [ + "Arizona", + "1937" + ], + [ + "Nevada", + "993" + ], + [ + "Washington", + "888" + ] + ], + "shape": { + "columns": 1, + "rows": 5 + } }, - "source": [ - "4. Find the maximum, minimum, and median customer lifetime value by education level and gender. Write your conclusions." + "text/plain": [ + "State\n", + "California 3552\n", + "Oregon 2909\n", + "Arizona 1937\n", + "Nevada 993\n", + "Washington 888\n", + "Name: count, dtype: int64" ] - }, + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#3. Analyze the total number of customers who have policies in each state, and then filter the results to only include states where there are more than 500 customers.\n", + "\n", + "state_counts = df['State'].value_counts()\n", + "\n", + "filtered_states = state_counts[state_counts > 500]\n", + "\n", + "filtered_states" + ] + }, + { + "cell_type": "markdown", + "id": "b60a4443-a1a7-4bbf-b78e-9ccdf9895e0d", + "metadata": { + "id": "b60a4443-a1a7-4bbf-b78e-9ccdf9895e0d" + }, + "source": [ + "4. Find the maximum, minimum, and median customer lifetime value by education level and gender. Write your conclusions." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "f41ffb00", + "metadata": {}, + "outputs": [ { - "cell_type": "markdown", - "id": "b42999f9-311f-481e-ae63-40a5577072c5", - "metadata": { - "id": "b42999f9-311f-481e-ae63-40a5577072c5" + "data": { + "application/vnd.microsoft.datawrangler.viewer.v0+json": { + "columns": [ + { + "name": "index", + "rawType": "int64", + "type": "integer" + }, + { + "name": "Education", + "rawType": "object", + "type": "string" + }, + { + "name": "Gender", + "rawType": "object", + "type": "string" + }, + { + "name": "max", + "rawType": "float64", + "type": "float" + }, + { + "name": "min", + "rawType": "float64", + "type": "float" + }, + { + "name": "median", + "rawType": "float64", + "type": "float" + } + ], + "ref": "0350ed8e-3045-4142-aa03-aa3441820144", + "rows": [ + [ + "0", + "Bachelor", + "F", + "73225.95652", + "1904.000852", + "5640.505303" + ], + [ + "1", + "Bachelor", + "M", + "67907.2705", + "1898.007675", + "5548.031892" + ], + [ + "2", + "College", + "F", + "61850.18803", + "1898.683686", + "5623.611187" + ], + [ + "3", + "College", + "M", + "61134.68307", + "1918.1197", + "6005.847375" + ], + [ + "4", + "Doctor", + "F", + "44856.11397", + "2395.57", + "5332.462694" + ], + [ + "5", + "Doctor", + "M", + "32677.34284", + "2267.604038", + "5577.669457" + ], + [ + "6", + "High School or Below", + "F", + "55277.44589", + "2144.921535", + "6039.5531869999995" + ], + [ + "7", + "High School or Below", + "M", + "83325.38119", + "1940.981221", + "6286.731006" + ], + [ + "8", + "Master", + "F", + "51016.06704", + "2417.777032", + "5729.855012" + ], + [ + "9", + "Master", + "M", + "50568.25912", + "2272.30731", + "5579.0992074999995" + ] + ], + "shape": { + "columns": 5, + "rows": 10 + } }, - "source": [ - "## Bonus" + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
EducationGendermaxminmedian
0BachelorF73225.956521904.0008525640.505303
1BachelorM67907.270501898.0076755548.031892
2CollegeF61850.188031898.6836865623.611187
3CollegeM61134.683071918.1197006005.847375
4DoctorF44856.113972395.5700005332.462694
5DoctorM32677.342842267.6040385577.669457
6High School or BelowF55277.445892144.9215356039.553187
7High School or BelowM83325.381191940.9812216286.731006
8MasterF51016.067042417.7770325729.855012
9MasterM50568.259122272.3073105579.099207
\n", + "
" + ], + "text/plain": [ + " Education Gender max min median\n", + "0 Bachelor F 73225.95652 1904.000852 5640.505303\n", + "1 Bachelor M 67907.27050 1898.007675 5548.031892\n", + "2 College F 61850.18803 1898.683686 5623.611187\n", + "3 College M 61134.68307 1918.119700 6005.847375\n", + "4 Doctor F 44856.11397 2395.570000 5332.462694\n", + "5 Doctor M 32677.34284 2267.604038 5577.669457\n", + "6 High School or Below F 55277.44589 2144.921535 6039.553187\n", + "7 High School or Below M 83325.38119 1940.981221 6286.731006\n", + "8 Master F 51016.06704 2417.777032 5729.855012\n", + "9 Master M 50568.25912 2272.307310 5579.099207" ] - }, + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#4. Find the maximum, minimum, and median customer lifetime value by education level and gender. Write your conclusions.\n", + "\n", + "CLV_analysis = df.groupby(['Education','Gender'])['Customer Lifetime Value'].agg(['max','min','median']).reset_index()\n", + "\n", + "CLV_analysis\n", + "\n", + "#INSIGHTS:\n", + "# 1. Education Level:\n", + "#'High School or Below' customers show the highest maximum (about 83,325, male) and the highest median customer lifetime value, while Doctor and Master levels show the lowest maximums.\n", + "\n", + "# 2. Gender:\n", + "#Gender differences vary by education level. 
\n", + "#By median, male customers show higher CLV in some segments (College, Doctor, High School or Below), while females lead in others (Bachelor, Master).\n", + "#There is no consistent gender advantage across all levels." + ] + }, + { + "cell_type": "markdown", + "id": "b42999f9-311f-481e-ae63-40a5577072c5", + "metadata": { + "id": "b42999f9-311f-481e-ae63-40a5577072c5" + }, + "source": [ + "## Bonus" + ] + }, + { + "cell_type": "markdown", + "id": "81ff02c5-6584-4f21-a358-b918697c6432", + "metadata": { + "id": "81ff02c5-6584-4f21-a358-b918697c6432" + }, + "source": [ + "5. The marketing team wants to analyze the number of policies sold by state and month. 
Present the data in a table where the months are arranged as columns and the states are arranged as rows." + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "de566c50", + "metadata": {}, + "outputs": [ { - "cell_type": "markdown", - "id": "81ff02c5-6584-4f21-a358-b918697c6432", - "metadata": { - "id": "81ff02c5-6584-4f21-a358-b918697c6432" + "data": { + "application/vnd.microsoft.datawrangler.viewer.v0+json": { + "columns": [ + { + "name": "State", + "rawType": "object", + "type": "string" + }, + { + "name": "January", + "rawType": "int64", + "type": "integer" + }, + { + "name": "February", + "rawType": "int64", + "type": "integer" + } + ], + "ref": "112468aa-ec6b-45a9-8394-c7fdbb9a3621", + "rows": [ + [ + "Arizona", + "3052", + "2864" + ], + [ + "California", + "5673", + "4929" + ], + [ + "Nevada", + "1493", + "1278" + ], + [ + "Oregon", + "4697", + "3969" + ], + [ + "Washington", + "1358", + "1225" + ] + ], + "shape": { + "columns": 2, + "rows": 5 + } }, - "source": [ - "5. The marketing team wants to analyze the number of policies sold by state and month. Present the data in a table where the months are arranged as columns and the states are arranged as rows." + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
MonthJanuaryFebruary
State
Arizona30522864
California56734929
Nevada14931278
Oregon46973969
Washington13581225
\n", + "
" + ], + "text/plain": [ + "Month January February\n", + "State \n", + "Arizona 3052 2864\n", + "California 5673 4929\n", + "Nevada 1493 1278\n", + "Oregon 4697 3969\n", + "Washington 1358 1225" ] - }, + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#5. The marketing team wants to analyze the number of policies sold by state and month.\n", + "#Present the data in a table where the months are arranged as columns and the states are arranged as rows.\n", + "\n", + "\n", + "# First, ensure that the 'Effective To Date' column is in datetime format\n", + "df['Effective To Date'] = pd.to_datetime(df['Effective To Date'])\n", + "\n", + "# Extract month from 'Effective To Date'\n", + "df['Month'] = df['Effective To Date'].dt.month_name()\n", + "\n", + "# Create a pivot table with states as rows and months as columns\n", + "# The dataset column is named 'Number of Policies' — sum it to get total policies sold per state/month\n", + "\n", + "policy_pivot = pd.pivot_table(\n", + " df,\n", + " index='State',\n", + " columns='Month',\n", + " values='Number of Policies',\n", + " aggfunc='sum',\n", + " fill_value=0\n", + ")\n", + "\n", + "policy_pivot = policy_pivot.reindex(columns=[\n", + " 'January', 'February'\n", + "])\n", + " \n", + "policy_pivot\n" + ] + }, + { + "cell_type": "markdown", + "id": "b6aec097-c633-4017-a125-e77a97259cda", + "metadata": { + "id": "b6aec097-c633-4017-a125-e77a97259cda" + }, + "source": [ + "6. Display a new DataFrame that contains the number of policies sold by month, by state, for the top 3 states with the highest number of policies sold.\n", + "\n", + "*Hint:*\n", + "- *To accomplish this, you will first need to group the data by state and month, then count the number of policies sold for each group. 
Afterwards, you will need to sort the data by the count of policies sold in descending order.*\n", + "- *Next, you will select the top 3 states with the highest number of policies sold.*\n", + "- *Finally, you will create a new DataFrame that contains the number of policies sold by month for each of the top 3 states.*" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "de5ac07e", + "metadata": {}, + "outputs": [ { - "cell_type": "markdown", - "id": "b6aec097-c633-4017-a125-e77a97259cda", - "metadata": { - "id": "b6aec097-c633-4017-a125-e77a97259cda" + "data": { + "application/vnd.microsoft.datawrangler.viewer.v0+json": { + "columns": [ + { + "name": "index", + "rawType": "int64", + "type": "integer" + }, + { + "name": "State", + "rawType": "object", + "type": "string" + }, + { + "name": "Month", + "rawType": "object", + "type": "string" + }, + { + "name": "Number of Policies", + "rawType": "int64", + "type": "integer" + } + ], + "ref": "79f202be-b39a-445e-ac9f-84ff2347af07", + "rows": [ + [ + "3", + "California", + "January", + "5673" + ], + [ + "2", + "California", + "February", + "4929" + ], + [ + "7", + "Oregon", + "January", + "4697" + ], + [ + "6", + "Oregon", + "February", + "3969" + ], + [ + "1", + "Arizona", + "January", + "3052" + ], + [ + "0", + "Arizona", + "February", + "2864" + ], + [ + "5", + "Nevada", + "January", + "1493" + ], + [ + "9", + "Washington", + "January", + "1358" + ], + [ + "4", + "Nevada", + "February", + "1278" + ], + [ + "8", + "Washington", + "February", + "1225" + ] + ], + "shape": { + "columns": 3, + "rows": 10 + } }, - "source": [ - "6. Display a new DataFrame that contains the number of policies sold by month, by state, for the top 3 states with the highest number of policies sold.\n", - "\n", - "*Hint:*\n", - "- *To accomplish this, you will first need to group the data by state and month, then count the number of policies sold for each group. 
Afterwards, you will need to sort the data by the count of policies sold in descending order.*\n", - "- *Next, you will select the top 3 states with the highest number of policies sold.*\n", - "- *Finally, you will create a new DataFrame that contains the number of policies sold by month for each of the top 3 states.*" + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
StateMonthNumber of Policies
3CaliforniaJanuary5673
2CaliforniaFebruary4929
7OregonJanuary4697
6OregonFebruary3969
1ArizonaJanuary3052
0ArizonaFebruary2864
5NevadaJanuary1493
9WashingtonJanuary1358
4NevadaFebruary1278
8WashingtonFebruary1225
\n", + "
" + ], + "text/plain": [ + " State Month Number of Policies\n", + "3 California January 5673\n", + "2 California February 4929\n", + "7 Oregon January 4697\n", + "6 Oregon February 3969\n", + "1 Arizona January 3052\n", + "0 Arizona February 2864\n", + "5 Nevada January 1493\n", + "9 Washington January 1358\n", + "4 Nevada February 1278\n", + "8 Washington February 1225" ] - }, + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#1. Group the data by state and month, then count the number of policies sold for each group.\n", + "# Afterwards, you will need to sort the data by the count of policies sold in descending order.\n", + "\n", + "policy_counts = df.groupby(['State', 'Month'])['Number of Policies'].sum().reset_index()\n", + "policy_counts = policy_counts.sort_values(by='Number of Policies', ascending=False)\n", + "policy_counts\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2d404675", + "metadata": {}, + "outputs": [ { - "cell_type": "markdown", - "id": "ba975b8a-a2cf-4fbf-9f59-ebc381767009", - "metadata": { - "id": "ba975b8a-a2cf-4fbf-9f59-ebc381767009" + "data": { + "application/vnd.microsoft.datawrangler.viewer.v0+json": { + "columns": [ + { + "name": "index", + "rawType": "int64", + "type": "integer" + }, + { + "name": "State", + "rawType": "object", + "type": "string" + }, + { + "name": "Number of Policies", + "rawType": "int64", + "type": "integer" + } + ], + "ref": "4e617224-6ea1-47da-9f8b-69dbbb133bd6", + "rows": [ + [ + "0", + "California", + "10602" + ], + [ + "1", + "Oregon", + "8666" + ], + [ + "2", + "Arizona", + "5916" + ] + ], + "shape": { + "columns": 2, + "rows": 3 + } }, - "source": [ - "7. 
The marketing team wants to analyze the effect of different marketing channels on the customer response rate.\n", - "\n", - "Hint: You can use melt to unpivot the data and create a table that shows the customer response rate (those who responded \"Yes\") by marketing channel." + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
StateNumber of Policies
0California10602
1Oregon8666
2Arizona5916
\n", + "
" + ], + "text/plain": [ + " State Number of Policies\n", + "0 California 10602\n", + "1 Oregon 8666\n", + "2 Arizona 5916" ] - }, + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#2. Select the top 3 states with the highest number of policies sold.\n", + "\n", + "top_states = policy_counts.groupby('State')['Number of Policies'].sum().nlargest(3).reset_index()\n", + "top_states" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e0a037d7", + "metadata": {}, + "outputs": [ { - "cell_type": "markdown", - "id": "e4378d94-48fb-4850-a802-b1bc8f427b2d", - "metadata": { - "id": "e4378d94-48fb-4850-a802-b1bc8f427b2d" + "data": { + "application/vnd.microsoft.datawrangler.viewer.v0+json": { + "columns": [ + { + "name": "index", + "rawType": "int64", + "type": "integer" + }, + { + "name": "State", + "rawType": "object", + "type": "string" + }, + { + "name": "Month", + "rawType": "object", + "type": "string" + }, + { + "name": "Number of Policies", + "rawType": "int64", + "type": "integer" + } + ], + "ref": "306f6a29-68a0-437a-88ce-ddab0bc88542", + "rows": [ + [ + "3", + "California", + "January", + "5673" + ], + [ + "2", + "California", + "February", + "4929" + ], + [ + "7", + "Oregon", + "January", + "4697" + ], + [ + "6", + "Oregon", + "February", + "3969" + ], + [ + "1", + "Arizona", + "January", + "3052" + ], + [ + "0", + "Arizona", + "February", + "2864" + ] + ], + "shape": { + "columns": 3, + "rows": 6 + } }, - "source": [ - "External Resources for Data Filtering: https://towardsdatascience.com/filtering-data-frames-in-pandas-b570b1f834b9" + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
StateMonthNumber of Policies
3CaliforniaJanuary5673
2CaliforniaFebruary4929
7OregonJanuary4697
6OregonFebruary3969
1ArizonaJanuary3052
0ArizonaFebruary2864
\n", + "
" + ], + "text/plain": [ + " State Month Number of Policies\n", + "3 California January 5673\n", + "2 California February 4929\n", + "7 Oregon January 4697\n", + "6 Oregon February 3969\n", + "1 Arizona January 3052\n", + "0 Arizona February 2864" ] - }, + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#3. Create a new DataFrame that contains the number of policies sold by month for each of the top 3 states.\n", + "\n", + "state_list = top_states['State'].tolist()\n", + "top_state_policies = policy_counts[policy_counts['State'].isin(state_list)]\n", + "top_state_policies" + ] + }, + { + "cell_type": "markdown", + "id": "ba975b8a-a2cf-4fbf-9f59-ebc381767009", + "metadata": { + "id": "ba975b8a-a2cf-4fbf-9f59-ebc381767009" + }, + "source": [ + "7. The marketing team wants to analyze the effect of different marketing channels on the customer response rate.\n", + "\n", + "Hint: You can use melt to unpivot the data and create a table that shows the customer response rate (those who responded \"Yes\") by marketing channel." 
+ ] + }, + { + "cell_type": "markdown", + "id": "e4378d94-48fb-4850-a802-b1bc8f427b2d", + "metadata": { + "id": "e4378d94-48fb-4850-a802-b1bc8f427b2d" + }, + "source": [ + "External Resources for Data Filtering: https://towardsdatascience.com/filtering-data-frames-in-pandas-b570b1f834b9" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e48c7914", + "metadata": {}, + "outputs": [ { - "cell_type": "code", - "execution_count": null, - "id": "449513f4-0459-46a0-a18d-9398d974c9ad", - "metadata": { - "id": "449513f4-0459-46a0-a18d-9398d974c9ad" + "data": { + "application/vnd.microsoft.datawrangler.viewer.v0+json": { + "columns": [ + { + "name": "index", + "rawType": "int64", + "type": "integer" + }, + { + "name": "Response", + "rawType": "object", + "type": "string" + }, + { + "name": "Marketing Channel", + "rawType": "object", + "type": "string" + }, + { + "name": "Channel Type", + "rawType": "object", + "type": "string" + } + ], + "ref": "15ee26d9-704a-48a9-be56-394beffb5a1f", + "rows": [ + [ + "0", + "No", + "Sales Channel", + "Agent" + ], + [ + "1", + "No", + "Sales Channel", + "Call Center" + ], + [ + "2", + "No", + "Sales Channel", + "Call Center" + ], + [ + "3", + "Yes", + "Sales Channel", + "Branch" + ], + [ + "4", + "No", + "Sales Channel", + "Branch" + ] + ], + "shape": { + "columns": 3, + "rows": 5 + } }, - "outputs": [], - "source": [ - "# your code goes here" + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ResponseMarketing ChannelChannel Type
0NoSales ChannelAgent
1NoSales ChannelCall Center
2NoSales ChannelCall Center
3YesSales ChannelBranch
4NoSales ChannelBranch
\n", + "
" + ], + "text/plain": [ + " Response Marketing Channel Channel Type\n", + "0 No Sales Channel Agent\n", + "1 No Sales Channel Call Center\n", + "2 No Sales Channel Call Center\n", + "3 Yes Sales Channel Branch\n", + "4 No Sales Channel Branch" ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" } - ], - "metadata": { - "colab": { - "provenance": [] - }, - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 + ], + "source": [ + "#7. The marketing team wants to analyze the effect of different marketing channels on the customer response rate.\n", + "\n", + "#Hint: You can use melt to unpivot the data and create a table that shows the customer response rate (those who responded \"Yes\") by marketing channel.\n", + "\n", + "melted_df = pd.melt(\n", + " df,\n", + " id_vars=['Response'],\n", + " value_vars=['Sales Channel'], #melt (unpivot) the marketing channel columns\n", + " var_name='Marketing Channel', #name of the new variable column\n", + " value_name='Channel Type' #name of the new column holding the Sales Channel values\n", + ")\n", + "\n", + "melted_df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cd5bae2e", + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.microsoft.datawrangler.viewer.v0+json": { + "columns": [ + { + "name": "index", + "rawType": "int64", + "type": "integer" + }, + { + "name": "Channel Type", + "rawType": "object", + "type": "string" + }, + { + "name": "Response Rate", + "rawType": "float64", + "type": "float" + } + ], + "ref": "64ba04f7-e048-4372-8e39-177970379a6f", + "rows": [ + [ + "0", + "Agent", + "0.1800533851007037" + ], + [ + "1", + "Branch", + "0.10787557908669755" + ], + [ + "2", + "Call Center", + "0.10322279308734236" + ], + [ + "3", + "Web", + "0.1088560885608856" + ] + ], + "shape": { + "columns": 2, 
+ "rows": 4 + } }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.9.13" + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Channel TypeResponse Rate
0Agent0.180053
1Branch0.107876
2Call Center0.103223
3Web0.108856
\n", + "
" + ], + "text/plain": [ + " Channel Type Response Rate\n", + "0 Agent 0.180053\n", + "1 Branch 0.107876\n", + "2 Call Center 0.103223\n", + "3 Web 0.108856" + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" } + ], + "source": [ + "# Compute the share of \"Yes\" responses per channel type\n", + "response_rate = (\n", + " melted_df.groupby('Channel Type')['Response'] #group all rows by channel type and keep each group's Response series\n", + " .apply(lambda x: (x == 'Yes').mean()) #fraction of \"Yes\" responses within each group (mean of a boolean mask)\n", + " .reset_index(name='Response Rate') #turn the result into a new DataFrame\n", + ")\n", + "\n", + "response_rate" + ] + } + ], + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" }, - "nbformat": 4, - "nbformat_minor": 5 + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.9" + } + }, + "nbformat": 4, + "nbformat_minor": 5 }