diff --git a/lab-dw-aggregating.ipynb b/lab-dw-aggregating.ipynb index fadd718..0b0a9e1 100644 --- a/lab-dw-aggregating.ipynb +++ b/lab-dw-aggregating.ipynb @@ -115,6 +115,35 @@ "Hint: You can use melt to unpivot the data and create a table that shows the customer response rate (those who responded \"Yes\") by marketing channel." ] }, + { + "cell_type": "code", + "execution_count": 32, + "id": "509e3e33", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Index(['Unnamed: 0', 'Customer', 'State', 'Customer Lifetime Value',\n", + " 'Response', 'Coverage', 'Education', 'Effective To Date',\n", + " 'EmploymentStatus', 'Gender', 'Income', 'Location Code',\n", + " 'Marital Status', 'Monthly Premium Auto', 'Months Since Last Claim',\n", + " 'Months Since Policy Inception', 'Number of Open Complaints',\n", + " 'Number of Policies', 'Policy Type', 'Policy', 'Renew Offer Type',\n", + " 'Sales Channel', 'Total Claim Amount', 'Vehicle Class', 'Vehicle Size',\n", + " 'Vehicle Type'],\n", + " dtype='object')" + ] + }, + "execution_count": 32, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.columns" + ] + }, { "cell_type": "markdown", "id": "e4378d94-48fb-4850-a802-b1bc8f427b2d", @@ -132,10 +161,235 @@ "metadata": { "id": "449513f4-0459-46a0-a18d-9398d974c9ad" }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " Unnamed: 0 Customer State Customer Lifetime Value Response \\\n", + "3 3 XL78013 Oregon 22332.439460 Yes \n", + "8 8 FM55990 California 5989.773931 Yes \n", + "15 15 CW49887 California 4626.801093 Yes \n", + "19 19 NJ54277 California 3746.751625 Yes \n", + "27 27 MQ68407 Oregon 4376.363592 Yes \n", + "... ... ... ... ... ... 
\n", + "10844 10844 FM31768 Arizona 5979.724161 Yes \n", + "10852 10852 KZ80424 Washington 8382.478392 Yes \n", + "10872 10872 XT67997 California 5979.724161 Yes \n", + "10887 10887 BY78730 Oregon 8879.790017 Yes \n", + "10897 10897 MM70762 Arizona 9075.768214 Yes \n", + "\n", + " Coverage Education Effective To Date EmploymentStatus \\\n", + "3 Extended College 1/11/11 Employed \n", + "8 Premium College 1/19/11 Employed \n", + "15 Basic Master 1/16/11 Employed \n", + "19 Extended College 2/26/11 Employed \n", + "27 Premium Bachelor 2/28/11 Employed \n", + "... ... ... ... ... \n", + "10844 Extended High School or Below 2/7/11 Employed \n", + "10852 Basic Bachelor 1/27/11 Employed \n", + "10872 Extended High School or Below 2/7/11 Employed \n", + "10887 Basic High School or Below 2/3/11 Employed \n", + "10897 Basic Master 1/26/11 Employed \n", + "\n", + " Gender ... Number of Open Complaints Number of Policies \\\n", + "3 M ... 0.0 2 \n", + "8 M ... 0.0 1 \n", + "15 F ... 0.0 1 \n", + "19 F ... 1.0 1 \n", + "27 F ... 0.0 1 \n", + "... ... ... ... ... \n", + "10844 F ... 0.0 3 \n", + "10852 M ... 0.0 2 \n", + "10872 F ... 0.0 3 \n", + "10887 F ... 0.0 7 \n", + "10897 M ... 0.0 8 \n", + "\n", + " Policy Type Policy Renew Offer Type Sales Channel \\\n", + "3 Corporate Auto Corporate L3 Offer2 Branch \n", + "8 Personal Auto Personal L1 Offer2 Branch \n", + "15 Special Auto Special L1 Offer2 Branch \n", + "19 Personal Auto Personal L2 Offer2 Call Center \n", + "27 Personal Auto Personal L3 Offer2 Agent \n", + "... ... ... ... ... 
\n", + "10844 Personal Auto Personal L1 Offer2 Agent \n", + "10852 Personal Auto Personal L2 Offer2 Call Center \n", + "10872 Personal Auto Personal L3 Offer2 Agent \n", + "10887 Special Auto Special L2 Offer1 Agent \n", + "10897 Personal Auto Personal L1 Offer1 Agent \n", + "\n", + " Total Claim Amount Vehicle Class Vehicle Size Vehicle Type \n", + "3 484.013411 Four-Door Car Medsize A \n", + "8 739.200000 Sports Car Medsize NaN \n", + "15 547.200000 SUV Medsize NaN \n", + "19 19.575683 Two-Door Car Large A \n", + "27 60.036683 Four-Door Car Medsize NaN \n", + "... ... ... ... ... \n", + "10844 547.200000 Four-Door Car Medsize NaN \n", + "10852 791.878042 NaN NaN A \n", + "10872 547.200000 Four-Door Car Medsize NaN \n", + "10887 528.200860 SUV Small A \n", + "10897 158.077504 Sports Car Medsize A \n", + "\n", + "[1399 rows x 26 columns]\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
EducationGenderMax CLVMin CLVMedian CLV
0BachelorF73225.956521904.0008525640.505303
1BachelorM67907.270501898.0076755548.031892
2CollegeF61850.188031898.6836865623.611187
3CollegeM61134.683071918.1197006005.847375
4DoctorF44856.113972395.5700005332.462694
\n", + "
" + ], + "text/plain": [ + " Education Gender Max CLV Min CLV Median CLV\n", + "0 Bachelor F 73225.95652 1904.000852 5640.505303\n", + "1 Bachelor M 67907.27050 1898.007675 5548.031892\n", + "2 College F 61850.18803 1898.683686 5623.611187\n", + "3 College M 61134.68307 1918.119700 6005.847375\n", + "4 Doctor F 44856.11397 2395.570000 5332.462694" + ] + }, + "execution_count": 38, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# your code goes here"
+    "import pandas as pd\n",
+    "\n",
+    "url = 'https://raw.githubusercontent.com/data-bootcamp-v4/data/main/marketing_customer_analysis.csv'\n",
+    "df = pd.read_csv(url)\n",
+    "\n",
+    "# Make sure claim amounts are numeric before comparing them with a threshold;\n",
+    "# values that cannot be parsed become NaN and never pass the `< 1000` test.\n",
+    "df['Total Claim Amount'] = pd.to_numeric(df['Total Claim Amount'], errors='coerce')\n",
+    "\n",
+    "# NOTE: `display` is provided by the Jupyter/IPython kernel; it is used below because\n",
+    "# only the last bare expression of a cell is rendered automatically.\n",
+    "\n",
+    "# 1. Create a new DataFrame that only includes customers who:\n",
+    "#    - have a low total claim amount (below $1,000), and\n",
+    "#    - responded \"Yes\" to the last marketing campaign.\n",
+    "\n",
+    "def filter_values(row):\n",
+    "    \"\"\"Flag customers who responded 'Yes' and whose total claim amount is below 1000.\n",
+    "\n",
+    "    Uses `&` instead of `and`, so `row` may be a single row (one boolean comes back)\n",
+    "    or the whole DataFrame (a boolean mask comes back) -- no row-wise `apply` needed.\n",
+    "    \"\"\"\n",
+    "    return (row['Response'] == 'Yes') & (row['Total Claim Amount'] < 1000)\n",
+    "\n",
+    "filtered_df = df[filter_values(df)]\n",
+    "\n",
+    "# Show a preview only; the filtered frame is too long to print in full.\n",
+    "display(filtered_df.head())\n",
+    "\n",
+    "# 2. Using the original DataFrame, analyze:\n",
+    "#    - the average monthly premium and customer lifetime value by policy type and gender\n",
+    "#      for customers who responded \"Yes\", and\n",
+    "#    - compare these insights to total claim amount patterns, and discuss which segments\n",
+    "#      appear most profitable or low-risk for the company.\n",
+    "\n",
+    "def filter_values_yes_only(row):\n",
+    "    \"\"\"Flag customers who responded 'Yes'; accepts a single row or the whole DataFrame.\"\"\"\n",
+    "    return row['Response'] == 'Yes'\n",
+    "\n",
+    "filtered_df2 = df.loc[\n",
+    "    filter_values_yes_only(df),\n",
+    "    [\n",
+    "        'Customer', 'Response', 'Monthly Premium Auto', 'Customer Lifetime Value',\n",
+    "        'Policy Type', 'Gender', 'Total Claim Amount',\n",
+    "    ],\n",
+    "]\n",
+    "\n",
+    "# Named aggregation yields the final column names directly, so each mean appears once.\n",
+    "grouped_stats = (\n",
+    "    filtered_df2\n",
+    "    .groupby(['Policy Type', 'Gender'])\n",
+    "    .agg(**{\n",
+    "        'Avg Monthly Premium': ('Monthly Premium Auto', 'mean'),\n",
+    "        'Avg CLV': ('Customer Lifetime Value', 'mean'),\n",
+    "        'Avg Total Claim Amount': ('Total Claim Amount', 'mean'),\n",
+    "    })\n",
+    "    .reset_index()\n",
+    ")\n",
+    "\n",
+    "# The summary has one row per (policy type, gender) pair, so show all of it.\n",
+    "display(grouped_stats)\n",
+    "# TODO: discuss which segments look most profitable / low-risk based on the table above.\n",
+    "\n",
+    "# 3. Analyze the total number of customers who have policies in each state, and then\n",
+    "#    filter the results to only include states where there are more than 500 customers.\n",
+    "# NOTE(review): this counts rows per state; if a Customer id can appear on several rows,\n",
+    "# groupby('State')['Customer'].nunique() would count distinct customers instead -- confirm.\n",
+    "filtered_df3 = df[['Customer', 'State']]\n",
+    "state_counts = (\n",
+    "    filtered_df3['State']\n",
+    "    .value_counts()\n",
+    "    .rename_axis('State')\n",
+    "    .reset_index(name='Customer Count')\n",
+    ")\n",
+    "state_counts = state_counts[state_counts['Customer Count'] > 500]\n",
+    "\n",
+    "display(state_counts)\n",
+    "\n",
+    "# 4. Find the maximum, minimum, and median customer lifetime value by education level\n",
+    "#    and gender. Write your conclusions.\n",
+    "filtered_df4 = df[['Customer Lifetime Value', 'Education', 'Gender']]\n",
+    "grouped_stats2 = (\n",
+    "    filtered_df4\n",
+    "    .groupby(['Education', 'Gender'])\n",
+    "    .agg(**{\n",
+    "        'Max CLV': ('Customer Lifetime Value', 'max'),\n",
+    "        'Min CLV': ('Customer Lifetime Value', 'min'),\n",
+    "        'Median CLV': ('Customer Lifetime Value', 'median'),\n",
+    "    })\n",
+    "    .reset_index()\n",
+    ")\n",
+    "# TODO: write the conclusions for this table.\n",
+    "\n",
+    "# Last expression of the cell: render every (education, gender) group, not just the first five.\n",
+    "grouped_stats2"
] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d75abbb1", + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { @@ -143,7 +397,7 @@ "provenance": [] }, "kernelspec": { - "display_name": "Python 3 (ipykernel)", + "display_name": "base", "language": "python", "name": "python3" }, @@ -157,7 +411,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.13" + "version": "3.12.2" } }, "nbformat": 4,