From 6e6ce833a23515b5755ae1859f11b867b7d04d92 Mon Sep 17 00:00:00 2001 From: Lewis Clark Date: Sun, 3 Aug 2025 17:15:06 +0200 Subject: [PATCH] Lab completed --- .DS_Store | Bin 0 -> 6148 bytes LabsDataAggregationAndFiltering.ipynb | 815 ++++++++++++++++++++++++++ 2 files changed, 815 insertions(+) create mode 100644 .DS_Store create mode 100644 LabsDataAggregationAndFiltering.ipynb diff --git a/.DS_Store b/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..862f74fe68c35ab1efb11660e5725d733efbf365 GIT binary patch literal 6148 zcmeHKu};H441IDES^^vKm}kH3CMtG91<_yK|q1ww&PAQT7% z{+R;Wduj6%#~4F_P#_feR6xHEiLRIn4vzNe;9yq(;*7Kz=XRGMCMAfu;NVDxCZ0<4 zR4G>s@pSU3#N~p6qo+gcWY#GsOSz$lolZVkI3zp97z%^}V+D?VS!w@Y(to)BkCS2* z3WNfGN&%VGxAlUrRC?>=<+Rr(`W^kl7;E7itQ8Zj6?37j`1DI$(mD5Y!NJke$y+)x OegssPuu$MP6nF=hdoHH{ literal 0 HcmV?d00001 diff --git a/LabsDataAggregationAndFiltering.ipynb b/LabsDataAggregationAndFiltering.ipynb new file mode 100644 index 0000000..399eb33 --- /dev/null +++ b/LabsDataAggregationAndFiltering.ipynb @@ -0,0 +1,815 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "e28ddcd1-14ee-4f34-90a7-0387b77af02b", + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import numpy as np" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "41c1fe74-ae67-4c01-b8b5-5ea331225a4f", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Unnamed: 0CustomerStateCustomer Lifetime ValueResponseCoverageEducationEffective To DateEmploymentStatusGender...Number of Open ComplaintsNumber of PoliciesPolicy TypePolicyRenew Offer TypeSales ChannelTotal Claim AmountVehicle ClassVehicle SizeVehicle Type
00DK49336Arizona4809.216960NoBasicCollege2/18/11EmployedM...0.09Corporate AutoCorporate L3Offer3Agent292.800000Four-Door CarMedsizeNaN
11KX64629California2228.525238NoBasicCollege1/18/11UnemployedF...0.01Personal AutoPersonal L3Offer4Call Center744.924331Four-Door CarMedsizeNaN
22LZ68649Washington14947.917300NoBasicBachelor2/10/11EmployedM...0.02Personal AutoPersonal L3Offer3Call Center480.000000SUVMedsizeA
\n", + "

3 rows × 26 columns

\n", + "
" + ], + "text/plain": [ + " Unnamed: 0 Customer State Customer Lifetime Value Response Coverage \\\n", + "0 0 DK49336 Arizona 4809.216960 No Basic \n", + "1 1 KX64629 California 2228.525238 No Basic \n", + "2 2 LZ68649 Washington 14947.917300 No Basic \n", + "\n", + " Education Effective To Date EmploymentStatus Gender ... \\\n", + "0 College 2/18/11 Employed M ... \n", + "1 College 1/18/11 Unemployed F ... \n", + "2 Bachelor 2/10/11 Employed M ... \n", + "\n", + " Number of Open Complaints Number of Policies Policy Type Policy \\\n", + "0 0.0 9 Corporate Auto Corporate L3 \n", + "1 0.0 1 Personal Auto Personal L3 \n", + "2 0.0 2 Personal Auto Personal L3 \n", + "\n", + " Renew Offer Type Sales Channel Total Claim Amount Vehicle Class \\\n", + "0 Offer3 Agent 292.800000 Four-Door Car \n", + "1 Offer4 Call Center 744.924331 Four-Door Car \n", + "2 Offer3 Call Center 480.000000 SUV \n", + "\n", + " Vehicle Size Vehicle Type \n", + "0 Medsize NaN \n", + "1 Medsize NaN \n", + "2 Medsize A \n", + "\n", + "[3 rows x 26 columns]" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "url = \"https://raw.githubusercontent.com/data-bootcamp-v4/data/main/marketing_customer_analysis.csv\"\n", + "df = pd.read_csv(url)\n", + "df.head(3)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "17c31865-6c31-4b8a-86e5-fc124ad8c5b7", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(10910, 26)" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "177f4cdd-9cec-4cab-ad95-04e6b6e9af80", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Unnamed: 0CustomerStateCustomer Lifetime ValueResponseCoverageEducationEffective To DateEmploymentStatusGender...Number of Open ComplaintsNumber of PoliciesPolicy TypePolicyRenew Offer TypeSales ChannelTotal Claim AmountVehicle ClassVehicle SizeVehicle Type
33XL78013Oregon22332.439460YesExtendedCollege1/11/11EmployedM...0.02Corporate AutoCorporate L3Offer2Branch484.013411Four-Door CarMedsizeA
88FM55990California5989.773931YesPremiumCollege1/19/11EmployedM...0.01Personal AutoPersonal L1Offer2Branch739.200000Sports CarMedsizeNaN
1515CW49887California4626.801093YesBasicMaster1/16/11EmployedF...0.01Special AutoSpecial L1Offer2Branch547.200000SUVMedsizeNaN
\n", + "

3 rows × 26 columns

\n", + "
" + ], + "text/plain": [ + " Unnamed: 0 Customer State Customer Lifetime Value Response \\\n", + "3 3 XL78013 Oregon 22332.439460 Yes \n", + "8 8 FM55990 California 5989.773931 Yes \n", + "15 15 CW49887 California 4626.801093 Yes \n", + "\n", + " Coverage Education Effective To Date EmploymentStatus Gender ... \\\n", + "3 Extended College 1/11/11 Employed M ... \n", + "8 Premium College 1/19/11 Employed M ... \n", + "15 Basic Master 1/16/11 Employed F ... \n", + "\n", + " Number of Open Complaints Number of Policies Policy Type \\\n", + "3 0.0 2 Corporate Auto \n", + "8 0.0 1 Personal Auto \n", + "15 0.0 1 Special Auto \n", + "\n", + " Policy Renew Offer Type Sales Channel Total Claim Amount \\\n", + "3 Corporate L3 Offer2 Branch 484.013411 \n", + "8 Personal L1 Offer2 Branch 739.200000 \n", + "15 Special L1 Offer2 Branch 547.200000 \n", + "\n", + " Vehicle Class Vehicle Size Vehicle Type \n", + "3 Four-Door Car Medsize A \n", + "8 Sports Car Medsize NaN \n", + "15 SUV Medsize NaN \n", + "\n", + "[3 rows x 26 columns]" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "filtered_df = df[(df['Total Claim Amount'] < 1000) & (df['Response'] == 'Yes')]\n", + "filtered_df.head(3)" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "1170df77-a367-495b-b1d3-169655e3d37d", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(1399, 26)" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "filtered_df.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "ebbd3eee-c141-4b7e-8582-7ac0d781c996", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Policy TypeGenderMonthly Premium AutoCustomer Lifetime ValueTotal Claim Amount
0Corporate AutoF94.3017757712.628736433.738499
1Corporate AutoM92.1883127944.465414408.582459
2Personal AutoF98.9981488339.791842452.965929
3Personal AutoM91.0858217448.383281457.010178
4Special AutoF92.3142867691.584111453.280164
5Special AutoM86.3437508247.088702429.527942
\n", + "
" + ], + "text/plain": [ + " Policy Type Gender Monthly Premium Auto Customer Lifetime Value \\\n", + "0 Corporate Auto F 94.301775 7712.628736 \n", + "1 Corporate Auto M 92.188312 7944.465414 \n", + "2 Personal Auto F 98.998148 8339.791842 \n", + "3 Personal Auto M 91.085821 7448.383281 \n", + "4 Special Auto F 92.314286 7691.584111 \n", + "5 Special Auto M 86.343750 8247.088702 \n", + "\n", + " Total Claim Amount \n", + "0 433.738499 \n", + "1 408.582459 \n", + "2 452.965929 \n", + "3 457.010178 \n", + "4 453.280164 \n", + "5 429.527942 " + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "responded_df = df[df['Response'] == 'Yes']\n", + "\n", + "summary = responded_df.groupby(['Policy Type', 'Gender'])[\n", + " ['Monthly Premium Auto', 'Customer Lifetime Value', 'Total Claim Amount']\n", + "].mean().reset_index()\n", + "\n", + "summary" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "e28118ae-583d-4604-b5d7-d64b062fd3bf", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(6, 5)" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "summary.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "06854e42-1fa1-47df-bcc1-b73ac1ea89e9", + "metadata": {}, + "outputs": [], + "source": [ + "### Corporate Auto males are more profitable with high value and little claims and \n", + "### personal auto males have low value and high claims: this makes them riskier." + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "1df6f892-32ca-4ffd-b3a5-f8d16e88f7de", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
StateCustomer Count
0California3552
1Oregon2909
2Arizona1937
3Nevada993
4Washington888
\n", + "
" + ], + "text/plain": [ + " State Customer Count\n", + "0 California 3552\n", + "1 Oregon 2909\n", + "2 Arizona 1937\n", + "3 Nevada 993\n", + "4 Washington 888" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "customers_per_state = df['State'].value_counts()\n", + "\n", + "states_over_500 = customers_per_state[customers_per_state > 500]\n", + "\n", + "states_over_500_df = states_over_500.reset_index()\n", + "states_over_500_df.columns = ['State', 'Customer Count']\n", + "states_over_500_df" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "id": "add8973a-d24e-44c4-8fc1-240f14e4f175", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
EducationGendermaxminmedian
0BachelorF73225.956521904.0008525640.505303
1BachelorM67907.270501898.0076755548.031892
2CollegeF61850.188031898.6836865623.611187
3CollegeM61134.683071918.1197006005.847375
4DoctorF44856.113972395.5700005332.462694
5DoctorM32677.342842267.6040385577.669457
6High School or BelowF55277.445892144.9215356039.553187
7High School or BelowM83325.381191940.9812216286.731006
8MasterF51016.067042417.7770325729.855012
9MasterM50568.259122272.3073105579.099207
\n", + "
" + ], + "text/plain": [ + " Education Gender max min median\n", + "0 Bachelor F 73225.95652 1904.000852 5640.505303\n", + "1 Bachelor M 67907.27050 1898.007675 5548.031892\n", + "2 College F 61850.18803 1898.683686 5623.611187\n", + "3 College M 61134.68307 1918.119700 6005.847375\n", + "4 Doctor F 44856.11397 2395.570000 5332.462694\n", + "5 Doctor M 32677.34284 2267.604038 5577.669457\n", + "6 High School or Below F 55277.44589 2144.921535 6039.553187\n", + "7 High School or Below M 83325.38119 1940.981221 6286.731006\n", + "8 Master F 51016.06704 2417.777032 5729.855012\n", + "9 Master M 50568.25912 2272.307310 5579.099207" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import pandas as pd\n", + "\n", + "stats = df.groupby(['Education', 'Gender'])['Customer Lifetime Value'].agg(['max', 'min', 'median']).reset_index()\n", + "\n", + "stats" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "id": "ef29969f-c472-49f5-893f-c0b0f5025968", + "metadata": {}, + "outputs": [], + "source": [ + "### Bachelor’s degree holders tend to have the highest maximum customer lifetime value, \n", + "### while median values are the same across all educaton levels and genders." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "16a21305-728b-42de-a854-44ce4ba5ab87", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python [conda env:base] *", + "language": "python", + "name": "conda-base-py" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.13.5" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}