From 49ca7a36d69b6638400973e8c459233c2e579d74 Mon Sep 17 00:00:00 2001 From: ruiparreira75 Date: Wed, 26 Nov 2025 22:31:41 +0000 Subject: [PATCH] lab complete v1 --- lab-dw-data-structuring-and-combining.ipynb | 2783 ++++++++++++++++++- 1 file changed, 2775 insertions(+), 8 deletions(-) diff --git a/lab-dw-data-structuring-and-combining.ipynb b/lab-dw-data-structuring-and-combining.ipynb index ec4e3f9..c75cb08 100644 --- a/lab-dw-data-structuring-and-combining.ipynb +++ b/lab-dw-data-structuring-and-combining.ipynb @@ -36,14 +36,1686 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "id": "492d06e3-92c7-4105-ac72-536db98d3244", "metadata": { "id": "492d06e3-92c7-4105-ac72-536db98d3244" }, "outputs": [], "source": [ - "# Your code goes here" + "import pandas as pd\n", + "import numpy as np\n", + "df = pd.read_csv(\"https://raw.githubusercontent.com/data-bootcamp-v4/data/main/file1.csv\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "afd67ce0-7826-40ee-8e12-7b53e75b09bf", + "metadata": {}, + "outputs": [], + "source": [ + "df = pd.read_csv(\"filename_without_nas.csv\")  # relative path (same one the next cell uses) so the notebook runs outside the author's machine\n", + "df2 = pd.read_csv(\"https://raw.githubusercontent.com/data-bootcamp-v4/data/main/file2.csv\")\n", + "df3 = pd.read_csv(\"https://raw.githubusercontent.com/data-bootcamp-v4/data/main/file3.csv\")" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "a0c3beef-61bd-49b3-b74a-e8053e81d56b", + "metadata": {}, + "outputs": [], + "source": [ + "df = pd.read_csv(\"filename_without_nas.csv\").drop(columns=[\"Unnamed: 0\"])" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "1578ff78-65dc-4b8e-a690-22537aca220a", + "metadata": {}, + "outputs": [], + "source": [ + "df = df.rename(columns={\"Income\": \"income\"})" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0923adcb-fa4f-4c56-8abe-0263381c4724", + "metadata": {}, + 
"outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "df46fe8b-f844-427f-a58c-9a10d117f65f", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
customerstategendereducationcust_life_valueincomemont_prem_valnum_open_comppolicy_typevehicle_classtot_claim_amount
0RB50392WashingtonUnknownMaster588174010000Personal AutoFour-Door Car3
1QZ44356ArizonafemaleBachelor6979540940Personal AutoFour-Door Car1131
2AI49188NevadafemaleBachelor1288743487671080Personal AutoTwo-Door Car566
3WW63253CaliforniaMaleBachelor76458601060Corporate AutoSUV530
4GA49547WashingtonMaleHigh School or Below53630836357680Personal AutoFour-Door Car17
\n", + "
" + ], + "text/plain": [ + " customer state gender education cust_life_value \\\n", + "0 RB50392 Washington Unknown Master 588174 \n", + "1 QZ44356 Arizona female Bachelor 697954 \n", + "2 AI49188 Nevada female Bachelor 1288743 \n", + "3 WW63253 California Male Bachelor 764586 \n", + "4 GA49547 Washington Male High School or Below 536308 \n", + "\n", + " income mont_prem_val num_open_comp policy_type vehicle_class \\\n", + "0 0 1000 0 Personal Auto Four-Door Car \n", + "1 0 94 0 Personal Auto Four-Door Car \n", + "2 48767 108 0 Personal Auto Two-Door Car \n", + "3 0 106 0 Corporate Auto SUV \n", + "4 36357 68 0 Personal Auto Four-Door Car \n", + "\n", + " tot_claim_amount \n", + "0 3 \n", + "1 1131 \n", + "2 566 \n", + "3 530 \n", + "4 17 " + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "bee10092-c6ef-4167-a7e1-1aeda9729f97", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
CustomerSTGENDEREducationCustomer Lifetime ValueIncomeMonthly Premium AutoNumber of Open ComplaintsTotal Claim AmountPolicy TypeVehicle Class
0GS98873ArizonaFBachelor323912.47%16061881/0/00633.6Personal AutoFour-Door Car
1CW49887CaliforniaFMaster462680.11%794871141/0/00547.2Special AutoSUV
2MY31220CaliforniaFCollege899704.02%542301121/0/00537.6Personal AutoTwo-Door Car
3UH35128OregonFCollege2580706.30%712102141/1/001027.2Personal AutoLuxury Car
4WH52799ArizonaFCollege380812.21%94903941/0/00451.2Corporate AutoTwo-Door Car
\n", + "
" + ], + "text/plain": [ + " Customer ST GENDER Education Customer Lifetime Value Income \\\n", + "0 GS98873 Arizona F Bachelor 323912.47% 16061 \n", + "1 CW49887 California F Master 462680.11% 79487 \n", + "2 MY31220 California F College 899704.02% 54230 \n", + "3 UH35128 Oregon F College 2580706.30% 71210 \n", + "4 WH52799 Arizona F College 380812.21% 94903 \n", + "\n", + " Monthly Premium Auto Number of Open Complaints Total Claim Amount \\\n", + "0 88 1/0/00 633.6 \n", + "1 114 1/0/00 547.2 \n", + "2 112 1/0/00 537.6 \n", + "3 214 1/1/00 1027.2 \n", + "4 94 1/0/00 451.2 \n", + "\n", + " Policy Type Vehicle Class \n", + "0 Personal Auto Four-Door Car \n", + "1 Special Auto SUV \n", + "2 Personal Auto Two-Door Car \n", + "3 Personal Auto Luxury Car \n", + "4 Corporate Auto Two-Door Car " + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df2.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "b5be45b7-40a3-4273-8779-9895f8b5904d", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
CustomerStateCustomer Lifetime ValueEducationGenderIncomeMonthly Premium AutoNumber of Open ComplaintsPolicy TypeTotal Claim AmountVehicle Class
0SA25987Washington3479.137523High School or BelowM01040Personal Auto499.200000Two-Door Car
1TB86706Arizona2502.637401MasterM0660Personal Auto3.468912Two-Door Car
2ZL73902Nevada3265.156348BachelorF25820820Personal Auto393.600000Four-Door Car
3KX23516California4455.843406High School or BelowF01210Personal Auto699.615192SUV
4FN77294California7704.958480High School or BelowM303661012Personal Auto484.800000SUV
\n", + "
" + ], + "text/plain": [ + " Customer State Customer Lifetime Value Education Gender \\\n", + "0 SA25987 Washington 3479.137523 High School or Below M \n", + "1 TB86706 Arizona 2502.637401 Master M \n", + "2 ZL73902 Nevada 3265.156348 Bachelor F \n", + "3 KX23516 California 4455.843406 High School or Below F \n", + "4 FN77294 California 7704.958480 High School or Below M \n", + "\n", + " Income Monthly Premium Auto Number of Open Complaints Policy Type \\\n", + "0 0 104 0 Personal Auto \n", + "1 0 66 0 Personal Auto \n", + "2 25820 82 0 Personal Auto \n", + "3 0 121 0 Personal Auto \n", + "4 30366 101 2 Personal Auto \n", + "\n", + " Total Claim Amount Vehicle Class \n", + "0 499.200000 Two-Door Car \n", + "1 3.468912 Two-Door Car \n", + "2 393.600000 Four-Door Car \n", + "3 699.615192 SUV \n", + "4 484.800000 SUV " + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df3.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "21aa3ed2-6c23-4b00-860b-5474c1e31e6e", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "11 Index(['customer', 'state', 'gender', 'education', 'cust_life_value', 'income',\n", + " 'mont_prem_val', 'num_open_comp', 'policy_type', 'vehicle_class',\n", + " 'tot_claim_amount'],\n", + " dtype='object')\n", + "11 Index(['Customer', 'ST', 'GENDER', 'Education', 'Customer Lifetime Value',\n", + " 'Income', 'Monthly Premium Auto', 'Number of Open Complaints',\n", + " 'Total Claim Amount', 'Policy Type', 'Vehicle Class'],\n", + " dtype='object')\n", + "11 Index(['Customer', 'State', 'Customer Lifetime Value', 'Education', 'Gender',\n", + " 'Income', 'Monthly Premium Auto', 'Number of Open Complaints',\n", + " 'Policy Type', 'Total Claim Amount', 'Vehicle Class'],\n", + " dtype='object')\n" + ] + } + ], + "source": [ + "print(len(df.columns), df.columns)\n", + "print(len(df2.columns), df2.columns)\n", + "print(len(df3.columns), 
df3.columns)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "6b86014c-0e1e-46a4-84a9-4dbfc23c8fe6", + "metadata": {}, + "outputs": [], + "source": [ + "COLUMN_MAP = {\"Customer\": \"customer\", \"ST\": \"state\", \"State\": \"state\", \"GENDER\": \"gender\", \"Gender\": \"gender\", \"Education\": \"education\", \"Customer Lifetime Value\": \"cust_life_value\", \"Income\": \"income\", \"Monthly Premium Auto\": \"mont_prem_val\", \"Number of Open Complaints\": \"num_open_comp\", \"Policy Type\": \"policy_type\", \"Vehicle Class\": \"vehicle_class\", \"Total Claim Amount\": \"tot_claim_amount\"}  # raw header -> cleaned name; file2/file3 order their columns differently, so map by name, never by position\n", + "df2, df3 = (frame.rename(columns=COLUMN_MAP)[df.columns] for frame in (df2, df3))  # [df.columns] aligns the column order and raises KeyError if a header is missing" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "eb3ad418-151c-4ab5-a2d6-4a10687ee129", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
customerstategendereducationcust_life_valueincomemont_prem_valnum_open_comppolicy_typevehicle_classtot_claim_amount
0RB50392WashingtonUnknownMaster588174010000Personal AutoFour-Door Car3
\n", + "
" + ], + "text/plain": [ + " customer state gender education cust_life_value income \\\n", + "0 RB50392 Washington Unknown Master 588174 0 \n", + "\n", + " mont_prem_val num_open_comp policy_type vehicle_class \\\n", + "0 1000 0 Personal Auto Four-Door Car \n", + "\n", + " tot_claim_amount \n", + "0 3 " + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.head(1)" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "6e77f71d-2f52-4182-8f47-a501865a0797", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
customerstategendereducationcust_life_valueincomemont_prem_valnum_open_comppolicy_typevehicle_classtot_claim_amount
0GS98873ArizonaFBachelor323912.47%16061881/0/00633.6Personal AutoFour-Door Car
\n", + "
" + ], + "text/plain": [ + " customer state gender education cust_life_value income mont_prem_val \\\n", + "0 GS98873 Arizona F Bachelor 323912.47% 16061 88 \n", + "\n", + " num_open_comp policy_type vehicle_class tot_claim_amount \n", + "0 1/0/00 633.6 Personal Auto Four-Door Car " + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df2.head(1)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "4747d983-e690-4e8e-af27-10ead536ba75", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
customerstategendereducationcust_life_valueincomemont_prem_valnum_open_comppolicy_typevehicle_classtot_claim_amount
0SA25987Washington3479.137523High School or BelowM01040Personal Auto499.2Two-Door Car
\n", + "
" + ], + "text/plain": [ + " customer state gender education cust_life_value \\\n", + "0 SA25987 Washington 3479.137523 High School or Below M \n", + "\n", + " income mont_prem_val num_open_comp policy_type vehicle_class \\\n", + "0 0 104 0 Personal Auto 499.2 \n", + "\n", + " tot_claim_amount \n", + "0 Two-Door Car " + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df3.head(1)" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "bd47efc6-5b20-4d05-abe0-85037c0197f3", + "metadata": {}, + "outputs": [], + "source": [ + "df_combined = pd.concat([df, df2, df3], ignore_index=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "f9cf4282-fbd8-41ac-9063-b94761a8e939", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
customerstategendereducationcust_life_valueincomemont_prem_valnum_open_comppolicy_typevehicle_classtot_claim_amount
0RB50392WashingtonUnknownMaster588174010000Personal AutoFour-Door Car3
1QZ44356ArizonafemaleBachelor6979540940Personal AutoFour-Door Car1131
2AI49188NevadafemaleBachelor1288743487671080Personal AutoTwo-Door Car566
3WW63253CaliforniaMaleBachelor76458601060Corporate AutoSUV530
4GA49547WashingtonMaleHigh School or Below53630836357680Personal AutoFour-Door Car17
\n", + "
" + ], + "text/plain": [ + " customer state gender education cust_life_value income \\\n", + "0 RB50392 Washington Unknown Master 588174 0 \n", + "1 QZ44356 Arizona female Bachelor 697954 0 \n", + "2 AI49188 Nevada female Bachelor 1288743 48767 \n", + "3 WW63253 California Male Bachelor 764586 0 \n", + "4 GA49547 Washington Male High School or Below 536308 36357 \n", + "\n", + " mont_prem_val num_open_comp policy_type vehicle_class tot_claim_amount \n", + "0 1000 0 Personal Auto Four-Door Car 3 \n", + "1 94 0 Personal Auto Four-Door Car 1131 \n", + "2 108 0 Personal Auto Two-Door Car 566 \n", + "3 106 0 Corporate Auto SUV 530 \n", + "4 68 0 Personal Auto Four-Door Car 17 " + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_combined.head()\n" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "fde76456-fe4e-4c2b-9410-c92665342013", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 9138 entries, 0 to 9137\n", + "Data columns (total 11 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 customer 9138 non-null object\n", + " 1 state 9138 non-null object\n", + " 2 gender 9133 non-null object\n", + " 3 education 9138 non-null object\n", + " 4 cust_life_value 9134 non-null object\n", + " 5 income 9138 non-null int64 \n", + " 6 mont_prem_val 9138 non-null int64 \n", + " 7 num_open_comp 9138 non-null object\n", + " 8 policy_type 9138 non-null object\n", + " 9 vehicle_class 9138 non-null object\n", + " 10 tot_claim_amount 9138 non-null object\n", + "dtypes: int64(2), object(9)\n", + "memory usage: 785.4+ KB\n" + ] + } + ], + "source": [ + "df_combined.info()" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "07b38940-e11a-48f6-8aee-57b4259fd85e", + "metadata": {}, + "outputs": [], + "source": [ + "df_combined.columns = df_combined.columns.str.strip()" + ] + }, 
+ { + "cell_type": "code", + "execution_count": 17, + "id": "636a2fbc-674c-4bda-8999-8b454a682c09", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "state\n", + "California 3152\n", + "Oregon 2601\n", + "Arizona 1655\n", + "Nevada 882\n", + "Washington 798\n", + "AZ 49\n", + "Unknown 1\n", + "Name: count, dtype: int64" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_combined['state'].value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "61b129c5-3772-4b51-91fb-4e56431b00da", + "metadata": {}, + "outputs": [], + "source": [ + "df_combined[\"state\"] = df_combined[\"state\"].replace({\"Cali\": \"California\", \"AZ\": \"Arizona\", \"WA\": \"Washington\"})" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "3d5c8401-92c2-4a33-8c52-5e59c510cb1c", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "gender\n", + "F 527\n", + "female 504\n", + "M 461\n", + "Male 414\n", + "Unknown 118\n", + " ... 
\n", + "6516.214888 1\n", + "4943.386117 1\n", + "2585.955652 1\n", + "2750.705045 1\n", + "3622.872124 1\n", + "Name: count, Length: 6294, dtype: int64" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_combined['gender'].value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "6253217a-70fb-4461-a863-159c1b6c8aed", + "metadata": {}, + "outputs": [], + "source": [ + "df_combined['gender'] = df_combined['gender'].replace({\"M\": \"Male\", \"F\": \"female\"})" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "52f94c46-1fb2-4912-ade1-3fb34cb2e195", + "metadata": {}, + "outputs": [], + "source": [ + "df_combined[\"gender\"] = df_combined[\"gender\"].str.strip().str.lower()" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "id": "e0f0e922-5130-4304-a885-6025630bc541", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "education\n", + "Bachelor 2726\n", + "College 2682\n", + "High School or Below 2616\n", + "Master 752\n", + "Doctor 344\n", + "Bachelors 17\n", + "Unknown 1\n", + "Name: count, dtype: int64" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_combined['education'].value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "id": "859aed46-ed9a-4632-be8c-3dd91040fa42", + "metadata": {}, + "outputs": [], + "source": [ + "df_combined[\"education\"] = df_combined[\"education\"].str.strip().str.lower()" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "id": "a5633e2a-2698-4f3c-bff7-c998ee4cf492", + "metadata": {}, + "outputs": [], + "source": [ + "df_combined['education'] = df_combined['education'].replace({\"bachelors\": \"bachelor\"})" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "id": "d5cf98e6-34d8-47c0-ab1b-7fdb2b08c640", + "metadata": {}, + "outputs": [], + "source": [ + 
"df_combined[\"cust_life_value\"] = df_combined[\"cust_life_value\"].replace({\"%\": \"\"}, regex=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "id": "899eb4c6-cbbc-44a8-872a-150f51ceac21", + "metadata": {}, + "outputs": [], + "source": [ + "mask_gender = df_combined['cust_life_value'].isin(['F','M'])\n", + "df_combined.loc[mask_gender, 'gender'] = df_combined.loc[mask_gender, 'cust_life_value'].map({'F': 'female', 'M': 'male'})  # read from df_combined (the frame the mask was built on), not df; use the same labels as the normalised gender column\n", + "df_combined.loc[mask_gender, 'cust_life_value'] = pd.NA" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "id": "afcad018-f842-4071-93d4-816153094221", + "metadata": {}, + "outputs": [], + "source": [ + "df_combined['cust_life_value'] = pd.to_numeric(df_combined['cust_life_value'], errors='coerce')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c5f7641c-3874-48be-9683-72447c58626c", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 28, + "id": "05e8bc98-ee64-4f0a-8d42-0ac0f26cfc2e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "gender\n", + "female 1031\n", + "male 914\n", + "unknown 118\n", + "Name: count, dtype: int64" + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_combined['gender'].value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "id": "9d491c75-aaf0-44f7-9d21-37657670ffc8", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[ 588174. 697954. 1288743. ... 
568964.41 368672.38 399258.39]\n" + ] + } + ], + "source": [ + "print(df_combined['cust_life_value'].unique())" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "id": "faeeed8a-63ab-4127-8183-c43ac37dd21b", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "cust_life_value\n", + "445811.00 4\n", + "588174.00 4\n", + "251459.00 4\n", + "552821.28 4\n", + "477294.00 3\n", + " ..\n", + "2009689.34 1\n", + "582168.43 1\n", + "309580.34 1\n", + "1059854.25 1\n", + "394637.00 1\n", + "Name: count, Length: 1978, dtype: int64" + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_combined['cust_life_value'].value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "id": "003bb0a4-6fac-4fff-9342-1eea16830266", + "metadata": {}, + "outputs": [], + "source": [ + "df_combined['cust_life_value'] = pd.to_numeric(df_combined['cust_life_value'], errors='coerce')" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "id": "dd74d2f1-418e-431f-b56c-2b2849eea697", + "metadata": {}, + "outputs": [], + "source": [ + "df_combined = df_combined.dropna(subset=['cust_life_value'])" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "id": "a693e918-4413-4b9c-8d17-44297661d998", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "count 2.064000e+03\n", + "mean 7.795747e+05\n", + "std 6.324232e+05\n", + "min 2.004350e+05\n", + "25% 3.981483e+05\n", + "50% 5.730965e+05\n", + "75% 8.803855e+05\n", + "max 5.816655e+06\n", + "Name: cust_life_value, dtype: float64" + ] + }, + "execution_count": 33, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_combined['cust_life_value'].describe()" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "id": "8e810a7c-9877-4652-bf58-f85bc8c8ea78", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Index: 2064 
entries, 0 to 2067\n", + "Data columns (total 11 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 customer 2064 non-null object \n", + " 1 state 2064 non-null object \n", + " 2 gender 2060 non-null object \n", + " 3 education 2064 non-null object \n", + " 4 cust_life_value 2064 non-null float64\n", + " 5 income 2064 non-null int64 \n", + " 6 mont_prem_val 2064 non-null int64 \n", + " 7 num_open_comp 2064 non-null object \n", + " 8 policy_type 2064 non-null object \n", + " 9 vehicle_class 2064 non-null object \n", + " 10 tot_claim_amount 2064 non-null object \n", + "dtypes: float64(1), int64(2), object(8)\n", + "memory usage: 193.5+ KB\n" + ] + } + ], + "source": [ + "df_combined.info()" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "id": "05669297-b8e3-40c8-b8a1-c928f2d8d903", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "count 2064.000000\n", + "mean 39541.322190\n", + "std 30342.133631\n", + "min 0.000000\n", + "25% 15186.250000\n", + "50% 36509.000000\n", + "75% 64358.000000\n", + "max 99981.000000\n", + "Name: income, dtype: float64" + ] + }, + "execution_count": 35, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_combined['income'].describe()" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "id": "380248ef-7686-41d4-84eb-cf6a71d3d348", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "count 2064.000000\n", + "mean 169.348353\n", + "std 1219.967781\n", + "min 61.000000\n", + "25% 68.000000\n", + "50% 81.500000\n", + "75% 108.000000\n", + "max 35354.000000\n", + "Name: mont_prem_val, dtype: float64" + ] + }, + "execution_count": 36, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_combined['mont_prem_val'].describe()" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "id": "13a1e5b0-8c52-4519-ab3d-207fb6bec15d", + "metadata": {}, + "outputs": [ + { + "data": { + 
"text/plain": [ + "array([0, 2, 1, 3, 5, 4, '1/0/00', '1/1/00', '1/3/00', '1/5/00', '1/2/00',\n", + " '1/4/00'], dtype=object)" + ] + }, + "execution_count": 37, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_combined['num_open_comp'].unique()" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "id": "4024824a-761e-4696-a1f8-b1f7930fe492", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[0 2 1 3 5 4 '1/0/00' '1/1/00' '1/3/00' '1/5/00' '1/2/00' '1/4/00']\n" + ] + } + ], + "source": [ + "print(df_combined[\"num_open_comp\"].unique())" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "id": "0d10902f-a7f8-448a-8492-8e006e7c8d8a", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[0 2 1 3 5 4 '1/0/00' '1/1/00' '1/3/00' '1/5/00']\n" + ] + } + ], + "source": [ + "print(df_combined[\"num_open_comp\"].unique()[:10])" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "id": "5420716c-7c3a-4075-8bfe-012eb7925cc3", + "metadata": {}, + "outputs": [], + "source": [ + "df_combined[\"num_open_comp\"] = (\n", + "    df_combined[\"num_open_comp\"].astype(str).str.replace(r\"^\\d+/(\\d+)/\\d+$\", r\"\\1\", regex=True)  # only '1/N/00' strings are rewritten to N; plain integer counts pass through instead of becoming NaN\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "id": "22eb2849-4386-4860-8806-6d7326ad7c00", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[nan '0' '1' '3' '5' '2' '4']\n" + ] + } + ], + "source": [ + "print(df_combined[\"num_open_comp\"].unique()[:10])" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "id": "694bf433-ca66-4be9-a791-249091f0113e", + "metadata": {}, + "outputs": [], + "source": [ + "df_combined[\"num_open_comp\"] = pd.to_numeric(\n", + " df_combined[\"num_open_comp\"], errors=\"coerce\"\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "id": "f539d897-a05e-4133-99b0-b4bceebd10e2", + "metadata": {}, + 
"outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[nan 0. 1. 3. 5. 2. 4.]\n" + ] + } + ], + "source": [ + "print(df_combined[\"num_open_comp\"].unique())" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "id": "6bf969d3-dfaf-45f4-a726-f9b81cb4c630", + "metadata": {}, + "outputs": [], + "source": [ + "df_combined[\"num_open_comp\"] = df_combined[\"num_open_comp\"].fillna(df_combined[\"num_open_comp\"].median())\n" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "id": "1c328c66-a2a5-41dc-b6e9-183ec452d4d0", + "metadata": {}, + "outputs": [], + "source": [ + "df_combined[\"policy_type\"] = df_combined[\"policy_type\"].str.strip().str.lower()" + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "id": "f9b5498d-85bc-45af-ab99-d995b80c1b5a", + "metadata": {}, + "outputs": [], + "source": [ + "df_combined = df_combined.dropna(subset=[\"policy_type\"])\n" + ] + }, + { + "cell_type": "code", + "execution_count": 47, + "id": "26c01b5a-b9ae-4700-ba98-dcccd8bedfbc", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array(['personal auto', 'corporate auto', 'special auto'], dtype=object)" + ] + }, + "execution_count": 47, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_combined['policy_type'].unique()" + ] + }, + { + "cell_type": "code", + "execution_count": 48, + "id": "4288e43c-6ef3-4096-9f39-ec4a5ffa112d", + "metadata": {}, + "outputs": [], + "source": [ + "df_combined[\"income\"] = df_combined[\"income\"].astype(float)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 49, + "id": "e1f71cf7-54bc-4a07-9150-c1081c3a0bde", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Index: 1072 entries, 0 to 1071\n", + "Data columns (total 11 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 customer 1072 non-null object \n", + " 1 state 1072 non-null 
object \n", + " 2 gender 1072 non-null object \n", + " 3 education 1072 non-null object \n", + " 4 cust_life_value 1072 non-null float64\n", + " 5 income 1072 non-null float64\n", + " 6 mont_prem_val 1072 non-null int64 \n", + " 7 num_open_comp 1072 non-null float64\n", + " 8 policy_type 1072 non-null object \n", + " 9 vehicle_class 1072 non-null object \n", + " 10 tot_claim_amount 1072 non-null object \n", + "dtypes: float64(3), int64(1), object(7)\n", + "memory usage: 100.5+ KB\n" + ] + } + ], + "source": [ + "df_combined.info()" + ] + }, + { + "cell_type": "code", + "execution_count": 50, + "id": "daa491d2-d3fa-4ae6-a88e-c61587a291bd", + "metadata": {}, + "outputs": [], + "source": [ + "df_combined[\"mont_prem_val\"] = df_combined[\"mont_prem_val\"].astype(float)" + ] + }, + { + "cell_type": "code", + "execution_count": 51, + "id": "846c4a2b-12f3-48fb-a2b1-864f041001fb", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array(['Four-Door Car', 'Two-Door Car', 'SUV', 'Luxury'], dtype=object)" + ] + }, + "execution_count": 51, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_combined['vehicle_class'].unique()" + ] + }, + { + "cell_type": "code", + "execution_count": 52, + "id": "4810bb76-f7e6-448e-932b-6c1a10841c35", + "metadata": {}, + "outputs": [], + "source": [ + "df_combined[\"tot_claim_amount\"] = df_combined[\"tot_claim_amount\"].astype(float)" + ] + }, + { + "cell_type": "code", + "execution_count": 53, + "id": "7fe04e6b-7df8-453e-b4ef-a7affe0e4686", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Index: 1072 entries, 0 to 1071\n", + "Data columns (total 11 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 customer 1072 non-null object \n", + " 1 state 1072 non-null object \n", + " 2 gender 1072 non-null object \n", + " 3 education 1072 non-null object \n", + " 4 cust_life_value 1072 non-null 
float64\n", + " 5 income 1072 non-null float64\n", + " 6 mont_prem_val 1072 non-null float64\n", + " 7 num_open_comp 1072 non-null float64\n", + " 8 policy_type 1072 non-null object \n", + " 9 vehicle_class 1072 non-null object \n", + " 10 tot_claim_amount 1072 non-null float64\n", + "dtypes: float64(5), object(6)\n", + "memory usage: 100.5+ KB\n" + ] + } + ], + "source": [ + "df_combined.info()" + ] + }, + { + "cell_type": "code", + "execution_count": 54, + "id": "22e03c97-6a21-43ea-9889-a5ca5351d5ac", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "count 1072.000000\n", + "mean 404.933769\n", + "std 292.878582\n", + "min 0.000000\n", + "25% 202.500000\n", + "50% 355.000000\n", + "75% 533.000000\n", + "max 2893.000000\n", + "Name: tot_claim_amount, dtype: float64" + ] + }, + "execution_count": 54, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_combined['tot_claim_amount'].describe()" ] }, { @@ -70,16 +1742,1084 @@ "This dataset contains information such as customer demographics, policy details, vehicle information, and the customer's response to the last marketing campaign. Our goal is to explore and analyze this data by performing data cleaning, formatting, and structuring." ] }, + { + "cell_type": "markdown", + "id": "04564af4-3823-4e16-af23-ee562869bc1f", + "metadata": {}, + "source": [ + "1. You work at the marketing department and you want to know which sales channel brought the most sales in terms of total revenue. Using pivot, create a summary table showing the total revenue for each sales channel (branch, call center, web, and mail).\n", + "Round the total revenue to 2 decimal points. Analyze the resulting table to draw insights." + ] + }, { "cell_type": "code", - "execution_count": null, + "execution_count": 55, "id": "aa10d9b0-1c27-4d3f-a8e4-db6ab73bfd26", "metadata": { "id": "aa10d9b0-1c27-4d3f-a8e4-db6ab73bfd26" }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
unnamed:_0customerstatecustomer_lifetime_valueresponsecoverageeducationeffective_to_dateemploymentstatusgender...number_of_policiespolicy_typepolicyrenew_offer_typesales_channeltotal_claim_amountvehicle_classvehicle_sizevehicle_typemonth
00DK49336Arizona4809.216960NoBasicCollege2011-02-18EmployedM...9Corporate AutoCorporate L3Offer3Agent292.800000Four-Door CarMedsizeA2
11KX64629California2228.525238NoBasicCollege2011-01-18UnemployedF...1Personal AutoPersonal L3Offer4Call Center744.924331Four-Door CarMedsizeA1
22LZ68649Washington14947.917300NoBasicBachelor2011-02-10EmployedM...2Personal AutoPersonal L3Offer3Call Center480.000000SUVMedsizeA2
33XL78013Oregon22332.439460YesExtendedCollege2011-01-11EmployedM...2Corporate AutoCorporate L3Offer2Branch484.013411Four-Door CarMedsizeA1
44QA50777Oregon9025.067525NoPremiumBachelor2011-01-17Medical LeaveF...7Personal AutoPersonal L2Offer1Branch707.925645Four-Door CarMedsizeA1
\n", + "

5 rows × 27 columns

\n", + "
" + ], + "text/plain": [ + " unnamed:_0 customer state customer_lifetime_value response \\\n", + "0 0 DK49336 Arizona 4809.216960 No \n", + "1 1 KX64629 California 2228.525238 No \n", + "2 2 LZ68649 Washington 14947.917300 No \n", + "3 3 XL78013 Oregon 22332.439460 Yes \n", + "4 4 QA50777 Oregon 9025.067525 No \n", + "\n", + " coverage education effective_to_date employmentstatus gender ... \\\n", + "0 Basic College 2011-02-18 Employed M ... \n", + "1 Basic College 2011-01-18 Unemployed F ... \n", + "2 Basic Bachelor 2011-02-10 Employed M ... \n", + "3 Extended College 2011-01-11 Employed M ... \n", + "4 Premium Bachelor 2011-01-17 Medical Leave F ... \n", + "\n", + " number_of_policies policy_type policy renew_offer_type \\\n", + "0 9 Corporate Auto Corporate L3 Offer3 \n", + "1 1 Personal Auto Personal L3 Offer4 \n", + "2 2 Personal Auto Personal L3 Offer3 \n", + "3 2 Corporate Auto Corporate L3 Offer2 \n", + "4 7 Personal Auto Personal L2 Offer1 \n", + "\n", + " sales_channel total_claim_amount vehicle_class vehicle_size \\\n", + "0 Agent 292.800000 Four-Door Car Medsize \n", + "1 Call Center 744.924331 Four-Door Car Medsize \n", + "2 Call Center 480.000000 SUV Medsize \n", + "3 Branch 484.013411 Four-Door Car Medsize \n", + "4 Branch 707.925645 Four-Door Car Medsize \n", + "\n", + " vehicle_type month \n", + "0 A 2 \n", + "1 A 1 \n", + "2 A 2 \n", + "3 A 1 \n", + "4 A 1 \n", + "\n", + "[5 rows x 27 columns]" + ] + }, + "execution_count": 55, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "marketing_customer_analysis = pd.read_csv(\"https://raw.githubusercontent.com/data-bootcamp-v4/data/main/marketing_customer_analysis_clean.csv\")\n", + "marketing_customer_analysis.head()\n" + ] + }, + { + "cell_type": "code", + "execution_count": 56, + "id": "a5a92b53-4572-4a36-9bd1-14f6997abeb5", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['unnamed:_0', 'customer', 'state', 
'customer_lifetime_value', 'response', 'coverage', 'education', 'effective_to_date', 'employmentstatus', 'gender', 'income', 'location_code', 'marital_status', 'monthly_premium_auto', 'months_since_last_claim', 'months_since_policy_inception', 'number_of_open_complaints', 'number_of_policies', 'policy_type', 'policy', 'renew_offer_type', 'sales_channel', 'total_claim_amount', 'vehicle_class', 'vehicle_size', 'vehicle_type', 'month']\n" + ] + } + ], + "source": [ + "print(marketing_customer_analysis.columns.tolist())\n" + ] + }, + { + "cell_type": "code", + "execution_count": 57, + "id": "d5285e88-e754-4b3b-9736-16b199005006", + "metadata": {}, "outputs": [], "source": [ - "# Your code goes here" + "marketing_customer_analysis = marketing_customer_analysis.drop(columns=[\"unnamed:_0\"])" + ] + }, + { + "cell_type": "code", + "execution_count": 58, + "id": "53950f4d-ca6f-4567-bf74-6c2e62bb2598", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
customerstatecustomer_lifetime_valueresponsecoverageeducationeffective_to_dateemploymentstatusgenderincome...number_of_policiespolicy_typepolicyrenew_offer_typesales_channeltotal_claim_amountvehicle_classvehicle_sizevehicle_typemonth
0DK49336Arizona4809.216960NoBasicCollege2011-02-18EmployedM48029...9Corporate AutoCorporate L3Offer3Agent292.800000Four-Door CarMedsizeA2
1KX64629California2228.525238NoBasicCollege2011-01-18UnemployedF0...1Personal AutoPersonal L3Offer4Call Center744.924331Four-Door CarMedsizeA1
2LZ68649Washington14947.917300NoBasicBachelor2011-02-10EmployedM22139...2Personal AutoPersonal L3Offer3Call Center480.000000SUVMedsizeA2
3XL78013Oregon22332.439460YesExtendedCollege2011-01-11EmployedM49078...2Corporate AutoCorporate L3Offer2Branch484.013411Four-Door CarMedsizeA1
4QA50777Oregon9025.067525NoPremiumBachelor2011-01-17Medical LeaveF23675...7Personal AutoPersonal L2Offer1Branch707.925645Four-Door CarMedsizeA1
\n", + "

5 rows × 26 columns

\n", + "
" + ], + "text/plain": [ + " customer state customer_lifetime_value response coverage education \\\n", + "0 DK49336 Arizona 4809.216960 No Basic College \n", + "1 KX64629 California 2228.525238 No Basic College \n", + "2 LZ68649 Washington 14947.917300 No Basic Bachelor \n", + "3 XL78013 Oregon 22332.439460 Yes Extended College \n", + "4 QA50777 Oregon 9025.067525 No Premium Bachelor \n", + "\n", + " effective_to_date employmentstatus gender income ... number_of_policies \\\n", + "0 2011-02-18 Employed M 48029 ... 9 \n", + "1 2011-01-18 Unemployed F 0 ... 1 \n", + "2 2011-02-10 Employed M 22139 ... 2 \n", + "3 2011-01-11 Employed M 49078 ... 2 \n", + "4 2011-01-17 Medical Leave F 23675 ... 7 \n", + "\n", + " policy_type policy renew_offer_type sales_channel \\\n", + "0 Corporate Auto Corporate L3 Offer3 Agent \n", + "1 Personal Auto Personal L3 Offer4 Call Center \n", + "2 Personal Auto Personal L3 Offer3 Call Center \n", + "3 Corporate Auto Corporate L3 Offer2 Branch \n", + "4 Personal Auto Personal L2 Offer1 Branch \n", + "\n", + " total_claim_amount vehicle_class vehicle_size vehicle_type month \n", + "0 292.800000 Four-Door Car Medsize A 2 \n", + "1 744.924331 Four-Door Car Medsize A 1 \n", + "2 480.000000 SUV Medsize A 2 \n", + "3 484.013411 Four-Door Car Medsize A 1 \n", + "4 707.925645 Four-Door Car Medsize A 1 \n", + "\n", + "[5 rows x 26 columns]" + ] + }, + "execution_count": 58, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "marketing_customer_analysis.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 59, + "id": "3e105632-4ab1-4c26-82aa-823e1ffd0ad6", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 10910 entries, 0 to 10909\n", + "Data columns (total 26 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 customer 10910 non-null object \n", + " 1 state 10910 non-null object \n", + " 2 
customer_lifetime_value 10910 non-null float64\n", + " 3 response 10910 non-null object \n", + " 4 coverage 10910 non-null object \n", + " 5 education 10910 non-null object \n", + " 6 effective_to_date 10910 non-null object \n", + " 7 employmentstatus 10910 non-null object \n", + " 8 gender 10910 non-null object \n", + " 9 income 10910 non-null int64 \n", + " 10 location_code 10910 non-null object \n", + " 11 marital_status 10910 non-null object \n", + " 12 monthly_premium_auto 10910 non-null int64 \n", + " 13 months_since_last_claim 10910 non-null float64\n", + " 14 months_since_policy_inception 10910 non-null int64 \n", + " 15 number_of_open_complaints 10910 non-null float64\n", + " 16 number_of_policies 10910 non-null int64 \n", + " 17 policy_type 10910 non-null object \n", + " 18 policy 10910 non-null object \n", + " 19 renew_offer_type 10910 non-null object \n", + " 20 sales_channel 10910 non-null object \n", + " 21 total_claim_amount 10910 non-null float64\n", + " 22 vehicle_class 10910 non-null object \n", + " 23 vehicle_size 10910 non-null object \n", + " 24 vehicle_type 10910 non-null object \n", + " 25 month 10910 non-null int64 \n", + "dtypes: float64(4), int64(5), object(17)\n", + "memory usage: 2.2+ MB\n" + ] + } + ], + "source": [ + "marketing_customer_analysis.info()" + ] + }, + { + "cell_type": "code", + "execution_count": 60, + "id": "ab92cb39-4b04-47b6-ae82-43c7e80a1650", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "customer 0\n", + "state 0\n", + "customer_lifetime_value 0\n", + "response 0\n", + "coverage 0\n", + "education 0\n", + "effective_to_date 0\n", + "employmentstatus 0\n", + "gender 0\n", + "income 0\n", + "location_code 0\n", + "marital_status 0\n", + "monthly_premium_auto 0\n", + "months_since_last_claim 0\n", + "months_since_policy_inception 0\n", + "number_of_open_complaints 0\n", + "number_of_policies 0\n", + "policy_type 0\n", + "policy 0\n", + "renew_offer_type 0\n", + "sales_channel 0\n", + 
"total_claim_amount 0\n", + "vehicle_class 0\n", + "vehicle_size 0\n", + "vehicle_type 0\n", + "month 0\n", + "dtype: int64" + ] + }, + "execution_count": 60, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "marketing_customer_analysis.isnull().sum()" + ] + }, + { + "cell_type": "code", + "execution_count": 61, + "id": "d7317fc0-e46b-4e62-8ee7-e87ccf43cd32", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
customerstatecustomer_lifetime_valueresponsecoverageeducationeffective_to_dateemploymentstatusgenderincome...number_of_policiespolicy_typepolicyrenew_offer_typesales_channeltotal_claim_amountvehicle_classvehicle_sizevehicle_typemonth
0DK49336Arizona4809.216960NoBasicCollege2011-02-18EmployedM48029...9Corporate AutoCorporate L3Offer3Agent292.800000Four-Door CarMedsizeA2
1KX64629California2228.525238NoBasicCollege2011-01-18UnemployedF0...1Personal AutoPersonal L3Offer4Call Center744.924331Four-Door CarMedsizeA1
2LZ68649Washington14947.917300NoBasicBachelor2011-02-10EmployedM22139...2Personal AutoPersonal L3Offer3Call Center480.000000SUVMedsizeA2
3XL78013Oregon22332.439460YesExtendedCollege2011-01-11EmployedM49078...2Corporate AutoCorporate L3Offer2Branch484.013411Four-Door CarMedsizeA1
4QA50777Oregon9025.067525NoPremiumBachelor2011-01-17Medical LeaveF23675...7Personal AutoPersonal L2Offer1Branch707.925645Four-Door CarMedsizeA1
\n", + "

5 rows × 26 columns

\n", + "
" + ], + "text/plain": [ + " customer state customer_lifetime_value response coverage education \\\n", + "0 DK49336 Arizona 4809.216960 No Basic College \n", + "1 KX64629 California 2228.525238 No Basic College \n", + "2 LZ68649 Washington 14947.917300 No Basic Bachelor \n", + "3 XL78013 Oregon 22332.439460 Yes Extended College \n", + "4 QA50777 Oregon 9025.067525 No Premium Bachelor \n", + "\n", + " effective_to_date employmentstatus gender income ... number_of_policies \\\n", + "0 2011-02-18 Employed M 48029 ... 9 \n", + "1 2011-01-18 Unemployed F 0 ... 1 \n", + "2 2011-02-10 Employed M 22139 ... 2 \n", + "3 2011-01-11 Employed M 49078 ... 2 \n", + "4 2011-01-17 Medical Leave F 23675 ... 7 \n", + "\n", + " policy_type policy renew_offer_type sales_channel \\\n", + "0 Corporate Auto Corporate L3 Offer3 Agent \n", + "1 Personal Auto Personal L3 Offer4 Call Center \n", + "2 Personal Auto Personal L3 Offer3 Call Center \n", + "3 Corporate Auto Corporate L3 Offer2 Branch \n", + "4 Personal Auto Personal L2 Offer1 Branch \n", + "\n", + " total_claim_amount vehicle_class vehicle_size vehicle_type month \n", + "0 292.800000 Four-Door Car Medsize A 2 \n", + "1 744.924331 Four-Door Car Medsize A 1 \n", + "2 480.000000 SUV Medsize A 2 \n", + "3 484.013411 Four-Door Car Medsize A 1 \n", + "4 707.925645 Four-Door Car Medsize A 1 \n", + "\n", + "[5 rows x 26 columns]" + ] + }, + "execution_count": 61, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "marketing_customer_analysis.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 62, + "id": "a17cf687-194e-47d0-9f8d-c8199ce6341d", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array(['Agent', 'Call Center', 'Branch', 'Web'], dtype=object)" + ] + }, + "execution_count": 62, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "marketing_customer_analysis[\"sales_channel\"].unique()" + ] + }, + { + "cell_type": "code", + "execution_count": 63, 
+ "id": "e357f92b-8353-4a40-a549-c6eb8477deda", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " total_claim_amount\n", + "sales_channel \n", + "Agent 1810226.82\n", + "Branch 1301204.00\n", + "Call Center 926600.82\n", + "Web 706600.04\n" + ] + } + ], + "source": [ + "pivot_table = pd.pivot_table(\n", + " marketing_customer_analysis,\n", + " values=\"total_claim_amount\", \n", + " index=\"sales_channel\", \n", + " aggfunc=\"sum\" \n", + ").round(2)\n", + "print(pivot_table)" + ] + }, + { + "cell_type": "markdown", + "id": "3b7ab78c-5440-4c6e-90c5-09cafeb8bf82", + "metadata": {}, + "source": [ + "Create a pivot table that shows the average customer lifetime value per gender and education level. Analyze the resulting table to draw insights." + ] + }, + { + "cell_type": "markdown", + "id": "31fb01b4-f21e-4b29-8b51-4eb6c3b9c229", + "metadata": {}, + "source": [ + "Among the sales channels, the Agent channel had the best performance, with 1810226.82. The worst performer was the Web channel, with 706600.04. Branch was the second-best performer with 1301204.00, followed by Call Center with 926600.82. We should consider hiring more agents, since that channel seems to be the most efficient." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7ec15cdc-1af8-40b7-b9be-13a0bbe787b9", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 68, + "id": "55ab5e14-030b-4188-8d5d-ccf794daf6e4", + "metadata": {}, + "outputs": [], + "source": [ + "pivot_table2 = pd.pivot_table(\n", + " marketing_customer_analysis,\n", + " index='education', \n", + " columns='gender', \n", + " values='customer_lifetime_value', \n", + " aggfunc='mean' \n", + ").round(2)" + ] + }, + { + "cell_type": "code", + "execution_count": 69, + "id": "eb042365-f9e7-4e31-b24d-11e464e68727", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
genderFM
education
Bachelor7874.277703.60
College7748.828052.46
Doctor7328.517415.33
High School or Below8675.228149.69
Master8157.058168.83
\n", + "
" + ], + "text/plain": [ + "gender F M\n", + "education \n", + "Bachelor 7874.27 7703.60\n", + "College 7748.82 8052.46\n", + "Doctor 7328.51 7415.33\n", + "High School or Below 8675.22 8149.69\n", + "Master 8157.05 8168.83" + ] + }, + "execution_count": 69, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pivot_table2" + ] + }, + { + "cell_type": "code", + "execution_count": 76, + "id": "4e6c78f7-8304-42b2-9f27-80a5412c17ee", + "metadata": {}, + "outputs": [], + "source": [ + "pivot_table2['Total'] = pivot_table2.sum(axis=1)" + ] + }, + { + "cell_type": "code", + "execution_count": 77, + "id": "8080ad3f-285d-4997-b7a8-b17fd62d355b", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
genderFMTotal
education
Bachelor7874.277703.6031155.74
College7748.828052.4631602.56
Doctor7328.517415.3329487.68
High School or Below8675.228149.6933649.82
Master8157.058168.8332651.76
\n", + "
" + ], + "text/plain": [ + "gender F M Total\n", + "education \n", + "Bachelor 7874.27 7703.60 31155.74\n", + "College 7748.82 8052.46 31602.56\n", + "Doctor 7328.51 7415.33 29487.68\n", + "High School or Below 8675.22 8149.69 33649.82\n", + "Master 8157.05 8168.83 32651.76" + ] + }, + "execution_count": 77, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pivot_table2" + ] + }, + { + "cell_type": "markdown", + "id": "283264b6-6ef2-47ca-8ac0-30220e079397", + "metadata": {}, + "source": [ + "Looking at the average customer lifetime value, there are no major differences across gender and education level. Females have a higher average lifetime value at the Bachelor and High School or Below levels, while males have a higher average at the College, Doctor, and Master levels. Customers with High School or Below and Master education have the highest average lifetime value, while customers with a Doctor degree have the lowest. We can run an advertising campaign targeting the highest-value customers." ] }, { @@ -116,6 +2856,35 @@ "Show it in a long format table." 
] }, + { + "cell_type": "code", + "execution_count": 79, + "id": "ca183766-372e-4f6d-8c01-1daec3842121", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " policy_type month number_of_open_complaints\n", + "0 Corporate Auto 1 443.434952\n", + "1 Corporate Auto 2 385.208135\n", + "2 Personal Auto 1 1727.605722\n", + "3 Personal Auto 2 1453.684441\n", + "4 Special Auto 1 87.074049\n", + "5 Special Auto 2 95.226817\n" + ] + } + ], + "source": [ + "pivot_table3 = pd.pivot_table(marketing_customer_analysis, index=['policy_type', 'month'], \n", + " values='number_of_open_complaints', \n", + " aggfunc='sum' \n", + ").reset_index() \n", + "\n", + "print(pivot_table3)" + ] + }, { "cell_type": "markdown", "id": "e3d09a8f-953c-448a-a5f8-2e5a8cca7291", @@ -136,9 +2905,7 @@ "id": "3a069e0b-b400-470e-904d-d17582191be4" }, "outputs": [], - "source": [ - "# Your code goes here" - ] + "source": [] } ], "metadata": { @@ -160,7 +2927,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.13" + "version": "3.13.5" } }, "nbformat": 4,