diff --git a/.ipynb_checkpoints/lab-dw-data-structuring-and-combining-checkpoint.ipynb b/.ipynb_checkpoints/lab-dw-data-structuring-and-combining-checkpoint.ipynb
new file mode 100644
index 0000000..6eb7a60
--- /dev/null
+++ b/.ipynb_checkpoints/lab-dw-data-structuring-and-combining-checkpoint.ipynb
@@ -0,0 +1,1286 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "25d7736c-ba17-4aff-b6bb-66eba20fbf4e",
+ "metadata": {
+ "id": "25d7736c-ba17-4aff-b6bb-66eba20fbf4e"
+ },
+ "source": [
+ "# Lab | Data Structuring and Combining Data"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "a2cdfc70-44c8-478c-81e7-2bc43fdf4986",
+ "metadata": {
+ "id": "a2cdfc70-44c8-478c-81e7-2bc43fdf4986"
+ },
+ "source": [
+ "## Challenge 1: Combining & Cleaning Data\n",
+ "\n",
+ "In this challenge, we will be working with the customer data from an insurance company, as we did in the two previous labs. The data can be found here:\n",
+ "- https://raw.githubusercontent.com/data-bootcamp-v4/data/main/file1.csv\n",
+ "\n",
+ "This time we also have new data, which can be found in the two CSV files linked below.\n",
+ "\n",
+ "- https://raw.githubusercontent.com/data-bootcamp-v4/data/main/file2.csv\n",
+ "- https://raw.githubusercontent.com/data-bootcamp-v4/data/main/file3.csv\n",
+ "\n",
+ "Note that you'll need to clean and format the new data.\n",
+ "\n",
+ "Observation:\n",
+ "- One option is to first combine the three datasets and then apply the cleaning function to the new combined dataset\n",
+ "- Another option would be to read the clean file you saved in the previous lab, and just clean the two new files and concatenate the three clean datasets"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "id": "492d06e3-92c7-4105-ac72-536db98d3244",
+ "metadata": {
+ "id": "492d06e3-92c7-4105-ac72-536db98d3244"
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Customer | \n",
+ " ST | \n",
+ " GENDER | \n",
+ " Education | \n",
+ " Customer Lifetime Value | \n",
+ " Income | \n",
+ " Monthly Premium Auto | \n",
+ " Number of Open Complaints | \n",
+ " Policy Type | \n",
+ " Vehicle Class | \n",
+ " Total Claim Amount | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " RB50392 | \n",
+ " Washington | \n",
+ " NaN | \n",
+ " Master | \n",
+ " NaN | \n",
+ " 0.0 | \n",
+ " 1000.0 | \n",
+ " 1/0/00 | \n",
+ " Personal Auto | \n",
+ " Four-Door Car | \n",
+ " 2.704934 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " QZ44356 | \n",
+ " Arizona | \n",
+ " F | \n",
+ " Bachelor | \n",
+ " 697953.59% | \n",
+ " 0.0 | \n",
+ " 94.0 | \n",
+ " 1/0/00 | \n",
+ " Personal Auto | \n",
+ " Four-Door Car | \n",
+ " 1131.464935 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " AI49188 | \n",
+ " Nevada | \n",
+ " F | \n",
+ " Bachelor | \n",
+ " 1288743.17% | \n",
+ " 48767.0 | \n",
+ " 108.0 | \n",
+ " 1/0/00 | \n",
+ " Personal Auto | \n",
+ " Two-Door Car | \n",
+ " 566.472247 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " WW63253 | \n",
+ " California | \n",
+ " M | \n",
+ " Bachelor | \n",
+ " 764586.18% | \n",
+ " 0.0 | \n",
+ " 106.0 | \n",
+ " 1/0/00 | \n",
+ " Corporate Auto | \n",
+ " SUV | \n",
+ " 529.881344 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " GA49547 | \n",
+ " Washington | \n",
+ " M | \n",
+ " High School or Below | \n",
+ " 536307.65% | \n",
+ " 36357.0 | \n",
+ " 68.0 | \n",
+ " 1/0/00 | \n",
+ " Personal Auto | \n",
+ " Four-Door Car | \n",
+ " 17.269323 | \n",
+ "
\n",
+ " \n",
+ " | ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " | 4003 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 4004 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 4005 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 4006 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 4007 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
4008 rows × 11 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Customer ST GENDER Education \\\n",
+ "0 RB50392 Washington NaN Master \n",
+ "1 QZ44356 Arizona F Bachelor \n",
+ "2 AI49188 Nevada F Bachelor \n",
+ "3 WW63253 California M Bachelor \n",
+ "4 GA49547 Washington M High School or Below \n",
+ "... ... ... ... ... \n",
+ "4003 NaN NaN NaN NaN \n",
+ "4004 NaN NaN NaN NaN \n",
+ "4005 NaN NaN NaN NaN \n",
+ "4006 NaN NaN NaN NaN \n",
+ "4007 NaN NaN NaN NaN \n",
+ "\n",
+ " Customer Lifetime Value Income Monthly Premium Auto \\\n",
+ "0 NaN 0.0 1000.0 \n",
+ "1 697953.59% 0.0 94.0 \n",
+ "2 1288743.17% 48767.0 108.0 \n",
+ "3 764586.18% 0.0 106.0 \n",
+ "4 536307.65% 36357.0 68.0 \n",
+ "... ... ... ... \n",
+ "4003 NaN NaN NaN \n",
+ "4004 NaN NaN NaN \n",
+ "4005 NaN NaN NaN \n",
+ "4006 NaN NaN NaN \n",
+ "4007 NaN NaN NaN \n",
+ "\n",
+ " Number of Open Complaints Policy Type Vehicle Class \\\n",
+ "0 1/0/00 Personal Auto Four-Door Car \n",
+ "1 1/0/00 Personal Auto Four-Door Car \n",
+ "2 1/0/00 Personal Auto Two-Door Car \n",
+ "3 1/0/00 Corporate Auto SUV \n",
+ "4 1/0/00 Personal Auto Four-Door Car \n",
+ "... ... ... ... \n",
+ "4003 NaN NaN NaN \n",
+ "4004 NaN NaN NaN \n",
+ "4005 NaN NaN NaN \n",
+ "4006 NaN NaN NaN \n",
+ "4007 NaN NaN NaN \n",
+ "\n",
+ " Total Claim Amount \n",
+ "0 2.704934 \n",
+ "1 1131.464935 \n",
+ "2 566.472247 \n",
+ "3 529.881344 \n",
+ "4 17.269323 \n",
+ "... ... \n",
+ "4003 NaN \n",
+ "4004 NaN \n",
+ "4005 NaN \n",
+ "4006 NaN \n",
+ "4007 NaN \n",
+ "\n",
+ "[4008 rows x 11 columns]"
+ ]
+ },
+ "execution_count": 3,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "import pandas as pd\n",
+ "df1 = pd.read_csv(\"https://raw.githubusercontent.com/data-bootcamp-v4/data/main/file1.csv\")\n",
+ "df1"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "id": "f4d09544",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Customer | \n",
+ " ST | \n",
+ " GENDER | \n",
+ " Education | \n",
+ " Customer Lifetime Value | \n",
+ " Income | \n",
+ " Monthly Premium Auto | \n",
+ " Number of Open Complaints | \n",
+ " Total Claim Amount | \n",
+ " Policy Type | \n",
+ " Vehicle Class | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " GS98873 | \n",
+ " Arizona | \n",
+ " F | \n",
+ " Bachelor | \n",
+ " 323912.47% | \n",
+ " 16061 | \n",
+ " 88 | \n",
+ " 1/0/00 | \n",
+ " 633.600000 | \n",
+ " Personal Auto | \n",
+ " Four-Door Car | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " CW49887 | \n",
+ " California | \n",
+ " F | \n",
+ " Master | \n",
+ " 462680.11% | \n",
+ " 79487 | \n",
+ " 114 | \n",
+ " 1/0/00 | \n",
+ " 547.200000 | \n",
+ " Special Auto | \n",
+ " SUV | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " MY31220 | \n",
+ " California | \n",
+ " F | \n",
+ " College | \n",
+ " 899704.02% | \n",
+ " 54230 | \n",
+ " 112 | \n",
+ " 1/0/00 | \n",
+ " 537.600000 | \n",
+ " Personal Auto | \n",
+ " Two-Door Car | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " UH35128 | \n",
+ " Oregon | \n",
+ " F | \n",
+ " College | \n",
+ " 2580706.30% | \n",
+ " 71210 | \n",
+ " 214 | \n",
+ " 1/1/00 | \n",
+ " 1027.200000 | \n",
+ " Personal Auto | \n",
+ " Luxury Car | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " WH52799 | \n",
+ " Arizona | \n",
+ " F | \n",
+ " College | \n",
+ " 380812.21% | \n",
+ " 94903 | \n",
+ " 94 | \n",
+ " 1/0/00 | \n",
+ " 451.200000 | \n",
+ " Corporate Auto | \n",
+ " Two-Door Car | \n",
+ "
\n",
+ " \n",
+ " | ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " | 991 | \n",
+ " HV85198 | \n",
+ " Arizona | \n",
+ " M | \n",
+ " Master | \n",
+ " 847141.75% | \n",
+ " 63513 | \n",
+ " 70 | \n",
+ " 1/0/00 | \n",
+ " 185.667213 | \n",
+ " Personal Auto | \n",
+ " Four-Door Car | \n",
+ "
\n",
+ " \n",
+ " | 992 | \n",
+ " BS91566 | \n",
+ " Arizona | \n",
+ " F | \n",
+ " College | \n",
+ " 543121.91% | \n",
+ " 58161 | \n",
+ " 68 | \n",
+ " 1/0/00 | \n",
+ " 140.747286 | \n",
+ " Corporate Auto | \n",
+ " Four-Door Car | \n",
+ "
\n",
+ " \n",
+ " | 993 | \n",
+ " IL40123 | \n",
+ " Nevada | \n",
+ " F | \n",
+ " College | \n",
+ " 568964.41% | \n",
+ " 83640 | \n",
+ " 70 | \n",
+ " 1/0/00 | \n",
+ " 471.050488 | \n",
+ " Corporate Auto | \n",
+ " Two-Door Car | \n",
+ "
\n",
+ " \n",
+ " | 994 | \n",
+ " MY32149 | \n",
+ " California | \n",
+ " F | \n",
+ " Master | \n",
+ " 368672.38% | \n",
+ " 0 | \n",
+ " 96 | \n",
+ " 1/0/00 | \n",
+ " 28.460568 | \n",
+ " Personal Auto | \n",
+ " Two-Door Car | \n",
+ "
\n",
+ " \n",
+ " | 995 | \n",
+ " SA91515 | \n",
+ " California | \n",
+ " M | \n",
+ " Bachelor | \n",
+ " 399258.39% | \n",
+ " 0 | \n",
+ " 111 | \n",
+ " 1/0/00 | \n",
+ " 700.349052 | \n",
+ " Personal Auto | \n",
+ " SUV | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
996 rows × 11 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Customer ST GENDER Education Customer Lifetime Value Income \\\n",
+ "0 GS98873 Arizona F Bachelor 323912.47% 16061 \n",
+ "1 CW49887 California F Master 462680.11% 79487 \n",
+ "2 MY31220 California F College 899704.02% 54230 \n",
+ "3 UH35128 Oregon F College 2580706.30% 71210 \n",
+ "4 WH52799 Arizona F College 380812.21% 94903 \n",
+ ".. ... ... ... ... ... ... \n",
+ "991 HV85198 Arizona M Master 847141.75% 63513 \n",
+ "992 BS91566 Arizona F College 543121.91% 58161 \n",
+ "993 IL40123 Nevada F College 568964.41% 83640 \n",
+ "994 MY32149 California F Master 368672.38% 0 \n",
+ "995 SA91515 California M Bachelor 399258.39% 0 \n",
+ "\n",
+ " Monthly Premium Auto Number of Open Complaints Total Claim Amount \\\n",
+ "0 88 1/0/00 633.600000 \n",
+ "1 114 1/0/00 547.200000 \n",
+ "2 112 1/0/00 537.600000 \n",
+ "3 214 1/1/00 1027.200000 \n",
+ "4 94 1/0/00 451.200000 \n",
+ ".. ... ... ... \n",
+ "991 70 1/0/00 185.667213 \n",
+ "992 68 1/0/00 140.747286 \n",
+ "993 70 1/0/00 471.050488 \n",
+ "994 96 1/0/00 28.460568 \n",
+ "995 111 1/0/00 700.349052 \n",
+ "\n",
+ " Policy Type Vehicle Class \n",
+ "0 Personal Auto Four-Door Car \n",
+ "1 Special Auto SUV \n",
+ "2 Personal Auto Two-Door Car \n",
+ "3 Personal Auto Luxury Car \n",
+ "4 Corporate Auto Two-Door Car \n",
+ ".. ... ... \n",
+ "991 Personal Auto Four-Door Car \n",
+ "992 Corporate Auto Four-Door Car \n",
+ "993 Corporate Auto Two-Door Car \n",
+ "994 Personal Auto Two-Door Car \n",
+ "995 Personal Auto SUV \n",
+ "\n",
+ "[996 rows x 11 columns]"
+ ]
+ },
+ "execution_count": 5,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df2 = pd.read_csv(\"https://raw.githubusercontent.com/data-bootcamp-v4/data/main/file2.csv\")\n",
+ "df2"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "id": "b48e9b60",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Customer | \n",
+ " State | \n",
+ " Customer Lifetime Value | \n",
+ " Education | \n",
+ " Gender | \n",
+ " Income | \n",
+ " Monthly Premium Auto | \n",
+ " Number of Open Complaints | \n",
+ " Policy Type | \n",
+ " Total Claim Amount | \n",
+ " Vehicle Class | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " SA25987 | \n",
+ " Washington | \n",
+ " 3479.137523 | \n",
+ " High School or Below | \n",
+ " M | \n",
+ " 0 | \n",
+ " 104 | \n",
+ " 0 | \n",
+ " Personal Auto | \n",
+ " 499.200000 | \n",
+ " Two-Door Car | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " TB86706 | \n",
+ " Arizona | \n",
+ " 2502.637401 | \n",
+ " Master | \n",
+ " M | \n",
+ " 0 | \n",
+ " 66 | \n",
+ " 0 | \n",
+ " Personal Auto | \n",
+ " 3.468912 | \n",
+ " Two-Door Car | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " ZL73902 | \n",
+ " Nevada | \n",
+ " 3265.156348 | \n",
+ " Bachelor | \n",
+ " F | \n",
+ " 25820 | \n",
+ " 82 | \n",
+ " 0 | \n",
+ " Personal Auto | \n",
+ " 393.600000 | \n",
+ " Four-Door Car | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " KX23516 | \n",
+ " California | \n",
+ " 4455.843406 | \n",
+ " High School or Below | \n",
+ " F | \n",
+ " 0 | \n",
+ " 121 | \n",
+ " 0 | \n",
+ " Personal Auto | \n",
+ " 699.615192 | \n",
+ " SUV | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " FN77294 | \n",
+ " California | \n",
+ " 7704.958480 | \n",
+ " High School or Below | \n",
+ " M | \n",
+ " 30366 | \n",
+ " 101 | \n",
+ " 2 | \n",
+ " Personal Auto | \n",
+ " 484.800000 | \n",
+ " SUV | \n",
+ "
\n",
+ " \n",
+ " | ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " | 7065 | \n",
+ " LA72316 | \n",
+ " California | \n",
+ " 23405.987980 | \n",
+ " Bachelor | \n",
+ " M | \n",
+ " 71941 | \n",
+ " 73 | \n",
+ " 0 | \n",
+ " Personal Auto | \n",
+ " 198.234764 | \n",
+ " Four-Door Car | \n",
+ "
\n",
+ " \n",
+ " | 7066 | \n",
+ " PK87824 | \n",
+ " California | \n",
+ " 3096.511217 | \n",
+ " College | \n",
+ " F | \n",
+ " 21604 | \n",
+ " 79 | \n",
+ " 0 | \n",
+ " Corporate Auto | \n",
+ " 379.200000 | \n",
+ " Four-Door Car | \n",
+ "
\n",
+ " \n",
+ " | 7067 | \n",
+ " TD14365 | \n",
+ " California | \n",
+ " 8163.890428 | \n",
+ " Bachelor | \n",
+ " M | \n",
+ " 0 | \n",
+ " 85 | \n",
+ " 3 | \n",
+ " Corporate Auto | \n",
+ " 790.784983 | \n",
+ " Four-Door Car | \n",
+ "
\n",
+ " \n",
+ " | 7068 | \n",
+ " UP19263 | \n",
+ " California | \n",
+ " 7524.442436 | \n",
+ " College | \n",
+ " M | \n",
+ " 21941 | \n",
+ " 96 | \n",
+ " 0 | \n",
+ " Personal Auto | \n",
+ " 691.200000 | \n",
+ " Four-Door Car | \n",
+ "
\n",
+ " \n",
+ " | 7069 | \n",
+ " Y167826 | \n",
+ " California | \n",
+ " 2611.836866 | \n",
+ " College | \n",
+ " M | \n",
+ " 0 | \n",
+ " 77 | \n",
+ " 0 | \n",
+ " Corporate Auto | \n",
+ " 369.600000 | \n",
+ " Two-Door Car | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
7070 rows × 11 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Customer State Customer Lifetime Value Education \\\n",
+ "0 SA25987 Washington 3479.137523 High School or Below \n",
+ "1 TB86706 Arizona 2502.637401 Master \n",
+ "2 ZL73902 Nevada 3265.156348 Bachelor \n",
+ "3 KX23516 California 4455.843406 High School or Below \n",
+ "4 FN77294 California 7704.958480 High School or Below \n",
+ "... ... ... ... ... \n",
+ "7065 LA72316 California 23405.987980 Bachelor \n",
+ "7066 PK87824 California 3096.511217 College \n",
+ "7067 TD14365 California 8163.890428 Bachelor \n",
+ "7068 UP19263 California 7524.442436 College \n",
+ "7069 Y167826 California 2611.836866 College \n",
+ "\n",
+ " Gender Income Monthly Premium Auto Number of Open Complaints \\\n",
+ "0 M 0 104 0 \n",
+ "1 M 0 66 0 \n",
+ "2 F 25820 82 0 \n",
+ "3 F 0 121 0 \n",
+ "4 M 30366 101 2 \n",
+ "... ... ... ... ... \n",
+ "7065 M 71941 73 0 \n",
+ "7066 F 21604 79 0 \n",
+ "7067 M 0 85 3 \n",
+ "7068 M 21941 96 0 \n",
+ "7069 M 0 77 0 \n",
+ "\n",
+ " Policy Type Total Claim Amount Vehicle Class \n",
+ "0 Personal Auto 499.200000 Two-Door Car \n",
+ "1 Personal Auto 3.468912 Two-Door Car \n",
+ "2 Personal Auto 393.600000 Four-Door Car \n",
+ "3 Personal Auto 699.615192 SUV \n",
+ "4 Personal Auto 484.800000 SUV \n",
+ "... ... ... ... \n",
+ "7065 Personal Auto 198.234764 Four-Door Car \n",
+ "7066 Corporate Auto 379.200000 Four-Door Car \n",
+ "7067 Corporate Auto 790.784983 Four-Door Car \n",
+ "7068 Personal Auto 691.200000 Four-Door Car \n",
+ "7069 Corporate Auto 369.600000 Two-Door Car \n",
+ "\n",
+ "[7070 rows x 11 columns]"
+ ]
+ },
+ "execution_count": 7,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df3 = pd.read_csv(\"https://raw.githubusercontent.com/data-bootcamp-v4/data/main/file3.csv\")\n",
+ "df3"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "id": "35090aa7",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Customer | \n",
+ " ST | \n",
+ " GENDER | \n",
+ " Education | \n",
+ " Customer Lifetime Value | \n",
+ " Income | \n",
+ " Monthly Premium Auto | \n",
+ " Number of Open Complaints | \n",
+ " Policy Type | \n",
+ " Vehicle Class | \n",
+ " Total Claim Amount | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " RB50392 | \n",
+ " Washington | \n",
+ " NaN | \n",
+ " Master | \n",
+ " NaN | \n",
+ " 0.0 | \n",
+ " 1000.0 | \n",
+ " 1/0/00 | \n",
+ " Personal Auto | \n",
+ " Four-Door Car | \n",
+ " 2.704934 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " QZ44356 | \n",
+ " Arizona | \n",
+ " F | \n",
+ " Bachelor | \n",
+ " 697953.59% | \n",
+ " 0.0 | \n",
+ " 94.0 | \n",
+ " 1/0/00 | \n",
+ " Personal Auto | \n",
+ " Four-Door Car | \n",
+ " 1131.464935 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " AI49188 | \n",
+ " Nevada | \n",
+ " F | \n",
+ " Bachelor | \n",
+ " 1288743.17% | \n",
+ " 48767.0 | \n",
+ " 108.0 | \n",
+ " 1/0/00 | \n",
+ " Personal Auto | \n",
+ " Two-Door Car | \n",
+ " 566.472247 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " WW63253 | \n",
+ " California | \n",
+ " M | \n",
+ " Bachelor | \n",
+ " 764586.18% | \n",
+ " 0.0 | \n",
+ " 106.0 | \n",
+ " 1/0/00 | \n",
+ " Corporate Auto | \n",
+ " SUV | \n",
+ " 529.881344 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " GA49547 | \n",
+ " Washington | \n",
+ " M | \n",
+ " High School or Below | \n",
+ " 536307.65% | \n",
+ " 36357.0 | \n",
+ " 68.0 | \n",
+ " 1/0/00 | \n",
+ " Personal Auto | \n",
+ " Four-Door Car | \n",
+ " 17.269323 | \n",
+ "
\n",
+ " \n",
+ " | ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " | 4003 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 4004 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 4005 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 4006 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 4007 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
4008 rows × 11 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Customer ST GENDER Education \\\n",
+ "0 RB50392 Washington NaN Master \n",
+ "1 QZ44356 Arizona F Bachelor \n",
+ "2 AI49188 Nevada F Bachelor \n",
+ "3 WW63253 California M Bachelor \n",
+ "4 GA49547 Washington M High School or Below \n",
+ "... ... ... ... ... \n",
+ "4003 NaN NaN NaN NaN \n",
+ "4004 NaN NaN NaN NaN \n",
+ "4005 NaN NaN NaN NaN \n",
+ "4006 NaN NaN NaN NaN \n",
+ "4007 NaN NaN NaN NaN \n",
+ "\n",
+ " Customer Lifetime Value Income Monthly Premium Auto \\\n",
+ "0 NaN 0.0 1000.0 \n",
+ "1 697953.59% 0.0 94.0 \n",
+ "2 1288743.17% 48767.0 108.0 \n",
+ "3 764586.18% 0.0 106.0 \n",
+ "4 536307.65% 36357.0 68.0 \n",
+ "... ... ... ... \n",
+ "4003 NaN NaN NaN \n",
+ "4004 NaN NaN NaN \n",
+ "4005 NaN NaN NaN \n",
+ "4006 NaN NaN NaN \n",
+ "4007 NaN NaN NaN \n",
+ "\n",
+ " Number of Open Complaints Policy Type Vehicle Class \\\n",
+ "0 1/0/00 Personal Auto Four-Door Car \n",
+ "1 1/0/00 Personal Auto Four-Door Car \n",
+ "2 1/0/00 Personal Auto Two-Door Car \n",
+ "3 1/0/00 Corporate Auto SUV \n",
+ "4 1/0/00 Personal Auto Four-Door Car \n",
+ "... ... ... ... \n",
+ "4003 NaN NaN NaN \n",
+ "4004 NaN NaN NaN \n",
+ "4005 NaN NaN NaN \n",
+ "4006 NaN NaN NaN \n",
+ "4007 NaN NaN NaN \n",
+ "\n",
+ " Total Claim Amount \n",
+ "0 2.704934 \n",
+ "1 1131.464935 \n",
+ "2 566.472247 \n",
+ "3 529.881344 \n",
+ "4 17.269323 \n",
+ "... ... \n",
+ "4003 NaN \n",
+ "4004 NaN \n",
+ "4005 NaN \n",
+ "4006 NaN \n",
+ "4007 NaN \n",
+ "\n",
+ "[4008 rows x 11 columns]"
+ ]
+ },
+ "execution_count": 8,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df1"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "id": "a795af02",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "Index(['Customer', 'ST', 'GENDER', 'Education', 'Customer Lifetime Value',\n",
+ " 'Income', 'Monthly Premium Auto', 'Number of Open Complaints',\n",
+ " 'Policy Type', 'Vehicle Class', 'Total Claim Amount'],\n",
+ " dtype='object')"
+ ]
+ },
+ "execution_count": 9,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df1.columns"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "id": "0c4059ef",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "Index(['customer', 'st', 'gender', 'education', 'customer lifetime value',\n",
+ " 'income', 'monthly premium auto', 'number of open complaints',\n",
+ " 'policy type', 'vehicle class', 'total claim amount'],\n",
+ " dtype='object')"
+ ]
+ },
+ "execution_count": 12,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df1.columns.str.lower()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "43746587",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df1.rename(columns={\"old_column_name1\": \"new_column_name1\", \"old_column_name2\": \"new_column_name2\",...}"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "afd9cbe0",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "3cb5ce7a",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "488ffb2e",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "0517cc02",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "markdown",
+ "id": "31b8a9e7-7db9-4604-991b-ef6771603e57",
+ "metadata": {
+ "id": "31b8a9e7-7db9-4604-991b-ef6771603e57"
+ },
+ "source": [
+ "# Challenge 2: Structuring Data"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "a877fd6d-7a0c-46d2-9657-f25036e4ca4b",
+ "metadata": {
+ "id": "a877fd6d-7a0c-46d2-9657-f25036e4ca4b"
+ },
+ "source": [
+ "In this challenge, we will continue to work with customer data from an insurance company, but we will use a dataset with more columns, called `marketing_customer_analysis_clean.csv`, which can be found at the following link:\n",
+ "\n",
+ "https://raw.githubusercontent.com/data-bootcamp-v4/data/main/marketing_customer_analysis_clean.csv\n",
+ "\n",
+ "This dataset contains information such as customer demographics, policy details, vehicle information, and the customer's response to the last marketing campaign. Our goal is to explore and analyze this data by performing data cleaning, formatting, and structuring."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "aa10d9b0-1c27-4d3f-a8e4-db6ab73bfd26",
+ "metadata": {
+ "id": "aa10d9b0-1c27-4d3f-a8e4-db6ab73bfd26"
+ },
+ "outputs": [],
+ "source": [
+ "# Your code goes here"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "df35fd0d-513e-4e77-867e-429da10a9cc7",
+ "metadata": {
+ "id": "df35fd0d-513e-4e77-867e-429da10a9cc7"
+ },
+ "source": [
+ "1. You work at the marketing department and you want to know which sales channel brought the most sales in terms of total revenue. Using pivot, create a summary table showing the total revenue for each sales channel (branch, call center, web, and mail).\n",
+ "Round the total revenue to 2 decimal places. Analyze the resulting table to draw insights."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "640993b2-a291-436c-a34d-a551144f8196",
+ "metadata": {
+ "id": "640993b2-a291-436c-a34d-a551144f8196"
+ },
+ "source": [
+ "2. Create a pivot table that shows the average customer lifetime value per gender and education level. Analyze the resulting table to draw insights."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "32c7f2e5-3d90-43e5-be33-9781b6069198",
+ "metadata": {
+ "id": "32c7f2e5-3d90-43e5-be33-9781b6069198"
+ },
+ "source": [
+ "## Bonus\n",
+ "\n",
+ "You work at the customer service department and you want to know which months had the highest number of complaints by policy type category. Create a summary table showing the number of complaints by policy type and month.\n",
+ "Show it in a long format table."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "e3d09a8f-953c-448a-a5f8-2e5a8cca7291",
+ "metadata": {
+ "id": "e3d09a8f-953c-448a-a5f8-2e5a8cca7291"
+ },
+ "source": [
+ "*In data analysis, a long format table is a way of structuring data in which each observation or measurement is stored in a separate row of the table. The key characteristic of a long format table is that each column represents a single variable, and each row represents a single observation of that variable.*\n",
+ "\n",
+ "*More information about long and wide format tables here: https://www.statology.org/long-vs-wide-data/*"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "3a069e0b-b400-470e-904d-d17582191be4",
+ "metadata": {
+ "id": "3a069e0b-b400-470e-904d-d17582191be4"
+ },
+ "outputs": [],
+ "source": [
+ "# Your code goes here"
+ ]
+ }
+ ],
+ "metadata": {
+ "colab": {
+ "provenance": []
+ },
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.11.4"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/lab-dw-data-structuring-and-combining.ipynb b/lab-dw-data-structuring-and-combining.ipynb
index ec4e3f9..9c9112d 100644
--- a/lab-dw-data-structuring-and-combining.ipynb
+++ b/lab-dw-data-structuring-and-combining.ipynb
@@ -36,16 +36,3208 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 1,
"id": "492d06e3-92c7-4105-ac72-536db98d3244",
"metadata": {
"id": "492d06e3-92c7-4105-ac72-536db98d3244"
},
"outputs": [],
"source": [
- "# Your code goes here"
+ "import pandas as pd\n",
+ "df1 = pd.read_csv(\"https://raw.githubusercontent.com/data-bootcamp-v4/data/main/file1.csv\")\n",
+ "#df1"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "id": "94b17acc",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df2 = pd.read_csv(\"https://raw.githubusercontent.com/data-bootcamp-v4/data/main/file2.csv\")\n",
+ "#df2"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "id": "86a0178e",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df3 = pd.read_csv(\"https://raw.githubusercontent.com/data-bootcamp-v4/data/main/file3.csv\")\n",
+ "#df3"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "d4abcb8f",
+ "metadata": {},
+ "source": [
+ "# Cleaning df1"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "id": "38a86441",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Snake-case every header in one pass, then spell out the abbreviated \"st\" column.\n",
+ "df1.columns = df1.columns.str.lower().str.replace(\" \", \"_\")\n",
+ "df1 = df1.rename(columns={\"st\": \"state\"})\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "id": "ff7995dc",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "customer 2937\n",
+ "state 2937\n",
+ "gender 3054\n",
+ "education 2937\n",
+ "customer_lifetime_value 2940\n",
+ "income 2937\n",
+ "monthly_premium_auto 2937\n",
+ "number_of_open_complaints 2937\n",
+ "policy_type 2937\n",
+ "vehicle_class 2937\n",
+ "total_claim_amount 2937\n",
+ "dtype: int64"
+ ]
+ },
+ "execution_count": 5,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Drop the trailing percent sign, then coerce to float (unparseable entries become NaN).\n",
+ "clv_text = df1[\"customer_lifetime_value\"].str.strip(\"%\")\n",
+ "df1[\"customer_lifetime_value\"] = pd.to_numeric(clv_text, errors=\"coerce\")\n",
+ "df1.isna().sum()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "id": "17a4ff8c",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df1[\"number_of_open_complaints\"] = df1[\"number_of_open_complaints\"].str.split(\"/\", expand=True)[1]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "id": "fb710d08",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "customer 0\n",
+ "state 0\n",
+ "gender 117\n",
+ "education 0\n",
+ "customer_lifetime_value 3\n",
+ "income 0\n",
+ "monthly_premium_auto 0\n",
+ "number_of_open_complaints 0\n",
+ "policy_type 0\n",
+ "vehicle_class 0\n",
+ "total_claim_amount 0\n",
+ "dtype: int64"
+ ]
+ },
+ "execution_count": 7,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df1 = df1.dropna(how=\"all\")\n",
+ "df1.isna().sum()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "id": "0315ffe2",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "customer 0\n",
+ "state 0\n",
+ "gender 0\n",
+ "education 0\n",
+ "customer_lifetime_value 0\n",
+ "income 0\n",
+ "monthly_premium_auto 0\n",
+ "number_of_open_complaints 0\n",
+ "policy_type 0\n",
+ "vehicle_class 0\n",
+ "total_claim_amount 0\n",
+ "dtype: int64"
+ ]
+ },
+ "execution_count": 8,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df1[\"gender\"] = df1[\"gender\"].fillna(\"unknown\")\n",
+ "df1[\"customer_lifetime_value\"] = df1[\"customer_lifetime_value\"].fillna(0)\n",
+ "df1.isna().sum()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "id": "6773cc52",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " customer | \n",
+ " state | \n",
+ " gender | \n",
+ " education | \n",
+ " customer_lifetime_value | \n",
+ " income | \n",
+ " monthly_premium_auto | \n",
+ " number_of_open_complaints | \n",
+ " policy_type | \n",
+ " vehicle_class | \n",
+ " total_claim_amount | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " RB50392 | \n",
+ " Washington | \n",
+ " unknown | \n",
+ " Master | \n",
+ " 0.00 | \n",
+ " 0.0 | \n",
+ " 1000.0 | \n",
+ " 0 | \n",
+ " Personal Auto | \n",
+ " Four-Door Car | \n",
+ " 2.704934 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " QZ44356 | \n",
+ " Arizona | \n",
+ " F | \n",
+ " Bachelor | \n",
+ " 697953.59 | \n",
+ " 0.0 | \n",
+ " 94.0 | \n",
+ " 0 | \n",
+ " Personal Auto | \n",
+ " Four-Door Car | \n",
+ " 1131.464935 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " AI49188 | \n",
+ " Nevada | \n",
+ " F | \n",
+ " Bachelor | \n",
+ " 1288743.17 | \n",
+ " 48767.0 | \n",
+ " 108.0 | \n",
+ " 0 | \n",
+ " Personal Auto | \n",
+ " Two-Door Car | \n",
+ " 566.472247 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " WW63253 | \n",
+ " California | \n",
+ " M | \n",
+ " Bachelor | \n",
+ " 764586.18 | \n",
+ " 0.0 | \n",
+ " 106.0 | \n",
+ " 0 | \n",
+ " Corporate Auto | \n",
+ " SUV | \n",
+ " 529.881344 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " GA49547 | \n",
+ " Washington | \n",
+ " M | \n",
+ " High School or Below | \n",
+ " 536307.65 | \n",
+ " 36357.0 | \n",
+ " 68.0 | \n",
+ " 0 | \n",
+ " Personal Auto | \n",
+ " Four-Door Car | \n",
+ " 17.269323 | \n",
+ "
\n",
+ " \n",
+ " | ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " | 1066 | \n",
+ " TM65736 | \n",
+ " Oregon | \n",
+ " M | \n",
+ " Master | \n",
+ " 305955.03 | \n",
+ " 38644.0 | \n",
+ " 78.0 | \n",
+ " 1 | \n",
+ " Personal Auto | \n",
+ " Four-Door Car | \n",
+ " 361.455219 | \n",
+ "
\n",
+ " \n",
+ " | 1067 | \n",
+ " VJ51327 | \n",
+ " Cali | \n",
+ " F | \n",
+ " High School or Below | \n",
+ " 2031499.76 | \n",
+ " 63209.0 | \n",
+ " 102.0 | \n",
+ " 2 | \n",
+ " Personal Auto | \n",
+ " SUV | \n",
+ " 207.320041 | \n",
+ "
\n",
+ " \n",
+ " | 1068 | \n",
+ " GS98873 | \n",
+ " Arizona | \n",
+ " F | \n",
+ " Bachelor | \n",
+ " 323912.47 | \n",
+ " 16061.0 | \n",
+ " 88.0 | \n",
+ " 0 | \n",
+ " Personal Auto | \n",
+ " Four-Door Car | \n",
+ " 633.600000 | \n",
+ "
\n",
+ " \n",
+ " | 1069 | \n",
+ " CW49887 | \n",
+ " California | \n",
+ " F | \n",
+ " Master | \n",
+ " 462680.11 | \n",
+ " 79487.0 | \n",
+ " 114.0 | \n",
+ " 0 | \n",
+ " Special Auto | \n",
+ " SUV | \n",
+ " 547.200000 | \n",
+ "
\n",
+ " \n",
+ " | 1070 | \n",
+ " MY31220 | \n",
+ " California | \n",
+ " F | \n",
+ " College | \n",
+ " 899704.02 | \n",
+ " 54230.0 | \n",
+ " 112.0 | \n",
+ " 0 | \n",
+ " Personal Auto | \n",
+ " Two-Door Car | \n",
+ " 537.600000 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
1071 rows × 11 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " customer state gender education \\\n",
+ "0 RB50392 Washington unknown Master \n",
+ "1 QZ44356 Arizona F Bachelor \n",
+ "2 AI49188 Nevada F Bachelor \n",
+ "3 WW63253 California M Bachelor \n",
+ "4 GA49547 Washington M High School or Below \n",
+ "... ... ... ... ... \n",
+ "1066 TM65736 Oregon M Master \n",
+ "1067 VJ51327 Cali F High School or Below \n",
+ "1068 GS98873 Arizona F Bachelor \n",
+ "1069 CW49887 California F Master \n",
+ "1070 MY31220 California F College \n",
+ "\n",
+ " customer_lifetime_value income monthly_premium_auto \\\n",
+ "0 0.00 0.0 1000.0 \n",
+ "1 697953.59 0.0 94.0 \n",
+ "2 1288743.17 48767.0 108.0 \n",
+ "3 764586.18 0.0 106.0 \n",
+ "4 536307.65 36357.0 68.0 \n",
+ "... ... ... ... \n",
+ "1066 305955.03 38644.0 78.0 \n",
+ "1067 2031499.76 63209.0 102.0 \n",
+ "1068 323912.47 16061.0 88.0 \n",
+ "1069 462680.11 79487.0 114.0 \n",
+ "1070 899704.02 54230.0 112.0 \n",
+ "\n",
+ " number_of_open_complaints policy_type vehicle_class \\\n",
+ "0 0 Personal Auto Four-Door Car \n",
+ "1 0 Personal Auto Four-Door Car \n",
+ "2 0 Personal Auto Two-Door Car \n",
+ "3 0 Corporate Auto SUV \n",
+ "4 0 Personal Auto Four-Door Car \n",
+ "... ... ... ... \n",
+ "1066 1 Personal Auto Four-Door Car \n",
+ "1067 2 Personal Auto SUV \n",
+ "1068 0 Personal Auto Four-Door Car \n",
+ "1069 0 Special Auto SUV \n",
+ "1070 0 Personal Auto Two-Door Car \n",
+ "\n",
+ " total_claim_amount \n",
+ "0 2.704934 \n",
+ "1 1131.464935 \n",
+ "2 566.472247 \n",
+ "3 529.881344 \n",
+ "4 17.269323 \n",
+ "... ... \n",
+ "1066 361.455219 \n",
+ "1067 207.320041 \n",
+ "1068 633.600000 \n",
+ "1069 547.200000 \n",
+ "1070 537.600000 \n",
+ "\n",
+ "[1071 rows x 11 columns]"
+ ]
+ },
+ "execution_count": 9,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df1"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "id": "94604f47",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Lower-case and trim the categorical text columns so the value mappings below match.\n",
+ "text_columns = [\"gender\", \"education\", \"policy_type\", \"vehicle_class\", \"state\"]\n",
+ "df1[text_columns] = df1[text_columns].apply(lambda col: col.str.lower().str.strip())"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "id": "49420406",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " customer | \n",
+ " state | \n",
+ " gender | \n",
+ " education | \n",
+ " customer_lifetime_value | \n",
+ " income | \n",
+ " monthly_premium_auto | \n",
+ " number_of_open_complaints | \n",
+ " policy_type | \n",
+ " vehicle_class | \n",
+ " total_claim_amount | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " RB50392 | \n",
+ " washington | \n",
+ " unknown | \n",
+ " master | \n",
+ " 0.00 | \n",
+ " 0.0 | \n",
+ " 1000.0 | \n",
+ " 0 | \n",
+ " personal auto | \n",
+ " four-door car | \n",
+ " 2.704934 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " QZ44356 | \n",
+ " arizona | \n",
+ " f | \n",
+ " bachelor | \n",
+ " 697953.59 | \n",
+ " 0.0 | \n",
+ " 94.0 | \n",
+ " 0 | \n",
+ " personal auto | \n",
+ " four-door car | \n",
+ " 1131.464935 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " AI49188 | \n",
+ " nevada | \n",
+ " f | \n",
+ " bachelor | \n",
+ " 1288743.17 | \n",
+ " 48767.0 | \n",
+ " 108.0 | \n",
+ " 0 | \n",
+ " personal auto | \n",
+ " two-door car | \n",
+ " 566.472247 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " WW63253 | \n",
+ " california | \n",
+ " m | \n",
+ " bachelor | \n",
+ " 764586.18 | \n",
+ " 0.0 | \n",
+ " 106.0 | \n",
+ " 0 | \n",
+ " corporate auto | \n",
+ " suv | \n",
+ " 529.881344 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " GA49547 | \n",
+ " washington | \n",
+ " m | \n",
+ " high school or below | \n",
+ " 536307.65 | \n",
+ " 36357.0 | \n",
+ " 68.0 | \n",
+ " 0 | \n",
+ " personal auto | \n",
+ " four-door car | \n",
+ " 17.269323 | \n",
+ "
\n",
+ " \n",
+ " | ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " | 1066 | \n",
+ " TM65736 | \n",
+ " oregon | \n",
+ " m | \n",
+ " master | \n",
+ " 305955.03 | \n",
+ " 38644.0 | \n",
+ " 78.0 | \n",
+ " 1 | \n",
+ " personal auto | \n",
+ " four-door car | \n",
+ " 361.455219 | \n",
+ "
\n",
+ " \n",
+ " | 1067 | \n",
+ " VJ51327 | \n",
+ " cali | \n",
+ " f | \n",
+ " high school or below | \n",
+ " 2031499.76 | \n",
+ " 63209.0 | \n",
+ " 102.0 | \n",
+ " 2 | \n",
+ " personal auto | \n",
+ " suv | \n",
+ " 207.320041 | \n",
+ "
\n",
+ " \n",
+ " | 1068 | \n",
+ " GS98873 | \n",
+ " arizona | \n",
+ " f | \n",
+ " bachelor | \n",
+ " 323912.47 | \n",
+ " 16061.0 | \n",
+ " 88.0 | \n",
+ " 0 | \n",
+ " personal auto | \n",
+ " four-door car | \n",
+ " 633.600000 | \n",
+ "
\n",
+ " \n",
+ " | 1069 | \n",
+ " CW49887 | \n",
+ " california | \n",
+ " f | \n",
+ " master | \n",
+ " 462680.11 | \n",
+ " 79487.0 | \n",
+ " 114.0 | \n",
+ " 0 | \n",
+ " special auto | \n",
+ " suv | \n",
+ " 547.200000 | \n",
+ "
\n",
+ " \n",
+ " | 1070 | \n",
+ " MY31220 | \n",
+ " california | \n",
+ " f | \n",
+ " college | \n",
+ " 899704.02 | \n",
+ " 54230.0 | \n",
+ " 112.0 | \n",
+ " 0 | \n",
+ " personal auto | \n",
+ " two-door car | \n",
+ " 537.600000 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
1071 rows × 11 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " customer state gender education \\\n",
+ "0 RB50392 washington unknown master \n",
+ "1 QZ44356 arizona f bachelor \n",
+ "2 AI49188 nevada f bachelor \n",
+ "3 WW63253 california m bachelor \n",
+ "4 GA49547 washington m high school or below \n",
+ "... ... ... ... ... \n",
+ "1066 TM65736 oregon m master \n",
+ "1067 VJ51327 cali f high school or below \n",
+ "1068 GS98873 arizona f bachelor \n",
+ "1069 CW49887 california f master \n",
+ "1070 MY31220 california f college \n",
+ "\n",
+ " customer_lifetime_value income monthly_premium_auto \\\n",
+ "0 0.00 0.0 1000.0 \n",
+ "1 697953.59 0.0 94.0 \n",
+ "2 1288743.17 48767.0 108.0 \n",
+ "3 764586.18 0.0 106.0 \n",
+ "4 536307.65 36357.0 68.0 \n",
+ "... ... ... ... \n",
+ "1066 305955.03 38644.0 78.0 \n",
+ "1067 2031499.76 63209.0 102.0 \n",
+ "1068 323912.47 16061.0 88.0 \n",
+ "1069 462680.11 79487.0 114.0 \n",
+ "1070 899704.02 54230.0 112.0 \n",
+ "\n",
+ " number_of_open_complaints policy_type vehicle_class \\\n",
+ "0 0 personal auto four-door car \n",
+ "1 0 personal auto four-door car \n",
+ "2 0 personal auto two-door car \n",
+ "3 0 corporate auto suv \n",
+ "4 0 personal auto four-door car \n",
+ "... ... ... ... \n",
+ "1066 1 personal auto four-door car \n",
+ "1067 2 personal auto suv \n",
+ "1068 0 personal auto four-door car \n",
+ "1069 0 special auto suv \n",
+ "1070 0 personal auto two-door car \n",
+ "\n",
+ " total_claim_amount \n",
+ "0 2.704934 \n",
+ "1 1131.464935 \n",
+ "2 566.472247 \n",
+ "3 529.881344 \n",
+ "4 17.269323 \n",
+ "... ... \n",
+ "1066 361.455219 \n",
+ "1067 207.320041 \n",
+ "1068 633.600000 \n",
+ "1069 547.200000 \n",
+ "1070 537.600000 \n",
+ "\n",
+ "[1071 rows x 11 columns]"
+ ]
+ },
+ "execution_count": 11,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df1"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "id": "688c5dbc",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "array(['unknown', 'female', 'male'], dtype=object)"
+ ]
+ },
+ "execution_count": 12,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df1[\"gender\"] = df1[\"gender\"].replace({\"m\": \"male\", \"f\": \"female\", \"femal\": \"female\"})\n",
+ "df1[\"gender\"].unique()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "id": "e8d2c1e0",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "array(['four-door car', 'two-door car', 'suv', 'luxury'], dtype=object)"
+ ]
+ },
+ "execution_count": 13,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df1[\"vehicle_class\"] = df1[\"vehicle_class\"].replace({\"luxury suv\": \"luxury\", \"luxury car\": \"luxury\", \"sports car\": \"luxury\", \"luxery\": \"luxury\" })\n",
+ "df1[\"vehicle_class\"].unique()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "id": "e9c343c2",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "array(['washington', 'arizona', 'nevada', 'california', 'oregon'],\n",
+ " dtype=object)"
+ ]
+ },
+ "execution_count": 14,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df1[\"state\"] = df1[\"state\"].replace({\"cali\": \"california\", \"az\": \"arizona\", \"wa\": \"washington\"})\n",
+ "df1[\"state\"].unique()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 15,
+ "id": "bae40876",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "array(['master', 'bachelor', 'high school or below', 'college', 'doctor'],\n",
+ " dtype=object)"
+ ]
+ },
+ "execution_count": 15,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "#df1[\"education\"].unique()\n",
+ "df1[\"education\"] = df1[\"education\"].replace({\"bachelors\": \"bachelor\"})\n",
+ "df1[\"education\"].unique()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 16,
+ "id": "3ce9d3ac",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "array(['personal auto', 'corporate auto', 'special auto'], dtype=object)"
+ ]
+ },
+ "execution_count": 16,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df1[\"policy_type\"].unique()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 17,
+ "id": "8f1cc493",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Cast the count-like columns to integers in a single astype call.\n",
+ "df1 = df1.astype({\"income\": int, \"monthly_premium_auto\": int, \"number_of_open_complaints\": int})\n",
+ "# Keep the monetary columns at two decimal places.\n",
+ "money_columns = [\"customer_lifetime_value\", \"total_claim_amount\"]\n",
+ "df1[money_columns] = df1[money_columns].round(2)"
]
},
+ {
+ "cell_type": "markdown",
+ "id": "9d7e96f8",
+ "metadata": {},
+ "source": [
+ "# Cleaning df2"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 18,
+ "id": "7d38baae",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Customer | \n",
+ " ST | \n",
+ " GENDER | \n",
+ " Education | \n",
+ " Customer Lifetime Value | \n",
+ " Income | \n",
+ " Monthly Premium Auto | \n",
+ " Number of Open Complaints | \n",
+ " Total Claim Amount | \n",
+ " Policy Type | \n",
+ " Vehicle Class | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " GS98873 | \n",
+ " Arizona | \n",
+ " F | \n",
+ " Bachelor | \n",
+ " 323912.47% | \n",
+ " 16061 | \n",
+ " 88 | \n",
+ " 1/0/00 | \n",
+ " 633.600000 | \n",
+ " Personal Auto | \n",
+ " Four-Door Car | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " CW49887 | \n",
+ " California | \n",
+ " F | \n",
+ " Master | \n",
+ " 462680.11% | \n",
+ " 79487 | \n",
+ " 114 | \n",
+ " 1/0/00 | \n",
+ " 547.200000 | \n",
+ " Special Auto | \n",
+ " SUV | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " MY31220 | \n",
+ " California | \n",
+ " F | \n",
+ " College | \n",
+ " 899704.02% | \n",
+ " 54230 | \n",
+ " 112 | \n",
+ " 1/0/00 | \n",
+ " 537.600000 | \n",
+ " Personal Auto | \n",
+ " Two-Door Car | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " UH35128 | \n",
+ " Oregon | \n",
+ " F | \n",
+ " College | \n",
+ " 2580706.30% | \n",
+ " 71210 | \n",
+ " 214 | \n",
+ " 1/1/00 | \n",
+ " 1027.200000 | \n",
+ " Personal Auto | \n",
+ " Luxury Car | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " WH52799 | \n",
+ " Arizona | \n",
+ " F | \n",
+ " College | \n",
+ " 380812.21% | \n",
+ " 94903 | \n",
+ " 94 | \n",
+ " 1/0/00 | \n",
+ " 451.200000 | \n",
+ " Corporate Auto | \n",
+ " Two-Door Car | \n",
+ "
\n",
+ " \n",
+ " | ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " | 991 | \n",
+ " HV85198 | \n",
+ " Arizona | \n",
+ " M | \n",
+ " Master | \n",
+ " 847141.75% | \n",
+ " 63513 | \n",
+ " 70 | \n",
+ " 1/0/00 | \n",
+ " 185.667213 | \n",
+ " Personal Auto | \n",
+ " Four-Door Car | \n",
+ "
\n",
+ " \n",
+ " | 992 | \n",
+ " BS91566 | \n",
+ " Arizona | \n",
+ " F | \n",
+ " College | \n",
+ " 543121.91% | \n",
+ " 58161 | \n",
+ " 68 | \n",
+ " 1/0/00 | \n",
+ " 140.747286 | \n",
+ " Corporate Auto | \n",
+ " Four-Door Car | \n",
+ "
\n",
+ " \n",
+ " | 993 | \n",
+ " IL40123 | \n",
+ " Nevada | \n",
+ " F | \n",
+ " College | \n",
+ " 568964.41% | \n",
+ " 83640 | \n",
+ " 70 | \n",
+ " 1/0/00 | \n",
+ " 471.050488 | \n",
+ " Corporate Auto | \n",
+ " Two-Door Car | \n",
+ "
\n",
+ " \n",
+ " | 994 | \n",
+ " MY32149 | \n",
+ " California | \n",
+ " F | \n",
+ " Master | \n",
+ " 368672.38% | \n",
+ " 0 | \n",
+ " 96 | \n",
+ " 1/0/00 | \n",
+ " 28.460568 | \n",
+ " Personal Auto | \n",
+ " Two-Door Car | \n",
+ "
\n",
+ " \n",
+ " | 995 | \n",
+ " SA91515 | \n",
+ " California | \n",
+ " M | \n",
+ " Bachelor | \n",
+ " 399258.39% | \n",
+ " 0 | \n",
+ " 111 | \n",
+ " 1/0/00 | \n",
+ " 700.349052 | \n",
+ " Personal Auto | \n",
+ " SUV | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
996 rows × 11 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Customer ST GENDER Education Customer Lifetime Value Income \\\n",
+ "0 GS98873 Arizona F Bachelor 323912.47% 16061 \n",
+ "1 CW49887 California F Master 462680.11% 79487 \n",
+ "2 MY31220 California F College 899704.02% 54230 \n",
+ "3 UH35128 Oregon F College 2580706.30% 71210 \n",
+ "4 WH52799 Arizona F College 380812.21% 94903 \n",
+ ".. ... ... ... ... ... ... \n",
+ "991 HV85198 Arizona M Master 847141.75% 63513 \n",
+ "992 BS91566 Arizona F College 543121.91% 58161 \n",
+ "993 IL40123 Nevada F College 568964.41% 83640 \n",
+ "994 MY32149 California F Master 368672.38% 0 \n",
+ "995 SA91515 California M Bachelor 399258.39% 0 \n",
+ "\n",
+ " Monthly Premium Auto Number of Open Complaints Total Claim Amount \\\n",
+ "0 88 1/0/00 633.600000 \n",
+ "1 114 1/0/00 547.200000 \n",
+ "2 112 1/0/00 537.600000 \n",
+ "3 214 1/1/00 1027.200000 \n",
+ "4 94 1/0/00 451.200000 \n",
+ ".. ... ... ... \n",
+ "991 70 1/0/00 185.667213 \n",
+ "992 68 1/0/00 140.747286 \n",
+ "993 70 1/0/00 471.050488 \n",
+ "994 96 1/0/00 28.460568 \n",
+ "995 111 1/0/00 700.349052 \n",
+ "\n",
+ " Policy Type Vehicle Class \n",
+ "0 Personal Auto Four-Door Car \n",
+ "1 Special Auto SUV \n",
+ "2 Personal Auto Two-Door Car \n",
+ "3 Personal Auto Luxury Car \n",
+ "4 Corporate Auto Two-Door Car \n",
+ ".. ... ... \n",
+ "991 Personal Auto Four-Door Car \n",
+ "992 Corporate Auto Four-Door Car \n",
+ "993 Corporate Auto Two-Door Car \n",
+ "994 Personal Auto Two-Door Car \n",
+ "995 Personal Auto SUV \n",
+ "\n",
+ "[996 rows x 11 columns]"
+ ]
+ },
+ "execution_count": 18,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df2"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 19,
+ "id": "096441e7",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Snake-case every header in one pass, then spell out the abbreviated \"st\" column.\n",
+ "df2.columns = df2.columns.str.lower().str.replace(\" \", \"_\")\n",
+ "df2 = df2.rename(columns={\"st\": \"state\"})"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 20,
+ "id": "2a6d8955",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "customer 0\n",
+ "state 0\n",
+ "gender 5\n",
+ "education 0\n",
+ "customer_lifetime_value 0\n",
+ "income 0\n",
+ "monthly_premium_auto 0\n",
+ "number_of_open_complaints 0\n",
+ "total_claim_amount 0\n",
+ "policy_type 0\n",
+ "vehicle_class 0\n",
+ "dtype: int64"
+ ]
+ },
+ "execution_count": 20,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Strip the trailing '%' and convert df2's OWN column to numeric.\n",
+ "# The conversion must read from df2: reading from df1 would overwrite df2's values\n",
+ "# with df1's by index alignment.\n",
+ "df2[\"customer_lifetime_value\"] = df2[\"customer_lifetime_value\"].str.strip(\"%\")\n",
+ "df2[\"customer_lifetime_value\"] = pd.to_numeric(df2[\"customer_lifetime_value\"], errors=\"coerce\")\n",
+ "df2.isna().sum()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 21,
+ "id": "0ddeb913",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df2[\"gender\"] = df2[\"gender\"].fillna(\"unknown\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 22,
+ "id": "5167588d",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " customer | \n",
+ " state | \n",
+ " gender | \n",
+ " education | \n",
+ " customer_lifetime_value | \n",
+ " income | \n",
+ " monthly_premium_auto | \n",
+ " number_of_open_complaints | \n",
+ " total_claim_amount | \n",
+ " policy_type | \n",
+ " vehicle_class | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " GS98873 | \n",
+ " Arizona | \n",
+ " F | \n",
+ " Bachelor | \n",
+ " 0.00 | \n",
+ " 16061 | \n",
+ " 88 | \n",
+ " 1/0/00 | \n",
+ " 633.600000 | \n",
+ " Personal Auto | \n",
+ " Four-Door Car | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " CW49887 | \n",
+ " California | \n",
+ " F | \n",
+ " Master | \n",
+ " 697953.59 | \n",
+ " 79487 | \n",
+ " 114 | \n",
+ " 1/0/00 | \n",
+ " 547.200000 | \n",
+ " Special Auto | \n",
+ " SUV | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " MY31220 | \n",
+ " California | \n",
+ " F | \n",
+ " College | \n",
+ " 1288743.17 | \n",
+ " 54230 | \n",
+ " 112 | \n",
+ " 1/0/00 | \n",
+ " 537.600000 | \n",
+ " Personal Auto | \n",
+ " Two-Door Car | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " UH35128 | \n",
+ " Oregon | \n",
+ " F | \n",
+ " College | \n",
+ " 764586.18 | \n",
+ " 71210 | \n",
+ " 214 | \n",
+ " 1/1/00 | \n",
+ " 1027.200000 | \n",
+ " Personal Auto | \n",
+ " Luxury Car | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " WH52799 | \n",
+ " Arizona | \n",
+ " F | \n",
+ " College | \n",
+ " 536307.65 | \n",
+ " 94903 | \n",
+ " 94 | \n",
+ " 1/0/00 | \n",
+ " 451.200000 | \n",
+ " Corporate Auto | \n",
+ " Two-Door Car | \n",
+ "
\n",
+ " \n",
+ " | ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " | 991 | \n",
+ " HV85198 | \n",
+ " Arizona | \n",
+ " M | \n",
+ " Master | \n",
+ " 2153133.28 | \n",
+ " 63513 | \n",
+ " 70 | \n",
+ " 1/0/00 | \n",
+ " 185.667213 | \n",
+ " Personal Auto | \n",
+ " Four-Door Car | \n",
+ "
\n",
+ " \n",
+ " | 992 | \n",
+ " BS91566 | \n",
+ " Arizona | \n",
+ " F | \n",
+ " College | \n",
+ " 1262283.27 | \n",
+ " 58161 | \n",
+ " 68 | \n",
+ " 1/0/00 | \n",
+ " 140.747286 | \n",
+ " Corporate Auto | \n",
+ " Four-Door Car | \n",
+ "
\n",
+ " \n",
+ " | 993 | \n",
+ " IL40123 | \n",
+ " Nevada | \n",
+ " F | \n",
+ " College | \n",
+ " 2017196.15 | \n",
+ " 83640 | \n",
+ " 70 | \n",
+ " 1/0/00 | \n",
+ " 471.050488 | \n",
+ " Corporate Auto | \n",
+ " Two-Door Car | \n",
+ "
\n",
+ " \n",
+ " | 994 | \n",
+ " MY32149 | \n",
+ " California | \n",
+ " F | \n",
+ " Master | \n",
+ " 1646436.59 | \n",
+ " 0 | \n",
+ " 96 | \n",
+ " 1/0/00 | \n",
+ " 28.460568 | \n",
+ " Personal Auto | \n",
+ " Two-Door Car | \n",
+ "
\n",
+ " \n",
+ " | 995 | \n",
+ " SA91515 | \n",
+ " California | \n",
+ " M | \n",
+ " Bachelor | \n",
+ " 559538.99 | \n",
+ " 0 | \n",
+ " 111 | \n",
+ " 1/0/00 | \n",
+ " 700.349052 | \n",
+ " Personal Auto | \n",
+ " SUV | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
996 rows × 11 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " customer state gender education customer_lifetime_value income \\\n",
+ "0 GS98873 Arizona F Bachelor 0.00 16061 \n",
+ "1 CW49887 California F Master 697953.59 79487 \n",
+ "2 MY31220 California F College 1288743.17 54230 \n",
+ "3 UH35128 Oregon F College 764586.18 71210 \n",
+ "4 WH52799 Arizona F College 536307.65 94903 \n",
+ ".. ... ... ... ... ... ... \n",
+ "991 HV85198 Arizona M Master 2153133.28 63513 \n",
+ "992 BS91566 Arizona F College 1262283.27 58161 \n",
+ "993 IL40123 Nevada F College 2017196.15 83640 \n",
+ "994 MY32149 California F Master 1646436.59 0 \n",
+ "995 SA91515 California M Bachelor 559538.99 0 \n",
+ "\n",
+ " monthly_premium_auto number_of_open_complaints total_claim_amount \\\n",
+ "0 88 1/0/00 633.600000 \n",
+ "1 114 1/0/00 547.200000 \n",
+ "2 112 1/0/00 537.600000 \n",
+ "3 214 1/1/00 1027.200000 \n",
+ "4 94 1/0/00 451.200000 \n",
+ ".. ... ... ... \n",
+ "991 70 1/0/00 185.667213 \n",
+ "992 68 1/0/00 140.747286 \n",
+ "993 70 1/0/00 471.050488 \n",
+ "994 96 1/0/00 28.460568 \n",
+ "995 111 1/0/00 700.349052 \n",
+ "\n",
+ " policy_type vehicle_class \n",
+ "0 Personal Auto Four-Door Car \n",
+ "1 Special Auto SUV \n",
+ "2 Personal Auto Two-Door Car \n",
+ "3 Personal Auto Luxury Car \n",
+ "4 Corporate Auto Two-Door Car \n",
+ ".. ... ... \n",
+ "991 Personal Auto Four-Door Car \n",
+ "992 Corporate Auto Four-Door Car \n",
+ "993 Corporate Auto Two-Door Car \n",
+ "994 Personal Auto Two-Door Car \n",
+ "995 Personal Auto SUV \n",
+ "\n",
+ "[996 rows x 11 columns]"
+ ]
+ },
+ "execution_count": 22,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df2"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 23,
+ "id": "daa64953",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "text_columns = [\"gender\", \"education\", \"policy_type\", \"vehicle_class\", \"state\"]\n",
+ "for column in text_columns:\n",
+ "    # Normalize df2's own values; assigning from df1 would replace df2's data\n",
+ "    # with df1's rows matched by index.\n",
+ "    df2[column] = df2[column].str.lower().str.strip()\n",
+ "\n",
+ "# Apply the same category mappings used for df1 so both frames share one vocabulary.\n",
+ "df2[\"gender\"] = df2[\"gender\"].replace({\"m\": \"male\", \"f\": \"female\", \"femal\": \"female\"})\n",
+ "df2[\"vehicle_class\"] = df2[\"vehicle_class\"].replace({\"luxury suv\": \"luxury\", \"luxury car\": \"luxury\", \"sports car\": \"luxury\", \"luxery\": \"luxury\"})\n",
+ "df2[\"state\"] = df2[\"state\"].replace({\"cali\": \"california\", \"az\": \"arizona\", \"wa\": \"washington\"})\n",
+ "df2[\"education\"] = df2[\"education\"].replace({\"bachelors\": \"bachelor\"})"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 24,
+ "id": "c3394711",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "array(['washington', 'arizona', 'nevada', 'california', 'oregon'],\n",
+ " dtype=object)"
+ ]
+ },
+ "execution_count": 24,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df2[\"state\"].unique()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 25,
+ "id": "3198eaaa",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "array(['unknown', 'female', 'male'], dtype=object)"
+ ]
+ },
+ "execution_count": 25,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df2[\"gender\"].unique()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 26,
+ "id": "301f87ce",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "array(['master', 'bachelor', 'high school or below', 'college', 'doctor'],\n",
+ " dtype=object)"
+ ]
+ },
+ "execution_count": 26,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df2[\"education\"].unique()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 27,
+ "id": "26e27e09",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "array(['personal auto', 'corporate auto', 'special auto'], dtype=object)"
+ ]
+ },
+ "execution_count": 27,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df2[\"policy_type\"].unique()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 28,
+ "id": "88857119",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "array(['four-door car', 'two-door car', 'suv', 'luxury'], dtype=object)"
+ ]
+ },
+ "execution_count": 28,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df2[\"vehicle_class\"].unique()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 29,
+ "id": "30f064b1",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "array(['1/0/00', '1/1/00', '1/3/00', '1/5/00', '1/2/00', '1/4/00'],\n",
+ " dtype=object)"
+ ]
+ },
+ "execution_count": 29,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df2[\"number_of_open_complaints\"].unique()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 30,
+ "id": "845e7522",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df2[\"number_of_open_complaints\"] = df2[\"number_of_open_complaints\"].str.split(\"/\", expand=True)[1]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 31,
+ "id": "7af46c3d",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "array(['0', '1', '3', '5', '2', '4'], dtype=object)"
+ ]
+ },
+ "execution_count": 31,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df2[\"number_of_open_complaints\"].unique()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 32,
+ "id": "8c40edf7",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Complaint counts were parsed out of text, so cast them to integers.\n",
+ "df2 = df2.astype({\"number_of_open_complaints\": int})\n",
+ "# Keep the monetary columns at two decimal places.\n",
+ "money_columns = [\"customer_lifetime_value\", \"total_claim_amount\"]\n",
+ "df2[money_columns] = df2[money_columns].round(2)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 33,
+ "id": "e2174417",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " customer | \n",
+ " state | \n",
+ " gender | \n",
+ " education | \n",
+ " customer_lifetime_value | \n",
+ " income | \n",
+ " monthly_premium_auto | \n",
+ " number_of_open_complaints | \n",
+ " total_claim_amount | \n",
+ " policy_type | \n",
+ " vehicle_class | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " GS98873 | \n",
+ " washington | \n",
+ " unknown | \n",
+ " master | \n",
+ " 0.00 | \n",
+ " 16061 | \n",
+ " 88 | \n",
+ " 0 | \n",
+ " 633.60 | \n",
+ " personal auto | \n",
+ " four-door car | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " CW49887 | \n",
+ " arizona | \n",
+ " female | \n",
+ " bachelor | \n",
+ " 697953.59 | \n",
+ " 79487 | \n",
+ " 114 | \n",
+ " 0 | \n",
+ " 547.20 | \n",
+ " personal auto | \n",
+ " four-door car | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " MY31220 | \n",
+ " nevada | \n",
+ " female | \n",
+ " bachelor | \n",
+ " 1288743.17 | \n",
+ " 54230 | \n",
+ " 112 | \n",
+ " 0 | \n",
+ " 537.60 | \n",
+ " personal auto | \n",
+ " two-door car | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " UH35128 | \n",
+ " california | \n",
+ " male | \n",
+ " bachelor | \n",
+ " 764586.18 | \n",
+ " 71210 | \n",
+ " 214 | \n",
+ " 1 | \n",
+ " 1027.20 | \n",
+ " corporate auto | \n",
+ " suv | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " WH52799 | \n",
+ " washington | \n",
+ " male | \n",
+ " high school or below | \n",
+ " 536307.65 | \n",
+ " 94903 | \n",
+ " 94 | \n",
+ " 0 | \n",
+ " 451.20 | \n",
+ " personal auto | \n",
+ " four-door car | \n",
+ "
\n",
+ " \n",
+ " | ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " | 991 | \n",
+ " HV85198 | \n",
+ " oregon | \n",
+ " female | \n",
+ " high school or below | \n",
+ " 2153133.28 | \n",
+ " 63513 | \n",
+ " 70 | \n",
+ " 0 | \n",
+ " 185.67 | \n",
+ " personal auto | \n",
+ " four-door car | \n",
+ "
\n",
+ " \n",
+ " | 992 | \n",
+ " BS91566 | \n",
+ " arizona | \n",
+ " male | \n",
+ " bachelor | \n",
+ " 1262283.27 | \n",
+ " 58161 | \n",
+ " 68 | \n",
+ " 0 | \n",
+ " 140.75 | \n",
+ " personal auto | \n",
+ " luxury | \n",
+ "
\n",
+ " \n",
+ " | 993 | \n",
+ " IL40123 | \n",
+ " arizona | \n",
+ " male | \n",
+ " doctor | \n",
+ " 2017196.15 | \n",
+ " 83640 | \n",
+ " 70 | \n",
+ " 0 | \n",
+ " 471.05 | \n",
+ " personal auto | \n",
+ " two-door car | \n",
+ "
\n",
+ " \n",
+ " | 994 | \n",
+ " MY32149 | \n",
+ " california | \n",
+ " female | \n",
+ " bachelor | \n",
+ " 1646436.59 | \n",
+ " 0 | \n",
+ " 96 | \n",
+ " 0 | \n",
+ " 28.46 | \n",
+ " personal auto | \n",
+ " suv | \n",
+ "
\n",
+ " \n",
+ " | 995 | \n",
+ " SA91515 | \n",
+ " oregon | \n",
+ " female | \n",
+ " high school or below | \n",
+ " 559538.99 | \n",
+ " 0 | \n",
+ " 111 | \n",
+ " 0 | \n",
+ " 700.35 | \n",
+ " personal auto | \n",
+ " four-door car | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
996 rows × 11 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " customer state gender education \\\n",
+ "0 GS98873 washington unknown master \n",
+ "1 CW49887 arizona female bachelor \n",
+ "2 MY31220 nevada female bachelor \n",
+ "3 UH35128 california male bachelor \n",
+ "4 WH52799 washington male high school or below \n",
+ ".. ... ... ... ... \n",
+ "991 HV85198 oregon female high school or below \n",
+ "992 BS91566 arizona male bachelor \n",
+ "993 IL40123 arizona male doctor \n",
+ "994 MY32149 california female bachelor \n",
+ "995 SA91515 oregon female high school or below \n",
+ "\n",
+ " customer_lifetime_value income monthly_premium_auto \\\n",
+ "0 0.00 16061 88 \n",
+ "1 697953.59 79487 114 \n",
+ "2 1288743.17 54230 112 \n",
+ "3 764586.18 71210 214 \n",
+ "4 536307.65 94903 94 \n",
+ ".. ... ... ... \n",
+ "991 2153133.28 63513 70 \n",
+ "992 1262283.27 58161 68 \n",
+ "993 2017196.15 83640 70 \n",
+ "994 1646436.59 0 96 \n",
+ "995 559538.99 0 111 \n",
+ "\n",
+ " number_of_open_complaints total_claim_amount policy_type \\\n",
+ "0 0 633.60 personal auto \n",
+ "1 0 547.20 personal auto \n",
+ "2 0 537.60 personal auto \n",
+ "3 1 1027.20 corporate auto \n",
+ "4 0 451.20 personal auto \n",
+ ".. ... ... ... \n",
+ "991 0 185.67 personal auto \n",
+ "992 0 140.75 personal auto \n",
+ "993 0 471.05 personal auto \n",
+ "994 0 28.46 personal auto \n",
+ "995 0 700.35 personal auto \n",
+ "\n",
+ " vehicle_class \n",
+ "0 four-door car \n",
+ "1 four-door car \n",
+ "2 two-door car \n",
+ "3 suv \n",
+ "4 four-door car \n",
+ ".. ... \n",
+ "991 four-door car \n",
+ "992 luxury \n",
+ "993 two-door car \n",
+ "994 suv \n",
+ "995 four-door car \n",
+ "\n",
+ "[996 rows x 11 columns]"
+ ]
+ },
+ "execution_count": 33,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df2"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 34,
+ "id": "54b16d91",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "customer 0\n",
+ "state 0\n",
+ "gender 0\n",
+ "education 0\n",
+ "customer_lifetime_value 0\n",
+ "income 0\n",
+ "monthly_premium_auto 0\n",
+ "number_of_open_complaints 0\n",
+ "total_claim_amount 0\n",
+ "policy_type 0\n",
+ "vehicle_class 0\n",
+ "dtype: int64"
+ ]
+ },
+ "execution_count": 34,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df2.isna().sum()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "1a7fab3b",
+ "metadata": {},
+ "source": [
+ "# Cleaning df3"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 35,
+ "id": "377f69cb",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Customer | \n",
+ " State | \n",
+ " Customer Lifetime Value | \n",
+ " Education | \n",
+ " Gender | \n",
+ " Income | \n",
+ " Monthly Premium Auto | \n",
+ " Number of Open Complaints | \n",
+ " Policy Type | \n",
+ " Total Claim Amount | \n",
+ " Vehicle Class | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " SA25987 | \n",
+ " Washington | \n",
+ " 3479.137523 | \n",
+ " High School or Below | \n",
+ " M | \n",
+ " 0 | \n",
+ " 104 | \n",
+ " 0 | \n",
+ " Personal Auto | \n",
+ " 499.200000 | \n",
+ " Two-Door Car | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " TB86706 | \n",
+ " Arizona | \n",
+ " 2502.637401 | \n",
+ " Master | \n",
+ " M | \n",
+ " 0 | \n",
+ " 66 | \n",
+ " 0 | \n",
+ " Personal Auto | \n",
+ " 3.468912 | \n",
+ " Two-Door Car | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " ZL73902 | \n",
+ " Nevada | \n",
+ " 3265.156348 | \n",
+ " Bachelor | \n",
+ " F | \n",
+ " 25820 | \n",
+ " 82 | \n",
+ " 0 | \n",
+ " Personal Auto | \n",
+ " 393.600000 | \n",
+ " Four-Door Car | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " KX23516 | \n",
+ " California | \n",
+ " 4455.843406 | \n",
+ " High School or Below | \n",
+ " F | \n",
+ " 0 | \n",
+ " 121 | \n",
+ " 0 | \n",
+ " Personal Auto | \n",
+ " 699.615192 | \n",
+ " SUV | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " FN77294 | \n",
+ " California | \n",
+ " 7704.958480 | \n",
+ " High School or Below | \n",
+ " M | \n",
+ " 30366 | \n",
+ " 101 | \n",
+ " 2 | \n",
+ " Personal Auto | \n",
+ " 484.800000 | \n",
+ " SUV | \n",
+ "
\n",
+ " \n",
+ " | ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " | 7065 | \n",
+ " LA72316 | \n",
+ " California | \n",
+ " 23405.987980 | \n",
+ " Bachelor | \n",
+ " M | \n",
+ " 71941 | \n",
+ " 73 | \n",
+ " 0 | \n",
+ " Personal Auto | \n",
+ " 198.234764 | \n",
+ " Four-Door Car | \n",
+ "
\n",
+ " \n",
+ " | 7066 | \n",
+ " PK87824 | \n",
+ " California | \n",
+ " 3096.511217 | \n",
+ " College | \n",
+ " F | \n",
+ " 21604 | \n",
+ " 79 | \n",
+ " 0 | \n",
+ " Corporate Auto | \n",
+ " 379.200000 | \n",
+ " Four-Door Car | \n",
+ "
\n",
+ " \n",
+ " | 7067 | \n",
+ " TD14365 | \n",
+ " California | \n",
+ " 8163.890428 | \n",
+ " Bachelor | \n",
+ " M | \n",
+ " 0 | \n",
+ " 85 | \n",
+ " 3 | \n",
+ " Corporate Auto | \n",
+ " 790.784983 | \n",
+ " Four-Door Car | \n",
+ "
\n",
+ " \n",
+ " | 7068 | \n",
+ " UP19263 | \n",
+ " California | \n",
+ " 7524.442436 | \n",
+ " College | \n",
+ " M | \n",
+ " 21941 | \n",
+ " 96 | \n",
+ " 0 | \n",
+ " Personal Auto | \n",
+ " 691.200000 | \n",
+ " Four-Door Car | \n",
+ "
\n",
+ " \n",
+ " | 7069 | \n",
+ " Y167826 | \n",
+ " California | \n",
+ " 2611.836866 | \n",
+ " College | \n",
+ " M | \n",
+ " 0 | \n",
+ " 77 | \n",
+ " 0 | \n",
+ " Corporate Auto | \n",
+ " 369.600000 | \n",
+ " Two-Door Car | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
7070 rows × 11 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Customer State Customer Lifetime Value Education \\\n",
+ "0 SA25987 Washington 3479.137523 High School or Below \n",
+ "1 TB86706 Arizona 2502.637401 Master \n",
+ "2 ZL73902 Nevada 3265.156348 Bachelor \n",
+ "3 KX23516 California 4455.843406 High School or Below \n",
+ "4 FN77294 California 7704.958480 High School or Below \n",
+ "... ... ... ... ... \n",
+ "7065 LA72316 California 23405.987980 Bachelor \n",
+ "7066 PK87824 California 3096.511217 College \n",
+ "7067 TD14365 California 8163.890428 Bachelor \n",
+ "7068 UP19263 California 7524.442436 College \n",
+ "7069 Y167826 California 2611.836866 College \n",
+ "\n",
+ " Gender Income Monthly Premium Auto Number of Open Complaints \\\n",
+ "0 M 0 104 0 \n",
+ "1 M 0 66 0 \n",
+ "2 F 25820 82 0 \n",
+ "3 F 0 121 0 \n",
+ "4 M 30366 101 2 \n",
+ "... ... ... ... ... \n",
+ "7065 M 71941 73 0 \n",
+ "7066 F 21604 79 0 \n",
+ "7067 M 0 85 3 \n",
+ "7068 M 21941 96 0 \n",
+ "7069 M 0 77 0 \n",
+ "\n",
+ " Policy Type Total Claim Amount Vehicle Class \n",
+ "0 Personal Auto 499.200000 Two-Door Car \n",
+ "1 Personal Auto 3.468912 Two-Door Car \n",
+ "2 Personal Auto 393.600000 Four-Door Car \n",
+ "3 Personal Auto 699.615192 SUV \n",
+ "4 Personal Auto 484.800000 SUV \n",
+ "... ... ... ... \n",
+ "7065 Personal Auto 198.234764 Four-Door Car \n",
+ "7066 Corporate Auto 379.200000 Four-Door Car \n",
+ "7067 Corporate Auto 790.784983 Four-Door Car \n",
+ "7068 Personal Auto 691.200000 Four-Door Car \n",
+ "7069 Corporate Auto 369.600000 Two-Door Car \n",
+ "\n",
+ "[7070 rows x 11 columns]"
+ ]
+ },
+ "execution_count": 35,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df3"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 36,
+ "id": "17283368",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "Index(['Customer', 'State', 'Customer Lifetime Value', 'Education', 'Gender',\n",
+ " 'Income', 'Monthly Premium Auto', 'Number of Open Complaints',\n",
+ " 'Policy Type', 'Total Claim Amount', 'Vehicle Class'],\n",
+ " dtype='object')"
+ ]
+ },
+ "execution_count": 36,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df3.columns"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 37,
+ "id": "c964a69b",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "Index(['customer', 'state', 'customer_lifetime_value', 'education', 'gender',\n",
+ " 'income', 'monthly_premium_auto', 'number_of_open_complaints',\n",
+ " 'policy_type', 'total_claim_amount', 'vehicle_class'],\n",
+ " dtype='object')"
+ ]
+ },
+ "execution_count": 37,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Normalise headers to snake_case: trim, lowercase, spaces -> underscores.\n",
+ "snake_case_names = df3.columns.str.strip().str.lower().str.replace(\" \", \"_\")\n",
+ "df3.columns = snake_case_names\n",
+ "\n",
+ "# Map a possible 'st' header to 'state'; df3 already has 'state', so nothing changes here.\n",
+ "df3 = df3.rename(columns={\"st\": \"state\"})\n",
+ "df3.columns"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 38,
+ "id": "eefd8d37",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Lowercase and trim the categorical text columns so labels compare equal across frames.\n",
+ "text_columns = [\"gender\", \"education\", \"policy_type\", \"vehicle_class\", \"state\"]\n",
+ "for column in text_columns:\n",
+ "    df3[column] = df3[column].str.lower().str.strip()\n",
+ "\n",
+ "# This file codes gender as M/F; expand to the male/female labels that df1 and df2 use,\n",
+ "# otherwise the combined frame ends up with both 'm' and 'male' for the same category.\n",
+ "df3[\"gender\"] = df3[\"gender\"].replace({\"m\": \"male\", \"f\": \"female\"})"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 39,
+ "id": "d1005163",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " customer | \n",
+ " state | \n",
+ " customer_lifetime_value | \n",
+ " education | \n",
+ " gender | \n",
+ " income | \n",
+ " monthly_premium_auto | \n",
+ " number_of_open_complaints | \n",
+ " policy_type | \n",
+ " total_claim_amount | \n",
+ " vehicle_class | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " SA25987 | \n",
+ " washington | \n",
+ " 3479.137523 | \n",
+ " high school or below | \n",
+ " m | \n",
+ " 0 | \n",
+ " 104 | \n",
+ " 0 | \n",
+ " personal auto | \n",
+ " 499.200000 | \n",
+ " two-door car | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " TB86706 | \n",
+ " arizona | \n",
+ " 2502.637401 | \n",
+ " master | \n",
+ " m | \n",
+ " 0 | \n",
+ " 66 | \n",
+ " 0 | \n",
+ " personal auto | \n",
+ " 3.468912 | \n",
+ " two-door car | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " ZL73902 | \n",
+ " nevada | \n",
+ " 3265.156348 | \n",
+ " bachelor | \n",
+ " f | \n",
+ " 25820 | \n",
+ " 82 | \n",
+ " 0 | \n",
+ " personal auto | \n",
+ " 393.600000 | \n",
+ " four-door car | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " KX23516 | \n",
+ " california | \n",
+ " 4455.843406 | \n",
+ " high school or below | \n",
+ " f | \n",
+ " 0 | \n",
+ " 121 | \n",
+ " 0 | \n",
+ " personal auto | \n",
+ " 699.615192 | \n",
+ " suv | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " FN77294 | \n",
+ " california | \n",
+ " 7704.958480 | \n",
+ " high school or below | \n",
+ " m | \n",
+ " 30366 | \n",
+ " 101 | \n",
+ " 2 | \n",
+ " personal auto | \n",
+ " 484.800000 | \n",
+ " suv | \n",
+ "
\n",
+ " \n",
+ " | ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " | 7065 | \n",
+ " LA72316 | \n",
+ " california | \n",
+ " 23405.987980 | \n",
+ " bachelor | \n",
+ " m | \n",
+ " 71941 | \n",
+ " 73 | \n",
+ " 0 | \n",
+ " personal auto | \n",
+ " 198.234764 | \n",
+ " four-door car | \n",
+ "
\n",
+ " \n",
+ " | 7066 | \n",
+ " PK87824 | \n",
+ " california | \n",
+ " 3096.511217 | \n",
+ " college | \n",
+ " f | \n",
+ " 21604 | \n",
+ " 79 | \n",
+ " 0 | \n",
+ " corporate auto | \n",
+ " 379.200000 | \n",
+ " four-door car | \n",
+ "
\n",
+ " \n",
+ " | 7067 | \n",
+ " TD14365 | \n",
+ " california | \n",
+ " 8163.890428 | \n",
+ " bachelor | \n",
+ " m | \n",
+ " 0 | \n",
+ " 85 | \n",
+ " 3 | \n",
+ " corporate auto | \n",
+ " 790.784983 | \n",
+ " four-door car | \n",
+ "
\n",
+ " \n",
+ " | 7068 | \n",
+ " UP19263 | \n",
+ " california | \n",
+ " 7524.442436 | \n",
+ " college | \n",
+ " m | \n",
+ " 21941 | \n",
+ " 96 | \n",
+ " 0 | \n",
+ " personal auto | \n",
+ " 691.200000 | \n",
+ " four-door car | \n",
+ "
\n",
+ " \n",
+ " | 7069 | \n",
+ " Y167826 | \n",
+ " california | \n",
+ " 2611.836866 | \n",
+ " college | \n",
+ " m | \n",
+ " 0 | \n",
+ " 77 | \n",
+ " 0 | \n",
+ " corporate auto | \n",
+ " 369.600000 | \n",
+ " two-door car | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
7070 rows × 11 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " customer state customer_lifetime_value education \\\n",
+ "0 SA25987 washington 3479.137523 high school or below \n",
+ "1 TB86706 arizona 2502.637401 master \n",
+ "2 ZL73902 nevada 3265.156348 bachelor \n",
+ "3 KX23516 california 4455.843406 high school or below \n",
+ "4 FN77294 california 7704.958480 high school or below \n",
+ "... ... ... ... ... \n",
+ "7065 LA72316 california 23405.987980 bachelor \n",
+ "7066 PK87824 california 3096.511217 college \n",
+ "7067 TD14365 california 8163.890428 bachelor \n",
+ "7068 UP19263 california 7524.442436 college \n",
+ "7069 Y167826 california 2611.836866 college \n",
+ "\n",
+ " gender income monthly_premium_auto number_of_open_complaints \\\n",
+ "0 m 0 104 0 \n",
+ "1 m 0 66 0 \n",
+ "2 f 25820 82 0 \n",
+ "3 f 0 121 0 \n",
+ "4 m 30366 101 2 \n",
+ "... ... ... ... ... \n",
+ "7065 m 71941 73 0 \n",
+ "7066 f 21604 79 0 \n",
+ "7067 m 0 85 3 \n",
+ "7068 m 21941 96 0 \n",
+ "7069 m 0 77 0 \n",
+ "\n",
+ " policy_type total_claim_amount vehicle_class \n",
+ "0 personal auto 499.200000 two-door car \n",
+ "1 personal auto 3.468912 two-door car \n",
+ "2 personal auto 393.600000 four-door car \n",
+ "3 personal auto 699.615192 suv \n",
+ "4 personal auto 484.800000 suv \n",
+ "... ... ... ... \n",
+ "7065 personal auto 198.234764 four-door car \n",
+ "7066 corporate auto 379.200000 four-door car \n",
+ "7067 corporate auto 790.784983 four-door car \n",
+ "7068 personal auto 691.200000 four-door car \n",
+ "7069 corporate auto 369.600000 two-door car \n",
+ "\n",
+ "[7070 rows x 11 columns]"
+ ]
+ },
+ "execution_count": 39,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df3"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 40,
+ "id": "48018657",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "array([3479.137523, 2502.637401, 3265.156348, ..., 8163.890428,\n",
+ " 7524.442436, 2611.836866])"
+ ]
+ },
+ "execution_count": 40,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Fold the premium categories (and the 'luxery' misspelling) into a single 'luxury' label.\n",
+ "luxury_aliases = [\"luxury suv\", \"luxury car\", \"sports car\", \"luxery\"]\n",
+ "luxury_mapping = dict.fromkeys(luxury_aliases, \"luxury\")\n",
+ "df3[\"vehicle_class\"] = df3[\"vehicle_class\"].replace(luxury_mapping)\n",
+ "\n",
+ "# Quick look at the raw lifetime-value numbers before rounding.\n",
+ "df3[\"customer_lifetime_value\"].unique()\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 41,
+ "id": "17ba33fe",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Store complaint counts as integers.\n",
+ "df3[\"number_of_open_complaints\"] = df3[\"number_of_open_complaints\"].astype(int)\n",
+ "\n",
+ "# Round both monetary columns to two decimals in one assignment.\n",
+ "money_columns = [\"customer_lifetime_value\", \"total_claim_amount\"]\n",
+ "df3[money_columns] = df3[money_columns].round(2)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 42,
+ "id": "6e6b8ca0",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "customer 0\n",
+ "state 0\n",
+ "customer_lifetime_value 0\n",
+ "education 0\n",
+ "gender 0\n",
+ "income 0\n",
+ "monthly_premium_auto 0\n",
+ "number_of_open_complaints 0\n",
+ "policy_type 0\n",
+ "total_claim_amount 0\n",
+ "vehicle_class 0\n",
+ "dtype: int64"
+ ]
+ },
+ "execution_count": 42,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df3.isna().sum()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "95d6e7fa",
+ "metadata": {},
+ "source": [
+ "## Final checks before concatenating"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 43,
+ "id": "41b9ddbc",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "Index(['customer', 'state', 'gender', 'education', 'customer_lifetime_value',\n",
+ " 'income', 'monthly_premium_auto', 'number_of_open_complaints',\n",
+ " 'policy_type', 'vehicle_class', 'total_claim_amount'],\n",
+ " dtype='object')"
+ ]
+ },
+ "execution_count": 43,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df1.columns"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 44,
+ "id": "8d871ec0",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "Index(['customer', 'state', 'gender', 'education', 'customer_lifetime_value',\n",
+ " 'income', 'monthly_premium_auto', 'number_of_open_complaints',\n",
+ " 'total_claim_amount', 'policy_type', 'vehicle_class'],\n",
+ " dtype='object')"
+ ]
+ },
+ "execution_count": 44,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df2.columns"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 45,
+ "id": "217fcbfa",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "Index(['customer', 'state', 'customer_lifetime_value', 'education', 'gender',\n",
+ " 'income', 'monthly_premium_auto', 'number_of_open_complaints',\n",
+ " 'policy_type', 'total_claim_amount', 'vehicle_class'],\n",
+ " dtype='object')"
+ ]
+ },
+ "execution_count": 45,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df3.columns"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 46,
+ "id": "af4e7429",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "cols = df1.columns"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 47,
+ "id": "2a68032b",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Reorder df2 and df3 so their columns follow df1's order (held in cols).\n",
+ "df2 = df2.loc[:, cols]\n",
+ "df3 = df3.loc[:, cols]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 48,
+ "id": "c4afa62d",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "Index(['customer', 'state', 'gender', 'education', 'customer_lifetime_value',\n",
+ " 'income', 'monthly_premium_auto', 'number_of_open_complaints',\n",
+ " 'policy_type', 'vehicle_class', 'total_claim_amount'],\n",
+ " dtype='object')"
+ ]
+ },
+ "execution_count": 48,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df1.columns"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 49,
+ "id": "884587cb",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "Index(['customer', 'state', 'gender', 'education', 'customer_lifetime_value',\n",
+ " 'income', 'monthly_premium_auto', 'number_of_open_complaints',\n",
+ " 'policy_type', 'vehicle_class', 'total_claim_amount'],\n",
+ " dtype='object')"
+ ]
+ },
+ "execution_count": 49,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df2.columns"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 50,
+ "id": "1473ee10",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "Index(['customer', 'state', 'gender', 'education', 'customer_lifetime_value',\n",
+ " 'income', 'monthly_premium_auto', 'number_of_open_complaints',\n",
+ " 'policy_type', 'vehicle_class', 'total_claim_amount'],\n",
+ " dtype='object')"
+ ]
+ },
+ "execution_count": 50,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df3.columns"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 51,
+ "id": "c09a78a3",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Stack the three frames row-wise and renumber the rows from 0.\n",
+ "frames_to_stack = [df1, df2, df3]\n",
+ "comb_df = pd.concat(frames_to_stack, axis=0, ignore_index=True)\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 52,
+ "id": "6f49d6b9",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " customer | \n",
+ " state | \n",
+ " gender | \n",
+ " education | \n",
+ " customer_lifetime_value | \n",
+ " income | \n",
+ " monthly_premium_auto | \n",
+ " number_of_open_complaints | \n",
+ " policy_type | \n",
+ " vehicle_class | \n",
+ " total_claim_amount | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " RB50392 | \n",
+ " washington | \n",
+ " unknown | \n",
+ " master | \n",
+ " 0.00 | \n",
+ " 0 | \n",
+ " 1000 | \n",
+ " 0 | \n",
+ " personal auto | \n",
+ " four-door car | \n",
+ " 2.70 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " QZ44356 | \n",
+ " arizona | \n",
+ " female | \n",
+ " bachelor | \n",
+ " 697953.59 | \n",
+ " 0 | \n",
+ " 94 | \n",
+ " 0 | \n",
+ " personal auto | \n",
+ " four-door car | \n",
+ " 1131.46 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " AI49188 | \n",
+ " nevada | \n",
+ " female | \n",
+ " bachelor | \n",
+ " 1288743.17 | \n",
+ " 48767 | \n",
+ " 108 | \n",
+ " 0 | \n",
+ " personal auto | \n",
+ " two-door car | \n",
+ " 566.47 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " WW63253 | \n",
+ " california | \n",
+ " male | \n",
+ " bachelor | \n",
+ " 764586.18 | \n",
+ " 0 | \n",
+ " 106 | \n",
+ " 0 | \n",
+ " corporate auto | \n",
+ " suv | \n",
+ " 529.88 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " GA49547 | \n",
+ " washington | \n",
+ " male | \n",
+ " high school or below | \n",
+ " 536307.65 | \n",
+ " 36357 | \n",
+ " 68 | \n",
+ " 0 | \n",
+ " personal auto | \n",
+ " four-door car | \n",
+ " 17.27 | \n",
+ "
\n",
+ " \n",
+ " | ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " | 9132 | \n",
+ " LA72316 | \n",
+ " california | \n",
+ " m | \n",
+ " bachelor | \n",
+ " 23405.99 | \n",
+ " 71941 | \n",
+ " 73 | \n",
+ " 0 | \n",
+ " personal auto | \n",
+ " four-door car | \n",
+ " 198.23 | \n",
+ "
\n",
+ " \n",
+ " | 9133 | \n",
+ " PK87824 | \n",
+ " california | \n",
+ " f | \n",
+ " college | \n",
+ " 3096.51 | \n",
+ " 21604 | \n",
+ " 79 | \n",
+ " 0 | \n",
+ " corporate auto | \n",
+ " four-door car | \n",
+ " 379.20 | \n",
+ "
\n",
+ " \n",
+ " | 9134 | \n",
+ " TD14365 | \n",
+ " california | \n",
+ " m | \n",
+ " bachelor | \n",
+ " 8163.89 | \n",
+ " 0 | \n",
+ " 85 | \n",
+ " 3 | \n",
+ " corporate auto | \n",
+ " four-door car | \n",
+ " 790.78 | \n",
+ "
\n",
+ " \n",
+ " | 9135 | \n",
+ " UP19263 | \n",
+ " california | \n",
+ " m | \n",
+ " college | \n",
+ " 7524.44 | \n",
+ " 21941 | \n",
+ " 96 | \n",
+ " 0 | \n",
+ " personal auto | \n",
+ " four-door car | \n",
+ " 691.20 | \n",
+ "
\n",
+ " \n",
+ " | 9136 | \n",
+ " Y167826 | \n",
+ " california | \n",
+ " m | \n",
+ " college | \n",
+ " 2611.84 | \n",
+ " 0 | \n",
+ " 77 | \n",
+ " 0 | \n",
+ " corporate auto | \n",
+ " two-door car | \n",
+ " 369.60 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
9137 rows × 11 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " customer state gender education \\\n",
+ "0 RB50392 washington unknown master \n",
+ "1 QZ44356 arizona female bachelor \n",
+ "2 AI49188 nevada female bachelor \n",
+ "3 WW63253 california male bachelor \n",
+ "4 GA49547 washington male high school or below \n",
+ "... ... ... ... ... \n",
+ "9132 LA72316 california m bachelor \n",
+ "9133 PK87824 california f college \n",
+ "9134 TD14365 california m bachelor \n",
+ "9135 UP19263 california m college \n",
+ "9136 Y167826 california m college \n",
+ "\n",
+ " customer_lifetime_value income monthly_premium_auto \\\n",
+ "0 0.00 0 1000 \n",
+ "1 697953.59 0 94 \n",
+ "2 1288743.17 48767 108 \n",
+ "3 764586.18 0 106 \n",
+ "4 536307.65 36357 68 \n",
+ "... ... ... ... \n",
+ "9132 23405.99 71941 73 \n",
+ "9133 3096.51 21604 79 \n",
+ "9134 8163.89 0 85 \n",
+ "9135 7524.44 21941 96 \n",
+ "9136 2611.84 0 77 \n",
+ "\n",
+ " number_of_open_complaints policy_type vehicle_class \\\n",
+ "0 0 personal auto four-door car \n",
+ "1 0 personal auto four-door car \n",
+ "2 0 personal auto two-door car \n",
+ "3 0 corporate auto suv \n",
+ "4 0 personal auto four-door car \n",
+ "... ... ... ... \n",
+ "9132 0 personal auto four-door car \n",
+ "9133 0 corporate auto four-door car \n",
+ "9134 3 corporate auto four-door car \n",
+ "9135 0 personal auto four-door car \n",
+ "9136 0 corporate auto two-door car \n",
+ "\n",
+ " total_claim_amount \n",
+ "0 2.70 \n",
+ "1 1131.46 \n",
+ "2 566.47 \n",
+ "3 529.88 \n",
+ "4 17.27 \n",
+ "... ... \n",
+ "9132 198.23 \n",
+ "9133 379.20 \n",
+ "9134 790.78 \n",
+ "9135 691.20 \n",
+ "9136 369.60 \n",
+ "\n",
+ "[9137 rows x 11 columns]"
+ ]
+ },
+ "execution_count": 52,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "comb_df"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 53,
+ "id": "f111a761",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "(9137, 11)"
+ ]
+ },
+ "execution_count": 53,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "comb_df.shape"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 54,
+ "id": "0d8ddfd4",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " customer | \n",
+ " state | \n",
+ " gender | \n",
+ " education | \n",
+ " customer_lifetime_value | \n",
+ " income | \n",
+ " monthly_premium_auto | \n",
+ " number_of_open_complaints | \n",
+ " policy_type | \n",
+ " vehicle_class | \n",
+ " total_claim_amount | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " RB50392 | \n",
+ " washington | \n",
+ " unknown | \n",
+ " master | \n",
+ " 0.00 | \n",
+ " 0 | \n",
+ " 1000 | \n",
+ " 0 | \n",
+ " personal auto | \n",
+ " four-door car | \n",
+ " 2.70 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " QZ44356 | \n",
+ " arizona | \n",
+ " female | \n",
+ " bachelor | \n",
+ " 697953.59 | \n",
+ " 0 | \n",
+ " 94 | \n",
+ " 0 | \n",
+ " personal auto | \n",
+ " four-door car | \n",
+ " 1131.46 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " AI49188 | \n",
+ " nevada | \n",
+ " female | \n",
+ " bachelor | \n",
+ " 1288743.17 | \n",
+ " 48767 | \n",
+ " 108 | \n",
+ " 0 | \n",
+ " personal auto | \n",
+ " two-door car | \n",
+ " 566.47 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " WW63253 | \n",
+ " california | \n",
+ " male | \n",
+ " bachelor | \n",
+ " 764586.18 | \n",
+ " 0 | \n",
+ " 106 | \n",
+ " 0 | \n",
+ " corporate auto | \n",
+ " suv | \n",
+ " 529.88 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " GA49547 | \n",
+ " washington | \n",
+ " male | \n",
+ " high school or below | \n",
+ " 536307.65 | \n",
+ " 36357 | \n",
+ " 68 | \n",
+ " 0 | \n",
+ " personal auto | \n",
+ " four-door car | \n",
+ " 17.27 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " customer state gender education \\\n",
+ "0 RB50392 washington unknown master \n",
+ "1 QZ44356 arizona female bachelor \n",
+ "2 AI49188 nevada female bachelor \n",
+ "3 WW63253 california male bachelor \n",
+ "4 GA49547 washington male high school or below \n",
+ "\n",
+ " customer_lifetime_value income monthly_premium_auto \\\n",
+ "0 0.00 0 1000 \n",
+ "1 697953.59 0 94 \n",
+ "2 1288743.17 48767 108 \n",
+ "3 764586.18 0 106 \n",
+ "4 536307.65 36357 68 \n",
+ "\n",
+ " number_of_open_complaints policy_type vehicle_class \\\n",
+ "0 0 personal auto four-door car \n",
+ "1 0 personal auto four-door car \n",
+ "2 0 personal auto two-door car \n",
+ "3 0 corporate auto suv \n",
+ "4 0 personal auto four-door car \n",
+ "\n",
+ " total_claim_amount \n",
+ "0 2.70 \n",
+ "1 1131.46 \n",
+ "2 566.47 \n",
+ "3 529.88 \n",
+ "4 17.27 "
+ ]
+ },
+ "execution_count": 54,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "comb_df.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 55,
+ "id": "cb0dd902",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "customer object\n",
+ "state object\n",
+ "gender object\n",
+ "education object\n",
+ "customer_lifetime_value float64\n",
+ "income int64\n",
+ "monthly_premium_auto int64\n",
+ "number_of_open_complaints int64\n",
+ "policy_type object\n",
+ "vehicle_class object\n",
+ "total_claim_amount float64\n",
+ "dtype: object"
+ ]
+ },
+ "execution_count": 55,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "comb_df.dtypes"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "d4a003cd",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "d89b38da",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "17742cba",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
{
"cell_type": "markdown",
"id": "31b8a9e7-7db9-4604-991b-ef6771603e57",
@@ -72,14 +3264,505 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 56,
"id": "aa10d9b0-1c27-4d3f-a8e4-db6ab73bfd26",
"metadata": {
"id": "aa10d9b0-1c27-4d3f-a8e4-db6ab73bfd26"
},
"outputs": [],
"source": [
- "# Your code goes here"
+ "import pandas as pd\n",
+ "url = \"https://raw.githubusercontent.com/data-bootcamp-v4/data/main/marketing_customer_analysis_clean.csv\"  # cleaned marketing customer analysis CSV\n",
+ "df = pd.read_csv(filepath_or_buffer=url)  # fetch the remote CSV into a DataFrame"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 57,
+ "id": "ffacfc9f",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " unnamed:_0 | \n",
+ " customer | \n",
+ " state | \n",
+ " customer_lifetime_value | \n",
+ " response | \n",
+ " coverage | \n",
+ " education | \n",
+ " effective_to_date | \n",
+ " employmentstatus | \n",
+ " gender | \n",
+ " ... | \n",
+ " number_of_policies | \n",
+ " policy_type | \n",
+ " policy | \n",
+ " renew_offer_type | \n",
+ " sales_channel | \n",
+ " total_claim_amount | \n",
+ " vehicle_class | \n",
+ " vehicle_size | \n",
+ " vehicle_type | \n",
+ " month | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 0 | \n",
+ " DK49336 | \n",
+ " Arizona | \n",
+ " 4809.216960 | \n",
+ " No | \n",
+ " Basic | \n",
+ " College | \n",
+ " 2011-02-18 | \n",
+ " Employed | \n",
+ " M | \n",
+ " ... | \n",
+ " 9 | \n",
+ " Corporate Auto | \n",
+ " Corporate L3 | \n",
+ " Offer3 | \n",
+ " Agent | \n",
+ " 292.800000 | \n",
+ " Four-Door Car | \n",
+ " Medsize | \n",
+ " A | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 1 | \n",
+ " KX64629 | \n",
+ " California | \n",
+ " 2228.525238 | \n",
+ " No | \n",
+ " Basic | \n",
+ " College | \n",
+ " 2011-01-18 | \n",
+ " Unemployed | \n",
+ " F | \n",
+ " ... | \n",
+ " 1 | \n",
+ " Personal Auto | \n",
+ " Personal L3 | \n",
+ " Offer4 | \n",
+ " Call Center | \n",
+ " 744.924331 | \n",
+ " Four-Door Car | \n",
+ " Medsize | \n",
+ " A | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 2 | \n",
+ " LZ68649 | \n",
+ " Washington | \n",
+ " 14947.917300 | \n",
+ " No | \n",
+ " Basic | \n",
+ " Bachelor | \n",
+ " 2011-02-10 | \n",
+ " Employed | \n",
+ " M | \n",
+ " ... | \n",
+ " 2 | \n",
+ " Personal Auto | \n",
+ " Personal L3 | \n",
+ " Offer3 | \n",
+ " Call Center | \n",
+ " 480.000000 | \n",
+ " SUV | \n",
+ " Medsize | \n",
+ " A | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 3 | \n",
+ " XL78013 | \n",
+ " Oregon | \n",
+ " 22332.439460 | \n",
+ " Yes | \n",
+ " Extended | \n",
+ " College | \n",
+ " 2011-01-11 | \n",
+ " Employed | \n",
+ " M | \n",
+ " ... | \n",
+ " 2 | \n",
+ " Corporate Auto | \n",
+ " Corporate L3 | \n",
+ " Offer2 | \n",
+ " Branch | \n",
+ " 484.013411 | \n",
+ " Four-Door Car | \n",
+ " Medsize | \n",
+ " A | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 4 | \n",
+ " QA50777 | \n",
+ " Oregon | \n",
+ " 9025.067525 | \n",
+ " No | \n",
+ " Premium | \n",
+ " Bachelor | \n",
+ " 2011-01-17 | \n",
+ " Medical Leave | \n",
+ " F | \n",
+ " ... | \n",
+ " 7 | \n",
+ " Personal Auto | \n",
+ " Personal L2 | \n",
+ " Offer1 | \n",
+ " Branch | \n",
+ " 707.925645 | \n",
+ " Four-Door Car | \n",
+ " Medsize | \n",
+ " A | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " | ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " | 10905 | \n",
+ " 10905 | \n",
+ " FE99816 | \n",
+ " Nevada | \n",
+ " 15563.369440 | \n",
+ " No | \n",
+ " Premium | \n",
+ " Bachelor | \n",
+ " 2011-01-19 | \n",
+ " Unemployed | \n",
+ " F | \n",
+ " ... | \n",
+ " 7 | \n",
+ " Personal Auto | \n",
+ " Personal L1 | \n",
+ " Offer3 | \n",
+ " Web | \n",
+ " 1214.400000 | \n",
+ " Luxury Car | \n",
+ " Medsize | \n",
+ " A | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " | 10906 | \n",
+ " 10906 | \n",
+ " KX53892 | \n",
+ " Oregon | \n",
+ " 5259.444853 | \n",
+ " No | \n",
+ " Basic | \n",
+ " College | \n",
+ " 2011-01-06 | \n",
+ " Employed | \n",
+ " F | \n",
+ " ... | \n",
+ " 6 | \n",
+ " Personal Auto | \n",
+ " Personal L3 | \n",
+ " Offer2 | \n",
+ " Branch | \n",
+ " 273.018929 | \n",
+ " Four-Door Car | \n",
+ " Medsize | \n",
+ " A | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " | 10907 | \n",
+ " 10907 | \n",
+ " TL39050 | \n",
+ " Arizona | \n",
+ " 23893.304100 | \n",
+ " No | \n",
+ " Extended | \n",
+ " Bachelor | \n",
+ " 2011-02-06 | \n",
+ " Employed | \n",
+ " F | \n",
+ " ... | \n",
+ " 2 | \n",
+ " Corporate Auto | \n",
+ " Corporate L3 | \n",
+ " Offer1 | \n",
+ " Web | \n",
+ " 381.306996 | \n",
+ " Luxury SUV | \n",
+ " Medsize | \n",
+ " A | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " | 10908 | \n",
+ " 10908 | \n",
+ " WA60547 | \n",
+ " California | \n",
+ " 11971.977650 | \n",
+ " No | \n",
+ " Premium | \n",
+ " College | \n",
+ " 2011-02-13 | \n",
+ " Employed | \n",
+ " F | \n",
+ " ... | \n",
+ " 6 | \n",
+ " Personal Auto | \n",
+ " Personal L1 | \n",
+ " Offer1 | \n",
+ " Branch | \n",
+ " 618.288849 | \n",
+ " SUV | \n",
+ " Medsize | \n",
+ " A | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " | 10909 | \n",
+ " 10909 | \n",
+ " IV32877 | \n",
+ " California | \n",
+ " 6857.519928 | \n",
+ " No | \n",
+ " Basic | \n",
+ " Bachelor | \n",
+ " 2011-01-08 | \n",
+ " Unemployed | \n",
+ " M | \n",
+ " ... | \n",
+ " 3 | \n",
+ " Personal Auto | \n",
+ " Personal L1 | \n",
+ " Offer4 | \n",
+ " Web | \n",
+ " 1021.719397 | \n",
+ " SUV | \n",
+ " Medsize | \n",
+ " A | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
10910 rows × 27 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " unnamed:_0 customer state customer_lifetime_value response \\\n",
+ "0 0 DK49336 Arizona 4809.216960 No \n",
+ "1 1 KX64629 California 2228.525238 No \n",
+ "2 2 LZ68649 Washington 14947.917300 No \n",
+ "3 3 XL78013 Oregon 22332.439460 Yes \n",
+ "4 4 QA50777 Oregon 9025.067525 No \n",
+ "... ... ... ... ... ... \n",
+ "10905 10905 FE99816 Nevada 15563.369440 No \n",
+ "10906 10906 KX53892 Oregon 5259.444853 No \n",
+ "10907 10907 TL39050 Arizona 23893.304100 No \n",
+ "10908 10908 WA60547 California 11971.977650 No \n",
+ "10909 10909 IV32877 California 6857.519928 No \n",
+ "\n",
+ " coverage education effective_to_date employmentstatus gender ... \\\n",
+ "0 Basic College 2011-02-18 Employed M ... \n",
+ "1 Basic College 2011-01-18 Unemployed F ... \n",
+ "2 Basic Bachelor 2011-02-10 Employed M ... \n",
+ "3 Extended College 2011-01-11 Employed M ... \n",
+ "4 Premium Bachelor 2011-01-17 Medical Leave F ... \n",
+ "... ... ... ... ... ... ... \n",
+ "10905 Premium Bachelor 2011-01-19 Unemployed F ... \n",
+ "10906 Basic College 2011-01-06 Employed F ... \n",
+ "10907 Extended Bachelor 2011-02-06 Employed F ... \n",
+ "10908 Premium College 2011-02-13 Employed F ... \n",
+ "10909 Basic Bachelor 2011-01-08 Unemployed M ... \n",
+ "\n",
+ " number_of_policies policy_type policy renew_offer_type \\\n",
+ "0 9 Corporate Auto Corporate L3 Offer3 \n",
+ "1 1 Personal Auto Personal L3 Offer4 \n",
+ "2 2 Personal Auto Personal L3 Offer3 \n",
+ "3 2 Corporate Auto Corporate L3 Offer2 \n",
+ "4 7 Personal Auto Personal L2 Offer1 \n",
+ "... ... ... ... ... \n",
+ "10905 7 Personal Auto Personal L1 Offer3 \n",
+ "10906 6 Personal Auto Personal L3 Offer2 \n",
+ "10907 2 Corporate Auto Corporate L3 Offer1 \n",
+ "10908 6 Personal Auto Personal L1 Offer1 \n",
+ "10909 3 Personal Auto Personal L1 Offer4 \n",
+ "\n",
+ " sales_channel total_claim_amount vehicle_class vehicle_size \\\n",
+ "0 Agent 292.800000 Four-Door Car Medsize \n",
+ "1 Call Center 744.924331 Four-Door Car Medsize \n",
+ "2 Call Center 480.000000 SUV Medsize \n",
+ "3 Branch 484.013411 Four-Door Car Medsize \n",
+ "4 Branch 707.925645 Four-Door Car Medsize \n",
+ "... ... ... ... ... \n",
+ "10905 Web 1214.400000 Luxury Car Medsize \n",
+ "10906 Branch 273.018929 Four-Door Car Medsize \n",
+ "10907 Web 381.306996 Luxury SUV Medsize \n",
+ "10908 Branch 618.288849 SUV Medsize \n",
+ "10909 Web 1021.719397 SUV Medsize \n",
+ "\n",
+ " vehicle_type month \n",
+ "0 A 2 \n",
+ "1 A 1 \n",
+ "2 A 2 \n",
+ "3 A 1 \n",
+ "4 A 1 \n",
+ "... ... ... \n",
+ "10905 A 1 \n",
+ "10906 A 1 \n",
+ "10907 A 2 \n",
+ "10908 A 2 \n",
+ "10909 A 1 \n",
+ "\n",
+ "[10910 rows x 27 columns]"
+ ]
+ },
+ "execution_count": 57,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 58,
+ "id": "3f630515",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "array(['Arizona', 'California', 'Washington', 'Oregon', 'Nevada'],\n",
+ " dtype=object)"
+ ]
+ },
+ "execution_count": 58,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df[\"state\"].unique()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 59,
+ "id": "7bfdd4f3",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "unnamed:_0 int64\n",
+ "customer object\n",
+ "state object\n",
+ "customer_lifetime_value float64\n",
+ "response object\n",
+ "coverage object\n",
+ "education object\n",
+ "effective_to_date object\n",
+ "employmentstatus object\n",
+ "gender object\n",
+ "income int64\n",
+ "location_code object\n",
+ "marital_status object\n",
+ "monthly_premium_auto int64\n",
+ "months_since_last_claim float64\n",
+ "months_since_policy_inception int64\n",
+ "number_of_open_complaints float64\n",
+ "number_of_policies int64\n",
+ "policy_type object\n",
+ "policy object\n",
+ "renew_offer_type object\n",
+ "sales_channel object\n",
+ "total_claim_amount float64\n",
+ "vehicle_class object\n",
+ "vehicle_size object\n",
+ "vehicle_type object\n",
+ "month int64\n",
+ "dtype: object"
+ ]
+ },
+ "execution_count": 59,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df.dtypes"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 60,
+ "id": "a8cccc6e",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "array(['Agent', 'Call Center', 'Branch', 'Web'], dtype=object)"
+ ]
+ },
+ "execution_count": 60,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df[\"sales_channel\"].unique()"
]
},
{
@@ -93,6 +3776,89 @@
"Round the total revenue to 2 decimal points. Analyze the resulting table to draw insights."
]
},
+ {
+ "cell_type": "code",
+ "execution_count": 61,
+ "id": "68f9b972",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " total_claim_amount | \n",
+ "
\n",
+ " \n",
+ " | sales_channel | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | Agent | \n",
+ " 1810226.82 | \n",
+ "
\n",
+ " \n",
+ " | Branch | \n",
+ " 1301204.00 | \n",
+ "
\n",
+ " \n",
+ " | Call Center | \n",
+ " 926600.82 | \n",
+ "
\n",
+ " \n",
+ " | Web | \n",
+ " 706600.04 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " total_claim_amount\n",
+ "sales_channel \n",
+ "Agent 1810226.82\n",
+ "Branch 1301204.00\n",
+ "Call Center 926600.82\n",
+ "Web 706600.04"
+ ]
+ },
+ "execution_count": 61,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Total revenue (sum of total_claim_amount) for each sales channel, rounded to 2 decimals.\n",
+ "pivot_sales_channel = df.pivot_table(values=\"total_claim_amount\", index=\"sales_channel\", aggfunc='sum').round(2)\n",
+ "pivot_sales_channel"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "900557e2",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
{
"cell_type": "markdown",
"id": "640993b2-a291-436c-a34d-a551144f8196",
@@ -103,6 +3869,103 @@
"2. Create a pivot table that shows the average customer lifetime value per gender and education level. Analyze the resulting table to draw insights."
]
},
+ {
+ "cell_type": "code",
+ "execution_count": 64,
+ "id": "803a15bb",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | education | \n",
+ " Bachelor | \n",
+ " College | \n",
+ " Doctor | \n",
+ " High School or Below | \n",
+ " Master | \n",
+ "
\n",
+ " \n",
+ " | gender | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | F | \n",
+ " 7874.27 | \n",
+ " 7748.82 | \n",
+ " 7328.51 | \n",
+ " 8675.22 | \n",
+ " 8157.05 | \n",
+ "
\n",
+ " \n",
+ " | M | \n",
+ " 7703.60 | \n",
+ " 8052.46 | \n",
+ " 7415.33 | \n",
+ " 8149.69 | \n",
+ " 8168.83 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ "education Bachelor College Doctor High School or Below Master\n",
+ "gender \n",
+ "F 7874.27 7748.82 7328.51 8675.22 8157.05\n",
+ "M 7703.60 8052.46 7415.33 8149.69 8168.83"
+ ]
+ },
+ "execution_count": 64,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Mean customer lifetime value with gender as rows and education level as columns, rounded to 2 decimals.\n",
+ "pivot_lifetime_value = df.pivot_table(values=\"customer_lifetime_value\", index=\"gender\", columns=\"education\", aggfunc=\"mean\").round(2)\n",
+ "pivot_lifetime_value"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "2a7cef6e",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "d64c57dc",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
{
"cell_type": "markdown",
"id": "32c7f2e5-3d90-43e5-be33-9781b6069198",
@@ -130,7 +3993,7 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 63,
"id": "3a069e0b-b400-470e-904d-d17582191be4",
"metadata": {
"id": "3a069e0b-b400-470e-904d-d17582191be4"
@@ -160,7 +4023,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.9.13"
+ "version": "3.11.4"
}
},
"nbformat": 4,