diff --git a/.ipynb_checkpoints/lab-dw-data-structuring-and-combining-checkpoint.ipynb b/.ipynb_checkpoints/lab-dw-data-structuring-and-combining-checkpoint.ipynb
new file mode 100644
index 0000000..ed5767d
--- /dev/null
+++ b/.ipynb_checkpoints/lab-dw-data-structuring-and-combining-checkpoint.ipynb
@@ -0,0 +1,4692 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "25d7736c-ba17-4aff-b6bb-66eba20fbf4e",
+ "metadata": {
+ "id": "25d7736c-ba17-4aff-b6bb-66eba20fbf4e"
+ },
+ "source": [
+ "# Lab | Data Structuring and Combining Data"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "a2cdfc70-44c8-478c-81e7-2bc43fdf4986",
+ "metadata": {
+ "id": "a2cdfc70-44c8-478c-81e7-2bc43fdf4986"
+ },
+ "source": [
+ "## Challenge 1: Combining & Cleaning Data\n",
+ "\n",
+ "In this challenge, we will be working with the customer data from an insurance company, as we did in the two previous labs. The data can be found here:\n",
+ "- https://raw.githubusercontent.com/data-bootcamp-v4/data/main/file1.csv\n",
+ "\n",
+ "This time we also have new data, available in the two CSV files linked below.\n",
+ "\n",
+ "- https://raw.githubusercontent.com/data-bootcamp-v4/data/main/file2.csv\n",
+ "- https://raw.githubusercontent.com/data-bootcamp-v4/data/main/file3.csv\n",
+ "\n",
+ "Note that you'll need to clean and format the new data.\n",
+ "\n",
+ "Observation:\n",
+ "- One option is to first combine the three datasets and then apply the cleaning function to the new combined dataset\n",
+ "- Another option would be to read the clean file you saved in the previous lab, and just clean the two new files and concatenate the three clean datasets"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "id": "492d06e3-92c7-4105-ac72-536db98d3244",
+ "metadata": {
+ "id": "492d06e3-92c7-4105-ac72-536db98d3244"
+ },
+ "outputs": [],
+ "source": [
+ "import pandas as pd"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "id": "f16864ee-90a0-4e99-b966-c6fdcfaa1c61",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Customer data already used in the two previous labs.\n",
+ "url_1 = \"https://raw.githubusercontent.com/data-bootcamp-v4/data/main/file1.csv\"\n",
+ "file_1 = pd.read_csv(filepath_or_buffer=url_1)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "id": "4061bf4b-2501-4342-a8a4-ba8a1c025ac7",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Customer | \n",
+ " ST | \n",
+ " GENDER | \n",
+ " Education | \n",
+ " Customer Lifetime Value | \n",
+ " Income | \n",
+ " Monthly Premium Auto | \n",
+ " Number of Open Complaints | \n",
+ " Policy Type | \n",
+ " Vehicle Class | \n",
+ " Total Claim Amount | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " RB50392 | \n",
+ " Washington | \n",
+ " NaN | \n",
+ " Master | \n",
+ " NaN | \n",
+ " 0.0 | \n",
+ " 1000.0 | \n",
+ " 1/0/00 | \n",
+ " Personal Auto | \n",
+ " Four-Door Car | \n",
+ " 2.704934 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " QZ44356 | \n",
+ " Arizona | \n",
+ " F | \n",
+ " Bachelor | \n",
+ " 697953.59% | \n",
+ " 0.0 | \n",
+ " 94.0 | \n",
+ " 1/0/00 | \n",
+ " Personal Auto | \n",
+ " Four-Door Car | \n",
+ " 1131.464935 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " AI49188 | \n",
+ " Nevada | \n",
+ " F | \n",
+ " Bachelor | \n",
+ " 1288743.17% | \n",
+ " 48767.0 | \n",
+ " 108.0 | \n",
+ " 1/0/00 | \n",
+ " Personal Auto | \n",
+ " Two-Door Car | \n",
+ " 566.472247 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " WW63253 | \n",
+ " California | \n",
+ " M | \n",
+ " Bachelor | \n",
+ " 764586.18% | \n",
+ " 0.0 | \n",
+ " 106.0 | \n",
+ " 1/0/00 | \n",
+ " Corporate Auto | \n",
+ " SUV | \n",
+ " 529.881344 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " GA49547 | \n",
+ " Washington | \n",
+ " M | \n",
+ " High School or Below | \n",
+ " 536307.65% | \n",
+ " 36357.0 | \n",
+ " 68.0 | \n",
+ " 1/0/00 | \n",
+ " Personal Auto | \n",
+ " Four-Door Car | \n",
+ " 17.269323 | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 4003 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 4004 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 4005 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 4006 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 4007 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
4008 rows × 11 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Customer ST GENDER Education \\\n",
+ "0 RB50392 Washington NaN Master \n",
+ "1 QZ44356 Arizona F Bachelor \n",
+ "2 AI49188 Nevada F Bachelor \n",
+ "3 WW63253 California M Bachelor \n",
+ "4 GA49547 Washington M High School or Below \n",
+ "... ... ... ... ... \n",
+ "4003 NaN NaN NaN NaN \n",
+ "4004 NaN NaN NaN NaN \n",
+ "4005 NaN NaN NaN NaN \n",
+ "4006 NaN NaN NaN NaN \n",
+ "4007 NaN NaN NaN NaN \n",
+ "\n",
+ " Customer Lifetime Value Income Monthly Premium Auto \\\n",
+ "0 NaN 0.0 1000.0 \n",
+ "1 697953.59% 0.0 94.0 \n",
+ "2 1288743.17% 48767.0 108.0 \n",
+ "3 764586.18% 0.0 106.0 \n",
+ "4 536307.65% 36357.0 68.0 \n",
+ "... ... ... ... \n",
+ "4003 NaN NaN NaN \n",
+ "4004 NaN NaN NaN \n",
+ "4005 NaN NaN NaN \n",
+ "4006 NaN NaN NaN \n",
+ "4007 NaN NaN NaN \n",
+ "\n",
+ " Number of Open Complaints Policy Type Vehicle Class \\\n",
+ "0 1/0/00 Personal Auto Four-Door Car \n",
+ "1 1/0/00 Personal Auto Four-Door Car \n",
+ "2 1/0/00 Personal Auto Two-Door Car \n",
+ "3 1/0/00 Corporate Auto SUV \n",
+ "4 1/0/00 Personal Auto Four-Door Car \n",
+ "... ... ... ... \n",
+ "4003 NaN NaN NaN \n",
+ "4004 NaN NaN NaN \n",
+ "4005 NaN NaN NaN \n",
+ "4006 NaN NaN NaN \n",
+ "4007 NaN NaN NaN \n",
+ "\n",
+ " Total Claim Amount \n",
+ "0 2.704934 \n",
+ "1 1131.464935 \n",
+ "2 566.472247 \n",
+ "3 529.881344 \n",
+ "4 17.269323 \n",
+ "... ... \n",
+ "4003 NaN \n",
+ "4004 NaN \n",
+ "4005 NaN \n",
+ "4006 NaN \n",
+ "4007 NaN \n",
+ "\n",
+ "[4008 rows x 11 columns]"
+ ]
+ },
+ "execution_count": 14,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "file_1"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 15,
+ "id": "4e7a952f-bcb2-4a36-84d2-fa0fe5865193",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# First of the two new CSV files.\n",
+ "url_2 = \"https://raw.githubusercontent.com/data-bootcamp-v4/data/main/file2.csv\"\n",
+ "file_2 = pd.read_csv(filepath_or_buffer=url_2)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 16,
+ "id": "e0781ace-fd1f-44ba-99dc-f8758d70b0a3",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Customer | \n",
+ " ST | \n",
+ " GENDER | \n",
+ " Education | \n",
+ " Customer Lifetime Value | \n",
+ " Income | \n",
+ " Monthly Premium Auto | \n",
+ " Number of Open Complaints | \n",
+ " Total Claim Amount | \n",
+ " Policy Type | \n",
+ " Vehicle Class | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " GS98873 | \n",
+ " Arizona | \n",
+ " F | \n",
+ " Bachelor | \n",
+ " 323912.47% | \n",
+ " 16061 | \n",
+ " 88 | \n",
+ " 1/0/00 | \n",
+ " 633.600000 | \n",
+ " Personal Auto | \n",
+ " Four-Door Car | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " CW49887 | \n",
+ " California | \n",
+ " F | \n",
+ " Master | \n",
+ " 462680.11% | \n",
+ " 79487 | \n",
+ " 114 | \n",
+ " 1/0/00 | \n",
+ " 547.200000 | \n",
+ " Special Auto | \n",
+ " SUV | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " MY31220 | \n",
+ " California | \n",
+ " F | \n",
+ " College | \n",
+ " 899704.02% | \n",
+ " 54230 | \n",
+ " 112 | \n",
+ " 1/0/00 | \n",
+ " 537.600000 | \n",
+ " Personal Auto | \n",
+ " Two-Door Car | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " UH35128 | \n",
+ " Oregon | \n",
+ " F | \n",
+ " College | \n",
+ " 2580706.30% | \n",
+ " 71210 | \n",
+ " 214 | \n",
+ " 1/1/00 | \n",
+ " 1027.200000 | \n",
+ " Personal Auto | \n",
+ " Luxury Car | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " WH52799 | \n",
+ " Arizona | \n",
+ " F | \n",
+ " College | \n",
+ " 380812.21% | \n",
+ " 94903 | \n",
+ " 94 | \n",
+ " 1/0/00 | \n",
+ " 451.200000 | \n",
+ " Corporate Auto | \n",
+ " Two-Door Car | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 991 | \n",
+ " HV85198 | \n",
+ " Arizona | \n",
+ " M | \n",
+ " Master | \n",
+ " 847141.75% | \n",
+ " 63513 | \n",
+ " 70 | \n",
+ " 1/0/00 | \n",
+ " 185.667213 | \n",
+ " Personal Auto | \n",
+ " Four-Door Car | \n",
+ "
\n",
+ " \n",
+ " 992 | \n",
+ " BS91566 | \n",
+ " Arizona | \n",
+ " F | \n",
+ " College | \n",
+ " 543121.91% | \n",
+ " 58161 | \n",
+ " 68 | \n",
+ " 1/0/00 | \n",
+ " 140.747286 | \n",
+ " Corporate Auto | \n",
+ " Four-Door Car | \n",
+ "
\n",
+ " \n",
+ " 993 | \n",
+ " IL40123 | \n",
+ " Nevada | \n",
+ " F | \n",
+ " College | \n",
+ " 568964.41% | \n",
+ " 83640 | \n",
+ " 70 | \n",
+ " 1/0/00 | \n",
+ " 471.050488 | \n",
+ " Corporate Auto | \n",
+ " Two-Door Car | \n",
+ "
\n",
+ " \n",
+ " 994 | \n",
+ " MY32149 | \n",
+ " California | \n",
+ " F | \n",
+ " Master | \n",
+ " 368672.38% | \n",
+ " 0 | \n",
+ " 96 | \n",
+ " 1/0/00 | \n",
+ " 28.460568 | \n",
+ " Personal Auto | \n",
+ " Two-Door Car | \n",
+ "
\n",
+ " \n",
+ " 995 | \n",
+ " SA91515 | \n",
+ " California | \n",
+ " M | \n",
+ " Bachelor | \n",
+ " 399258.39% | \n",
+ " 0 | \n",
+ " 111 | \n",
+ " 1/0/00 | \n",
+ " 700.349052 | \n",
+ " Personal Auto | \n",
+ " SUV | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
996 rows × 11 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Customer ST GENDER Education Customer Lifetime Value Income \\\n",
+ "0 GS98873 Arizona F Bachelor 323912.47% 16061 \n",
+ "1 CW49887 California F Master 462680.11% 79487 \n",
+ "2 MY31220 California F College 899704.02% 54230 \n",
+ "3 UH35128 Oregon F College 2580706.30% 71210 \n",
+ "4 WH52799 Arizona F College 380812.21% 94903 \n",
+ ".. ... ... ... ... ... ... \n",
+ "991 HV85198 Arizona M Master 847141.75% 63513 \n",
+ "992 BS91566 Arizona F College 543121.91% 58161 \n",
+ "993 IL40123 Nevada F College 568964.41% 83640 \n",
+ "994 MY32149 California F Master 368672.38% 0 \n",
+ "995 SA91515 California M Bachelor 399258.39% 0 \n",
+ "\n",
+ " Monthly Premium Auto Number of Open Complaints Total Claim Amount \\\n",
+ "0 88 1/0/00 633.600000 \n",
+ "1 114 1/0/00 547.200000 \n",
+ "2 112 1/0/00 537.600000 \n",
+ "3 214 1/1/00 1027.200000 \n",
+ "4 94 1/0/00 451.200000 \n",
+ ".. ... ... ... \n",
+ "991 70 1/0/00 185.667213 \n",
+ "992 68 1/0/00 140.747286 \n",
+ "993 70 1/0/00 471.050488 \n",
+ "994 96 1/0/00 28.460568 \n",
+ "995 111 1/0/00 700.349052 \n",
+ "\n",
+ " Policy Type Vehicle Class \n",
+ "0 Personal Auto Four-Door Car \n",
+ "1 Special Auto SUV \n",
+ "2 Personal Auto Two-Door Car \n",
+ "3 Personal Auto Luxury Car \n",
+ "4 Corporate Auto Two-Door Car \n",
+ ".. ... ... \n",
+ "991 Personal Auto Four-Door Car \n",
+ "992 Corporate Auto Four-Door Car \n",
+ "993 Corporate Auto Two-Door Car \n",
+ "994 Personal Auto Two-Door Car \n",
+ "995 Personal Auto SUV \n",
+ "\n",
+ "[996 rows x 11 columns]"
+ ]
+ },
+ "execution_count": 16,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "file_2"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 17,
+ "id": "b31d8c6f-6c64-49a6-964b-d509454b7af1",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Second of the two new CSV files.\n",
+ "url_3 = \"https://raw.githubusercontent.com/data-bootcamp-v4/data/main/file3.csv\"\n",
+ "file_3 = pd.read_csv(filepath_or_buffer=url_3)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 18,
+ "id": "4d9d81df-db75-47ba-8553-c748ee4c7d68",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Customer | \n",
+ " State | \n",
+ " Customer Lifetime Value | \n",
+ " Education | \n",
+ " Gender | \n",
+ " Income | \n",
+ " Monthly Premium Auto | \n",
+ " Number of Open Complaints | \n",
+ " Policy Type | \n",
+ " Total Claim Amount | \n",
+ " Vehicle Class | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " SA25987 | \n",
+ " Washington | \n",
+ " 3479.137523 | \n",
+ " High School or Below | \n",
+ " M | \n",
+ " 0 | \n",
+ " 104 | \n",
+ " 0 | \n",
+ " Personal Auto | \n",
+ " 499.200000 | \n",
+ " Two-Door Car | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " TB86706 | \n",
+ " Arizona | \n",
+ " 2502.637401 | \n",
+ " Master | \n",
+ " M | \n",
+ " 0 | \n",
+ " 66 | \n",
+ " 0 | \n",
+ " Personal Auto | \n",
+ " 3.468912 | \n",
+ " Two-Door Car | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " ZL73902 | \n",
+ " Nevada | \n",
+ " 3265.156348 | \n",
+ " Bachelor | \n",
+ " F | \n",
+ " 25820 | \n",
+ " 82 | \n",
+ " 0 | \n",
+ " Personal Auto | \n",
+ " 393.600000 | \n",
+ " Four-Door Car | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " KX23516 | \n",
+ " California | \n",
+ " 4455.843406 | \n",
+ " High School or Below | \n",
+ " F | \n",
+ " 0 | \n",
+ " 121 | \n",
+ " 0 | \n",
+ " Personal Auto | \n",
+ " 699.615192 | \n",
+ " SUV | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " FN77294 | \n",
+ " California | \n",
+ " 7704.958480 | \n",
+ " High School or Below | \n",
+ " M | \n",
+ " 30366 | \n",
+ " 101 | \n",
+ " 2 | \n",
+ " Personal Auto | \n",
+ " 484.800000 | \n",
+ " SUV | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 7065 | \n",
+ " LA72316 | \n",
+ " California | \n",
+ " 23405.987980 | \n",
+ " Bachelor | \n",
+ " M | \n",
+ " 71941 | \n",
+ " 73 | \n",
+ " 0 | \n",
+ " Personal Auto | \n",
+ " 198.234764 | \n",
+ " Four-Door Car | \n",
+ "
\n",
+ " \n",
+ " 7066 | \n",
+ " PK87824 | \n",
+ " California | \n",
+ " 3096.511217 | \n",
+ " College | \n",
+ " F | \n",
+ " 21604 | \n",
+ " 79 | \n",
+ " 0 | \n",
+ " Corporate Auto | \n",
+ " 379.200000 | \n",
+ " Four-Door Car | \n",
+ "
\n",
+ " \n",
+ " 7067 | \n",
+ " TD14365 | \n",
+ " California | \n",
+ " 8163.890428 | \n",
+ " Bachelor | \n",
+ " M | \n",
+ " 0 | \n",
+ " 85 | \n",
+ " 3 | \n",
+ " Corporate Auto | \n",
+ " 790.784983 | \n",
+ " Four-Door Car | \n",
+ "
\n",
+ " \n",
+ " 7068 | \n",
+ " UP19263 | \n",
+ " California | \n",
+ " 7524.442436 | \n",
+ " College | \n",
+ " M | \n",
+ " 21941 | \n",
+ " 96 | \n",
+ " 0 | \n",
+ " Personal Auto | \n",
+ " 691.200000 | \n",
+ " Four-Door Car | \n",
+ "
\n",
+ " \n",
+ " 7069 | \n",
+ " Y167826 | \n",
+ " California | \n",
+ " 2611.836866 | \n",
+ " College | \n",
+ " M | \n",
+ " 0 | \n",
+ " 77 | \n",
+ " 0 | \n",
+ " Corporate Auto | \n",
+ " 369.600000 | \n",
+ " Two-Door Car | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
7070 rows × 11 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Customer State Customer Lifetime Value Education \\\n",
+ "0 SA25987 Washington 3479.137523 High School or Below \n",
+ "1 TB86706 Arizona 2502.637401 Master \n",
+ "2 ZL73902 Nevada 3265.156348 Bachelor \n",
+ "3 KX23516 California 4455.843406 High School or Below \n",
+ "4 FN77294 California 7704.958480 High School or Below \n",
+ "... ... ... ... ... \n",
+ "7065 LA72316 California 23405.987980 Bachelor \n",
+ "7066 PK87824 California 3096.511217 College \n",
+ "7067 TD14365 California 8163.890428 Bachelor \n",
+ "7068 UP19263 California 7524.442436 College \n",
+ "7069 Y167826 California 2611.836866 College \n",
+ "\n",
+ " Gender Income Monthly Premium Auto Number of Open Complaints \\\n",
+ "0 M 0 104 0 \n",
+ "1 M 0 66 0 \n",
+ "2 F 25820 82 0 \n",
+ "3 F 0 121 0 \n",
+ "4 M 30366 101 2 \n",
+ "... ... ... ... ... \n",
+ "7065 M 71941 73 0 \n",
+ "7066 F 21604 79 0 \n",
+ "7067 M 0 85 3 \n",
+ "7068 M 21941 96 0 \n",
+ "7069 M 0 77 0 \n",
+ "\n",
+ " Policy Type Total Claim Amount Vehicle Class \n",
+ "0 Personal Auto 499.200000 Two-Door Car \n",
+ "1 Personal Auto 3.468912 Two-Door Car \n",
+ "2 Personal Auto 393.600000 Four-Door Car \n",
+ "3 Personal Auto 699.615192 SUV \n",
+ "4 Personal Auto 484.800000 SUV \n",
+ "... ... ... ... \n",
+ "7065 Personal Auto 198.234764 Four-Door Car \n",
+ "7066 Corporate Auto 379.200000 Four-Door Car \n",
+ "7067 Corporate Auto 790.784983 Four-Door Car \n",
+ "7068 Personal Auto 691.200000 Four-Door Car \n",
+ "7069 Corporate Auto 369.600000 Two-Door Car \n",
+ "\n",
+ "[7070 rows x 11 columns]"
+ ]
+ },
+ "execution_count": 18,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "file_3"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 44,
+ "id": "80fde7a0-3405-491f-bc14-5d8d9ba026a4",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# file_3 names two columns 'State' and 'Gender' where file_1/file_2 use 'ST' and\n",
+ "# 'GENDER'. Align the labels before stacking so each attribute lands in a single\n",
+ "# column instead of being split across half-empty ST/State and GENDER/Gender pairs.\n",
+ "shared_labels = {\"ST\": \"State\", \"GENDER\": \"Gender\"}\n",
+ "file_combined = pd.concat(\n",
+ "    [frame.rename(columns=shared_labels) for frame in (file_1, file_2, file_3)],\n",
+ "    ignore_index=True,\n",
+ ")\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 45,
+ "id": "94add22a-b471-4cc4-9710-b2a6e26ecf97",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Customer | \n",
+ " ST | \n",
+ " GENDER | \n",
+ " Education | \n",
+ " Customer Lifetime Value | \n",
+ " Income | \n",
+ " Monthly Premium Auto | \n",
+ " Number of Open Complaints | \n",
+ " Policy Type | \n",
+ " Vehicle Class | \n",
+ " Total Claim Amount | \n",
+ " State | \n",
+ " Gender | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " RB50392 | \n",
+ " Washington | \n",
+ " NaN | \n",
+ " Master | \n",
+ " NaN | \n",
+ " 0.0 | \n",
+ " 1000.0 | \n",
+ " 1/0/00 | \n",
+ " Personal Auto | \n",
+ " Four-Door Car | \n",
+ " 2.704934 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " QZ44356 | \n",
+ " Arizona | \n",
+ " F | \n",
+ " Bachelor | \n",
+ " 697953.59% | \n",
+ " 0.0 | \n",
+ " 94.0 | \n",
+ " 1/0/00 | \n",
+ " Personal Auto | \n",
+ " Four-Door Car | \n",
+ " 1131.464935 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " AI49188 | \n",
+ " Nevada | \n",
+ " F | \n",
+ " Bachelor | \n",
+ " 1288743.17% | \n",
+ " 48767.0 | \n",
+ " 108.0 | \n",
+ " 1/0/00 | \n",
+ " Personal Auto | \n",
+ " Two-Door Car | \n",
+ " 566.472247 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " WW63253 | \n",
+ " California | \n",
+ " M | \n",
+ " Bachelor | \n",
+ " 764586.18% | \n",
+ " 0.0 | \n",
+ " 106.0 | \n",
+ " 1/0/00 | \n",
+ " Corporate Auto | \n",
+ " SUV | \n",
+ " 529.881344 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " GA49547 | \n",
+ " Washington | \n",
+ " M | \n",
+ " High School or Below | \n",
+ " 536307.65% | \n",
+ " 36357.0 | \n",
+ " 68.0 | \n",
+ " 1/0/00 | \n",
+ " Personal Auto | \n",
+ " Four-Door Car | \n",
+ " 17.269323 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 12069 | \n",
+ " LA72316 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " Bachelor | \n",
+ " 23405.98798 | \n",
+ " 71941.0 | \n",
+ " 73.0 | \n",
+ " 0 | \n",
+ " Personal Auto | \n",
+ " Four-Door Car | \n",
+ " 198.234764 | \n",
+ " California | \n",
+ " M | \n",
+ "
\n",
+ " \n",
+ " 12070 | \n",
+ " PK87824 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " College | \n",
+ " 3096.511217 | \n",
+ " 21604.0 | \n",
+ " 79.0 | \n",
+ " 0 | \n",
+ " Corporate Auto | \n",
+ " Four-Door Car | \n",
+ " 379.200000 | \n",
+ " California | \n",
+ " F | \n",
+ "
\n",
+ " \n",
+ " 12071 | \n",
+ " TD14365 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " Bachelor | \n",
+ " 8163.890428 | \n",
+ " 0.0 | \n",
+ " 85.0 | \n",
+ " 3 | \n",
+ " Corporate Auto | \n",
+ " Four-Door Car | \n",
+ " 790.784983 | \n",
+ " California | \n",
+ " M | \n",
+ "
\n",
+ " \n",
+ " 12072 | \n",
+ " UP19263 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " College | \n",
+ " 7524.442436 | \n",
+ " 21941.0 | \n",
+ " 96.0 | \n",
+ " 0 | \n",
+ " Personal Auto | \n",
+ " Four-Door Car | \n",
+ " 691.200000 | \n",
+ " California | \n",
+ " M | \n",
+ "
\n",
+ " \n",
+ " 12073 | \n",
+ " Y167826 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " College | \n",
+ " 2611.836866 | \n",
+ " 0.0 | \n",
+ " 77.0 | \n",
+ " 0 | \n",
+ " Corporate Auto | \n",
+ " Two-Door Car | \n",
+ " 369.600000 | \n",
+ " California | \n",
+ " M | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
12074 rows × 13 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Customer ST GENDER Education \\\n",
+ "0 RB50392 Washington NaN Master \n",
+ "1 QZ44356 Arizona F Bachelor \n",
+ "2 AI49188 Nevada F Bachelor \n",
+ "3 WW63253 California M Bachelor \n",
+ "4 GA49547 Washington M High School or Below \n",
+ "... ... ... ... ... \n",
+ "12069 LA72316 NaN NaN Bachelor \n",
+ "12070 PK87824 NaN NaN College \n",
+ "12071 TD14365 NaN NaN Bachelor \n",
+ "12072 UP19263 NaN NaN College \n",
+ "12073 Y167826 NaN NaN College \n",
+ "\n",
+ " Customer Lifetime Value Income Monthly Premium Auto \\\n",
+ "0 NaN 0.0 1000.0 \n",
+ "1 697953.59% 0.0 94.0 \n",
+ "2 1288743.17% 48767.0 108.0 \n",
+ "3 764586.18% 0.0 106.0 \n",
+ "4 536307.65% 36357.0 68.0 \n",
+ "... ... ... ... \n",
+ "12069 23405.98798 71941.0 73.0 \n",
+ "12070 3096.511217 21604.0 79.0 \n",
+ "12071 8163.890428 0.0 85.0 \n",
+ "12072 7524.442436 21941.0 96.0 \n",
+ "12073 2611.836866 0.0 77.0 \n",
+ "\n",
+ " Number of Open Complaints Policy Type Vehicle Class \\\n",
+ "0 1/0/00 Personal Auto Four-Door Car \n",
+ "1 1/0/00 Personal Auto Four-Door Car \n",
+ "2 1/0/00 Personal Auto Two-Door Car \n",
+ "3 1/0/00 Corporate Auto SUV \n",
+ "4 1/0/00 Personal Auto Four-Door Car \n",
+ "... ... ... ... \n",
+ "12069 0 Personal Auto Four-Door Car \n",
+ "12070 0 Corporate Auto Four-Door Car \n",
+ "12071 3 Corporate Auto Four-Door Car \n",
+ "12072 0 Personal Auto Four-Door Car \n",
+ "12073 0 Corporate Auto Two-Door Car \n",
+ "\n",
+ " Total Claim Amount State Gender \n",
+ "0 2.704934 NaN NaN \n",
+ "1 1131.464935 NaN NaN \n",
+ "2 566.472247 NaN NaN \n",
+ "3 529.881344 NaN NaN \n",
+ "4 17.269323 NaN NaN \n",
+ "... ... ... ... \n",
+ "12069 198.234764 California M \n",
+ "12070 379.200000 California F \n",
+ "12071 790.784983 California M \n",
+ "12072 691.200000 California M \n",
+ "12073 369.600000 California M \n",
+ "\n",
+ "[12074 rows x 13 columns]"
+ ]
+ },
+ "execution_count": 45,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Write the combined frame to disk (without the index), then display it.\n",
+ "file_combined.to_csv(path_or_buf=\"file_combined.csv\", index=False)\n",
+ "file_combined"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 46,
+ "id": "caab6731-9b5d-471f-8cd9-368402168c3f",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Original columns: ['Customer', 'ST', 'GENDER', 'Education', 'Customer Lifetime Value', 'Income', 'Monthly Premium Auto', 'Number of Open Complaints', 'Policy Type', 'Vehicle Class', 'Total Claim Amount', 'State', 'Gender']\n"
+ ]
+ }
+ ],
+ "source": [
+ "print(\"Original columns:\", file_combined.columns.tolist())"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 47,
+ "id": "dea296b8-efcd-45e4-8f21-3b0313c0ae46",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Function to clean column names\n",
+ "def clean_column_names(columns):\n",
+ " new_cols = []\n",
+ " for col in columns:\n",
+ " col = col.upper() \n",
+ " col = col.strip() \n",
+ " col = col.replace(\" \", \"_\") \n",
+ " new_cols.append(col)\n",
+ " return new_cols"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 48,
+ "id": "b18a1916-8122-4b89-9e32-0e4095492698",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "file_combined.columns = clean_column_names(file_combined.columns)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 49,
+ "id": "62a01a94-850b-42ca-876b-96b2246b87cf",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " CUSTOMER | \n",
+ " ST | \n",
+ " GENDER | \n",
+ " EDUCATION | \n",
+ " CUSTOMER_LIFETIME_VALUE | \n",
+ " INCOME | \n",
+ " MONTHLY_PREMIUM_AUTO | \n",
+ " NUMBER_OF_OPEN_COMPLAINTS | \n",
+ " POLICY_TYPE | \n",
+ " VEHICLE_CLASS | \n",
+ " TOTAL_CLAIM_AMOUNT | \n",
+ " STATE | \n",
+ " GENDER | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " RB50392 | \n",
+ " Washington | \n",
+ " NaN | \n",
+ " Master | \n",
+ " NaN | \n",
+ " 0.0 | \n",
+ " 1000.0 | \n",
+ " 1/0/00 | \n",
+ " Personal Auto | \n",
+ " Four-Door Car | \n",
+ " 2.704934 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " QZ44356 | \n",
+ " Arizona | \n",
+ " F | \n",
+ " Bachelor | \n",
+ " 697953.59% | \n",
+ " 0.0 | \n",
+ " 94.0 | \n",
+ " 1/0/00 | \n",
+ " Personal Auto | \n",
+ " Four-Door Car | \n",
+ " 1131.464935 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " AI49188 | \n",
+ " Nevada | \n",
+ " F | \n",
+ " Bachelor | \n",
+ " 1288743.17% | \n",
+ " 48767.0 | \n",
+ " 108.0 | \n",
+ " 1/0/00 | \n",
+ " Personal Auto | \n",
+ " Two-Door Car | \n",
+ " 566.472247 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " WW63253 | \n",
+ " California | \n",
+ " M | \n",
+ " Bachelor | \n",
+ " 764586.18% | \n",
+ " 0.0 | \n",
+ " 106.0 | \n",
+ " 1/0/00 | \n",
+ " Corporate Auto | \n",
+ " SUV | \n",
+ " 529.881344 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " GA49547 | \n",
+ " Washington | \n",
+ " M | \n",
+ " High School or Below | \n",
+ " 536307.65% | \n",
+ " 36357.0 | \n",
+ " 68.0 | \n",
+ " 1/0/00 | \n",
+ " Personal Auto | \n",
+ " Four-Door Car | \n",
+ " 17.269323 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 12069 | \n",
+ " LA72316 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " Bachelor | \n",
+ " 23405.98798 | \n",
+ " 71941.0 | \n",
+ " 73.0 | \n",
+ " 0 | \n",
+ " Personal Auto | \n",
+ " Four-Door Car | \n",
+ " 198.234764 | \n",
+ " California | \n",
+ " M | \n",
+ "
\n",
+ " \n",
+ " 12070 | \n",
+ " PK87824 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " College | \n",
+ " 3096.511217 | \n",
+ " 21604.0 | \n",
+ " 79.0 | \n",
+ " 0 | \n",
+ " Corporate Auto | \n",
+ " Four-Door Car | \n",
+ " 379.200000 | \n",
+ " California | \n",
+ " F | \n",
+ "
\n",
+ " \n",
+ " 12071 | \n",
+ " TD14365 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " Bachelor | \n",
+ " 8163.890428 | \n",
+ " 0.0 | \n",
+ " 85.0 | \n",
+ " 3 | \n",
+ " Corporate Auto | \n",
+ " Four-Door Car | \n",
+ " 790.784983 | \n",
+ " California | \n",
+ " M | \n",
+ "
\n",
+ " \n",
+ " 12072 | \n",
+ " UP19263 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " College | \n",
+ " 7524.442436 | \n",
+ " 21941.0 | \n",
+ " 96.0 | \n",
+ " 0 | \n",
+ " Personal Auto | \n",
+ " Four-Door Car | \n",
+ " 691.200000 | \n",
+ " California | \n",
+ " M | \n",
+ "
\n",
+ " \n",
+ " 12073 | \n",
+ " Y167826 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " College | \n",
+ " 2611.836866 | \n",
+ " 0.0 | \n",
+ " 77.0 | \n",
+ " 0 | \n",
+ " Corporate Auto | \n",
+ " Two-Door Car | \n",
+ " 369.600000 | \n",
+ " California | \n",
+ " M | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
12074 rows × 13 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " CUSTOMER ST GENDER EDUCATION \\\n",
+ "0 RB50392 Washington NaN Master \n",
+ "1 QZ44356 Arizona F Bachelor \n",
+ "2 AI49188 Nevada F Bachelor \n",
+ "3 WW63253 California M Bachelor \n",
+ "4 GA49547 Washington M High School or Below \n",
+ "... ... ... ... ... \n",
+ "12069 LA72316 NaN NaN Bachelor \n",
+ "12070 PK87824 NaN NaN College \n",
+ "12071 TD14365 NaN NaN Bachelor \n",
+ "12072 UP19263 NaN NaN College \n",
+ "12073 Y167826 NaN NaN College \n",
+ "\n",
+ " CUSTOMER_LIFETIME_VALUE INCOME MONTHLY_PREMIUM_AUTO \\\n",
+ "0 NaN 0.0 1000.0 \n",
+ "1 697953.59% 0.0 94.0 \n",
+ "2 1288743.17% 48767.0 108.0 \n",
+ "3 764586.18% 0.0 106.0 \n",
+ "4 536307.65% 36357.0 68.0 \n",
+ "... ... ... ... \n",
+ "12069 23405.98798 71941.0 73.0 \n",
+ "12070 3096.511217 21604.0 79.0 \n",
+ "12071 8163.890428 0.0 85.0 \n",
+ "12072 7524.442436 21941.0 96.0 \n",
+ "12073 2611.836866 0.0 77.0 \n",
+ "\n",
+ " NUMBER_OF_OPEN_COMPLAINTS POLICY_TYPE VEHICLE_CLASS \\\n",
+ "0 1/0/00 Personal Auto Four-Door Car \n",
+ "1 1/0/00 Personal Auto Four-Door Car \n",
+ "2 1/0/00 Personal Auto Two-Door Car \n",
+ "3 1/0/00 Corporate Auto SUV \n",
+ "4 1/0/00 Personal Auto Four-Door Car \n",
+ "... ... ... ... \n",
+ "12069 0 Personal Auto Four-Door Car \n",
+ "12070 0 Corporate Auto Four-Door Car \n",
+ "12071 3 Corporate Auto Four-Door Car \n",
+ "12072 0 Personal Auto Four-Door Car \n",
+ "12073 0 Corporate Auto Two-Door Car \n",
+ "\n",
+ " TOTAL_CLAIM_AMOUNT STATE GENDER \n",
+ "0 2.704934 NaN NaN \n",
+ "1 1131.464935 NaN NaN \n",
+ "2 566.472247 NaN NaN \n",
+ "3 529.881344 NaN NaN \n",
+ "4 17.269323 NaN NaN \n",
+ "... ... ... ... \n",
+ "12069 198.234764 California M \n",
+ "12070 379.200000 California F \n",
+ "12071 790.784983 California M \n",
+ "12072 691.200000 California M \n",
+ "12073 369.600000 California M \n",
+ "\n",
+ "[12074 rows x 13 columns]"
+ ]
+ },
+ "execution_count": 49,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "file_combined"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 50,
+ "id": "8793b688-af31-4938-aea3-3f7bd12412e3",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# data_cleaning.py\n",
+ "\n",
+ "def standardize_gender(file_combined, column=\"GENDER\"):\n",
+ " \"\"\"Standardize Gender column to 'M' and 'F'.\"\"\"\n",
+ " if column in file_combined.columns:\n",
+ " file_combined[column] = file_combined[column].str.upper().replace({\n",
+ " \"FEMALE\": \"F\",\n",
+ " \"FEMAL\": \"F\",\n",
+ " \"MALE\": \"M\"\n",
+ " })\n",
+ " return file_combined\n",
+ " \n",
+ "def standardize_states(file_combined, column=\"STATES\"):\n",
+ " \"\"\"Replace state abbreviations with full names.\"\"\"\n",
+ " state_mapping = {\n",
+ " \"AZ\": \"Arizona\",\n",
+ " \"Cali\": \"California\",\n",
+ " \"WA\": \"Washington\"\n",
+ " }\n",
+ " if column in file_combined.columns:\n",
+ " file_combined[column] = file_combined[column].replace(state_mapping)\n",
+ " return file_combined\n",
+ "\n",
+ "def standardize_education(file_combined, column=\"EDUCATION\"):\n",
+ " \"\"\"Standardize education column (Bachelors -> Bachelor).\"\"\"\n",
+ " if column in file_combined.columns:\n",
+ " file_combined[column] = file_combined[column].replace({\n",
+ " \"Bachelors\": \"Bachelor\"\n",
+ " })\n",
+ " return file_combined\n",
+ "\n",
+ "def clean_customer_lifetime_value(file_combined, column=\"CUSTOMER_LIFETIME_VALUE\"):\n",
+ " \"\"\"Remove % or $ and convert to numeric.\"\"\"\n",
+ " if column in file_combined.columns:\n",
+ " file_combined[column] = file_combined[column].astype(str).str.replace(\"%\", \"\").str.replace(\"$\", \"\").str.strip()\n",
+ " file_combined[column] = pd.to_numeric(file_combined[column], errors='coerce')\n",
+ " return file_combined\n",
+ "\n",
+ "def standardize_vehicle_class(file_combined, column=\"VEHICLE_CLASS\"):\n",
+ "    \"\"\"Group the high-end vehicle classes under the single label 'Luxury'.\n",
+ "\n",
+ "    'Sports Car', 'Luxury SUV' and 'Luxury Car' are replaced by 'Luxury';\n",
+ "    all other classes are kept. The frame is updated in place and returned;\n",
+ "    a missing column is a no-op.\n",
+ "    \"\"\"\n",
+ "    if column not in file_combined.columns:\n",
+ "        return file_combined\n",
+ "    luxury_labels = (\"Sports Car\", \"Luxury SUV\", \"Luxury Car\")\n",
+ "    to_luxury = dict.fromkeys(luxury_labels, \"Luxury\")\n",
+ "    file_combined[column] = file_combined[column].replace(to_luxury)\n",
+ "    return file_combined\n",
+ "\n",
+ "def clean_number_of_open_complaints(file_combined, column=\"NUMBER_OF_OPEN_COMPLAINTS\"):\n",
+ "    \"\"\"Convert `column` to integer complaint counts.\n",
+ "\n",
+ "    Strings such as '1/5/00' yield their middle field (5); values without a\n",
+ "    '/' are converted directly; missing values become 0. The frame is updated\n",
+ "    in place and returned; a missing column is a no-op.\n",
+ "\n",
+ "    Raises\n",
+ "    ------\n",
+ "    ValueError\n",
+ "        If a value is neither missing nor numeric.\n",
+ "    \"\"\"\n",
+ "    def extract_middle(value):\n",
+ "        if pd.isna(value):\n",
+ "            return 0\n",
+ "        parts = str(value).split(\"/\")\n",
+ "        token = parts[1] if len(parts) >= 2 else parts[0]\n",
+ "        try:\n",
+ "            return int(token)\n",
+ "        except ValueError:\n",
+ "            # Float-valued cells (e.g. 2.0) stringify to '2.0', which int() rejects.\n",
+ "            return int(float(token))\n",
+ "\n",
+ "    if column in file_combined.columns:\n",
+ "        file_combined[column] = file_combined[column].apply(extract_middle)\n",
+ "    return file_combined\n",
+ "\n",
+ "def fill_missing_values(file_combined):\n",
+ "    \"\"\"Fill missing numeric values with the column mean and categorical ones with the mode.\n",
+ "\n",
+ "    Numeric means int64/float64 columns; categorical means object/category\n",
+ "    columns. A categorical column with no non-missing value has no mode and\n",
+ "    is left as it is. The frame is updated in place and returned.\n",
+ "    \"\"\"\n",
+ "    num_cols = file_combined.select_dtypes(include=['int64', 'float64']).columns\n",
+ "    cat_cols = file_combined.select_dtypes(include=['object', 'category']).columns\n",
+ "\n",
+ "    for col in num_cols:\n",
+ "        # Assign the result back: a chained fillna(inplace=True) only updates a\n",
+ "        # temporary when pandas Copy-on-Write is active, leaving the frame unchanged.\n",
+ "        file_combined[col] = file_combined[col].fillna(file_combined[col].mean())\n",
+ "    for col in cat_cols:\n",
+ "        modes = file_combined[col].mode()\n",
+ "        if modes.empty:\n",
+ "            # Nothing but missing values in this column, so there is no mode to fill with.\n",
+ "            continue\n",
+ "        file_combined[col] = file_combined[col].fillna(modes.iloc[0])\n",
+ "    return file_combined\n",
+ "\n",
+ "def drop_duplicates(file_combined, subset=None):\n",
+ "    \"\"\"Remove duplicate rows in place, keeping the first occurrence.\n",
+ "\n",
+ "    `subset` optionally limits the comparison to the given columns. The\n",
+ "    surviving rows are renumbered from 0 and the same frame is returned.\n",
+ "    \"\"\"\n",
+ "    # ignore_index=True renumbers the remaining rows, replacing a separate reset_index call.\n",
+ "    file_combined.drop_duplicates(subset=subset, keep='first', inplace=True, ignore_index=True)\n",
+ "    return file_combined\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 53,
+ "id": "b8b5114a-311a-4d7a-9f36-6d2813470031",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def main(file_combined):\n",
+ "    \"\"\"Run every cleaning and formatting helper over the frame, in a fixed order.\n",
+ "\n",
+ "    Each helper receives the frame returned by the previous one; the frame\n",
+ "    returned by the last helper is the result.\n",
+ "    \"\"\"\n",
+ "    cleaning_steps = (\n",
+ "        standardize_gender,\n",
+ "        standardize_states,\n",
+ "        standardize_education,\n",
+ "        clean_customer_lifetime_value,\n",
+ "        standardize_vehicle_class,\n",
+ "        clean_number_of_open_complaints,\n",
+ "        fill_missing_values,\n",
+ "        drop_duplicates,\n",
+ "    )\n",
+ "    for step in cleaning_steps:\n",
+ "        file_combined = step(file_combined)\n",
+ "    return file_combined"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 54,
+ "id": "e068e8c9-bbc8-4238-8312-a0caf55e20bb",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " CUSTOMER | \n",
+ " ST | \n",
+ " GENDER | \n",
+ " EDUCATION | \n",
+ " CUSTOMER_LIFETIME_VALUE | \n",
+ " INCOME | \n",
+ " MONTHLY_PREMIUM_AUTO | \n",
+ " NUMBER_OF_OPEN_COMPLAINTS | \n",
+ " POLICY_TYPE | \n",
+ " VEHICLE_CLASS | \n",
+ " TOTAL_CLAIM_AMOUNT | \n",
+ " STATE | \n",
+ " GENDER | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " RB50392 | \n",
+ " Washington | \n",
+ " NaN | \n",
+ " Master | \n",
+ " NaN | \n",
+ " 0.0 | \n",
+ " 1000.0 | \n",
+ " 1/0/00 | \n",
+ " Personal Auto | \n",
+ " Four-Door Car | \n",
+ " 2.704934 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " QZ44356 | \n",
+ " Arizona | \n",
+ " F | \n",
+ " Bachelor | \n",
+ " 697953.59% | \n",
+ " 0.0 | \n",
+ " 94.0 | \n",
+ " 1/0/00 | \n",
+ " Personal Auto | \n",
+ " Four-Door Car | \n",
+ " 1131.464935 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " AI49188 | \n",
+ " Nevada | \n",
+ " F | \n",
+ " Bachelor | \n",
+ " 1288743.17% | \n",
+ " 48767.0 | \n",
+ " 108.0 | \n",
+ " 1/0/00 | \n",
+ " Personal Auto | \n",
+ " Two-Door Car | \n",
+ " 566.472247 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " WW63253 | \n",
+ " California | \n",
+ " M | \n",
+ " Bachelor | \n",
+ " 764586.18% | \n",
+ " 0.0 | \n",
+ " 106.0 | \n",
+ " 1/0/00 | \n",
+ " Corporate Auto | \n",
+ " SUV | \n",
+ " 529.881344 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " GA49547 | \n",
+ " Washington | \n",
+ " M | \n",
+ " High School or Below | \n",
+ " 536307.65% | \n",
+ " 36357.0 | \n",
+ " 68.0 | \n",
+ " 1/0/00 | \n",
+ " Personal Auto | \n",
+ " Four-Door Car | \n",
+ " 17.269323 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 12069 | \n",
+ " LA72316 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " Bachelor | \n",
+ " 23405.98798 | \n",
+ " 71941.0 | \n",
+ " 73.0 | \n",
+ " 0 | \n",
+ " Personal Auto | \n",
+ " Four-Door Car | \n",
+ " 198.234764 | \n",
+ " California | \n",
+ " M | \n",
+ "
\n",
+ " \n",
+ " 12070 | \n",
+ " PK87824 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " College | \n",
+ " 3096.511217 | \n",
+ " 21604.0 | \n",
+ " 79.0 | \n",
+ " 0 | \n",
+ " Corporate Auto | \n",
+ " Four-Door Car | \n",
+ " 379.200000 | \n",
+ " California | \n",
+ " F | \n",
+ "
\n",
+ " \n",
+ " 12071 | \n",
+ " TD14365 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " Bachelor | \n",
+ " 8163.890428 | \n",
+ " 0.0 | \n",
+ " 85.0 | \n",
+ " 3 | \n",
+ " Corporate Auto | \n",
+ " Four-Door Car | \n",
+ " 790.784983 | \n",
+ " California | \n",
+ " M | \n",
+ "
\n",
+ " \n",
+ " 12072 | \n",
+ " UP19263 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " College | \n",
+ " 7524.442436 | \n",
+ " 21941.0 | \n",
+ " 96.0 | \n",
+ " 0 | \n",
+ " Personal Auto | \n",
+ " Four-Door Car | \n",
+ " 691.200000 | \n",
+ " California | \n",
+ " M | \n",
+ "
\n",
+ " \n",
+ " 12073 | \n",
+ " Y167826 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " College | \n",
+ " 2611.836866 | \n",
+ " 0.0 | \n",
+ " 77.0 | \n",
+ " 0 | \n",
+ " Corporate Auto | \n",
+ " Two-Door Car | \n",
+ " 369.600000 | \n",
+ " California | \n",
+ " M | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
12074 rows × 13 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " CUSTOMER ST GENDER EDUCATION \\\n",
+ "0 RB50392 Washington NaN Master \n",
+ "1 QZ44356 Arizona F Bachelor \n",
+ "2 AI49188 Nevada F Bachelor \n",
+ "3 WW63253 California M Bachelor \n",
+ "4 GA49547 Washington M High School or Below \n",
+ "... ... ... ... ... \n",
+ "12069 LA72316 NaN NaN Bachelor \n",
+ "12070 PK87824 NaN NaN College \n",
+ "12071 TD14365 NaN NaN Bachelor \n",
+ "12072 UP19263 NaN NaN College \n",
+ "12073 Y167826 NaN NaN College \n",
+ "\n",
+ " CUSTOMER_LIFETIME_VALUE INCOME MONTHLY_PREMIUM_AUTO \\\n",
+ "0 NaN 0.0 1000.0 \n",
+ "1 697953.59% 0.0 94.0 \n",
+ "2 1288743.17% 48767.0 108.0 \n",
+ "3 764586.18% 0.0 106.0 \n",
+ "4 536307.65% 36357.0 68.0 \n",
+ "... ... ... ... \n",
+ "12069 23405.98798 71941.0 73.0 \n",
+ "12070 3096.511217 21604.0 79.0 \n",
+ "12071 8163.890428 0.0 85.0 \n",
+ "12072 7524.442436 21941.0 96.0 \n",
+ "12073 2611.836866 0.0 77.0 \n",
+ "\n",
+ " NUMBER_OF_OPEN_COMPLAINTS POLICY_TYPE VEHICLE_CLASS \\\n",
+ "0 1/0/00 Personal Auto Four-Door Car \n",
+ "1 1/0/00 Personal Auto Four-Door Car \n",
+ "2 1/0/00 Personal Auto Two-Door Car \n",
+ "3 1/0/00 Corporate Auto SUV \n",
+ "4 1/0/00 Personal Auto Four-Door Car \n",
+ "... ... ... ... \n",
+ "12069 0 Personal Auto Four-Door Car \n",
+ "12070 0 Corporate Auto Four-Door Car \n",
+ "12071 3 Corporate Auto Four-Door Car \n",
+ "12072 0 Personal Auto Four-Door Car \n",
+ "12073 0 Corporate Auto Two-Door Car \n",
+ "\n",
+ " TOTAL_CLAIM_AMOUNT STATE GENDER \n",
+ "0 2.704934 NaN NaN \n",
+ "1 1131.464935 NaN NaN \n",
+ "2 566.472247 NaN NaN \n",
+ "3 529.881344 NaN NaN \n",
+ "4 17.269323 NaN NaN \n",
+ "... ... ... ... \n",
+ "12069 198.234764 California M \n",
+ "12070 379.200000 California F \n",
+ "12071 790.784983 California M \n",
+ "12072 691.200000 California M \n",
+ "12073 369.600000 California M \n",
+ "\n",
+ "[12074 rows x 13 columns]"
+ ]
+ },
+ "execution_count": 54,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# NOTE(review): the saved output of this cell still shows raw values ('697953.59%', '1/0/00'),\n",
+ "# so main() does not appear to have been applied to file_combined yet -- confirm.\n",
+ "file_combined"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "31b8a9e7-7db9-4604-991b-ef6771603e57",
+ "metadata": {
+ "id": "31b8a9e7-7db9-4604-991b-ef6771603e57"
+ },
+ "source": [
+ "# Challenge 2: Structuring Data"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "a877fd6d-7a0c-46d2-9657-f25036e4ca4b",
+ "metadata": {
+ "id": "a877fd6d-7a0c-46d2-9657-f25036e4ca4b"
+ },
+ "source": [
+ "In this challenge, we will continue to work with customer data from an insurance company, but we will use a dataset with more columns, called marketing_customer_analysis_clean.csv, which can be found at the following link:\n",
+ "\n",
+ "https://raw.githubusercontent.com/data-bootcamp-v4/data/main/marketing_customer_analysis_clean.csv\n",
+ "\n",
+ "This dataset contains information such as customer demographics, policy details, vehicle information, and the customer's response to the last marketing campaign. Our goal is to explore and analyze this data by performing data cleaning, formatting, and structuring."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 31,
+ "id": "aa10d9b0-1c27-4d3f-a8e4-db6ab73bfd26",
+ "metadata": {
+ "id": "aa10d9b0-1c27-4d3f-a8e4-db6ab73bfd26"
+ },
+ "outputs": [],
+ "source": [
+ "# Read the marketing dataset directly from the course repository URL (requires network access).\n",
+ "url_4 = 'https://raw.githubusercontent.com/data-bootcamp-v4/data/main/marketing_customer_analysis_clean.csv'\n",
+ "marketing_customer_analysis = pd.read_csv(url_4)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 32,
+ "id": "92d101fa-4da1-41ff-8e28-59e295975fa4",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " unnamed:_0 | \n",
+ " customer | \n",
+ " state | \n",
+ " customer_lifetime_value | \n",
+ " response | \n",
+ " coverage | \n",
+ " education | \n",
+ " effective_to_date | \n",
+ " employmentstatus | \n",
+ " gender | \n",
+ " ... | \n",
+ " number_of_policies | \n",
+ " policy_type | \n",
+ " policy | \n",
+ " renew_offer_type | \n",
+ " sales_channel | \n",
+ " total_claim_amount | \n",
+ " vehicle_class | \n",
+ " vehicle_size | \n",
+ " vehicle_type | \n",
+ " month | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 0 | \n",
+ " DK49336 | \n",
+ " Arizona | \n",
+ " 4809.216960 | \n",
+ " No | \n",
+ " Basic | \n",
+ " College | \n",
+ " 2011-02-18 | \n",
+ " Employed | \n",
+ " M | \n",
+ " ... | \n",
+ " 9 | \n",
+ " Corporate Auto | \n",
+ " Corporate L3 | \n",
+ " Offer3 | \n",
+ " Agent | \n",
+ " 292.800000 | \n",
+ " Four-Door Car | \n",
+ " Medsize | \n",
+ " A | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 1 | \n",
+ " KX64629 | \n",
+ " California | \n",
+ " 2228.525238 | \n",
+ " No | \n",
+ " Basic | \n",
+ " College | \n",
+ " 2011-01-18 | \n",
+ " Unemployed | \n",
+ " F | \n",
+ " ... | \n",
+ " 1 | \n",
+ " Personal Auto | \n",
+ " Personal L3 | \n",
+ " Offer4 | \n",
+ " Call Center | \n",
+ " 744.924331 | \n",
+ " Four-Door Car | \n",
+ " Medsize | \n",
+ " A | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 2 | \n",
+ " LZ68649 | \n",
+ " Washington | \n",
+ " 14947.917300 | \n",
+ " No | \n",
+ " Basic | \n",
+ " Bachelor | \n",
+ " 2011-02-10 | \n",
+ " Employed | \n",
+ " M | \n",
+ " ... | \n",
+ " 2 | \n",
+ " Personal Auto | \n",
+ " Personal L3 | \n",
+ " Offer3 | \n",
+ " Call Center | \n",
+ " 480.000000 | \n",
+ " SUV | \n",
+ " Medsize | \n",
+ " A | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 3 | \n",
+ " XL78013 | \n",
+ " Oregon | \n",
+ " 22332.439460 | \n",
+ " Yes | \n",
+ " Extended | \n",
+ " College | \n",
+ " 2011-01-11 | \n",
+ " Employed | \n",
+ " M | \n",
+ " ... | \n",
+ " 2 | \n",
+ " Corporate Auto | \n",
+ " Corporate L3 | \n",
+ " Offer2 | \n",
+ " Branch | \n",
+ " 484.013411 | \n",
+ " Four-Door Car | \n",
+ " Medsize | \n",
+ " A | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 4 | \n",
+ " QA50777 | \n",
+ " Oregon | \n",
+ " 9025.067525 | \n",
+ " No | \n",
+ " Premium | \n",
+ " Bachelor | \n",
+ " 2011-01-17 | \n",
+ " Medical Leave | \n",
+ " F | \n",
+ " ... | \n",
+ " 7 | \n",
+ " Personal Auto | \n",
+ " Personal L2 | \n",
+ " Offer1 | \n",
+ " Branch | \n",
+ " 707.925645 | \n",
+ " Four-Door Car | \n",
+ " Medsize | \n",
+ " A | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 10905 | \n",
+ " 10905 | \n",
+ " FE99816 | \n",
+ " Nevada | \n",
+ " 15563.369440 | \n",
+ " No | \n",
+ " Premium | \n",
+ " Bachelor | \n",
+ " 2011-01-19 | \n",
+ " Unemployed | \n",
+ " F | \n",
+ " ... | \n",
+ " 7 | \n",
+ " Personal Auto | \n",
+ " Personal L1 | \n",
+ " Offer3 | \n",
+ " Web | \n",
+ " 1214.400000 | \n",
+ " Luxury Car | \n",
+ " Medsize | \n",
+ " A | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 10906 | \n",
+ " 10906 | \n",
+ " KX53892 | \n",
+ " Oregon | \n",
+ " 5259.444853 | \n",
+ " No | \n",
+ " Basic | \n",
+ " College | \n",
+ " 2011-01-06 | \n",
+ " Employed | \n",
+ " F | \n",
+ " ... | \n",
+ " 6 | \n",
+ " Personal Auto | \n",
+ " Personal L3 | \n",
+ " Offer2 | \n",
+ " Branch | \n",
+ " 273.018929 | \n",
+ " Four-Door Car | \n",
+ " Medsize | \n",
+ " A | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 10907 | \n",
+ " 10907 | \n",
+ " TL39050 | \n",
+ " Arizona | \n",
+ " 23893.304100 | \n",
+ " No | \n",
+ " Extended | \n",
+ " Bachelor | \n",
+ " 2011-02-06 | \n",
+ " Employed | \n",
+ " F | \n",
+ " ... | \n",
+ " 2 | \n",
+ " Corporate Auto | \n",
+ " Corporate L3 | \n",
+ " Offer1 | \n",
+ " Web | \n",
+ " 381.306996 | \n",
+ " Luxury SUV | \n",
+ " Medsize | \n",
+ " A | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " 10908 | \n",
+ " 10908 | \n",
+ " WA60547 | \n",
+ " California | \n",
+ " 11971.977650 | \n",
+ " No | \n",
+ " Premium | \n",
+ " College | \n",
+ " 2011-02-13 | \n",
+ " Employed | \n",
+ " F | \n",
+ " ... | \n",
+ " 6 | \n",
+ " Personal Auto | \n",
+ " Personal L1 | \n",
+ " Offer1 | \n",
+ " Branch | \n",
+ " 618.288849 | \n",
+ " SUV | \n",
+ " Medsize | \n",
+ " A | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " 10909 | \n",
+ " 10909 | \n",
+ " IV32877 | \n",
+ " California | \n",
+ " 6857.519928 | \n",
+ " No | \n",
+ " Basic | \n",
+ " Bachelor | \n",
+ " 2011-01-08 | \n",
+ " Unemployed | \n",
+ " M | \n",
+ " ... | \n",
+ " 3 | \n",
+ " Personal Auto | \n",
+ " Personal L1 | \n",
+ " Offer4 | \n",
+ " Web | \n",
+ " 1021.719397 | \n",
+ " SUV | \n",
+ " Medsize | \n",
+ " A | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
10910 rows × 27 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " unnamed:_0 customer state customer_lifetime_value response \\\n",
+ "0 0 DK49336 Arizona 4809.216960 No \n",
+ "1 1 KX64629 California 2228.525238 No \n",
+ "2 2 LZ68649 Washington 14947.917300 No \n",
+ "3 3 XL78013 Oregon 22332.439460 Yes \n",
+ "4 4 QA50777 Oregon 9025.067525 No \n",
+ "... ... ... ... ... ... \n",
+ "10905 10905 FE99816 Nevada 15563.369440 No \n",
+ "10906 10906 KX53892 Oregon 5259.444853 No \n",
+ "10907 10907 TL39050 Arizona 23893.304100 No \n",
+ "10908 10908 WA60547 California 11971.977650 No \n",
+ "10909 10909 IV32877 California 6857.519928 No \n",
+ "\n",
+ " coverage education effective_to_date employmentstatus gender ... \\\n",
+ "0 Basic College 2011-02-18 Employed M ... \n",
+ "1 Basic College 2011-01-18 Unemployed F ... \n",
+ "2 Basic Bachelor 2011-02-10 Employed M ... \n",
+ "3 Extended College 2011-01-11 Employed M ... \n",
+ "4 Premium Bachelor 2011-01-17 Medical Leave F ... \n",
+ "... ... ... ... ... ... ... \n",
+ "10905 Premium Bachelor 2011-01-19 Unemployed F ... \n",
+ "10906 Basic College 2011-01-06 Employed F ... \n",
+ "10907 Extended Bachelor 2011-02-06 Employed F ... \n",
+ "10908 Premium College 2011-02-13 Employed F ... \n",
+ "10909 Basic Bachelor 2011-01-08 Unemployed M ... \n",
+ "\n",
+ " number_of_policies policy_type policy renew_offer_type \\\n",
+ "0 9 Corporate Auto Corporate L3 Offer3 \n",
+ "1 1 Personal Auto Personal L3 Offer4 \n",
+ "2 2 Personal Auto Personal L3 Offer3 \n",
+ "3 2 Corporate Auto Corporate L3 Offer2 \n",
+ "4 7 Personal Auto Personal L2 Offer1 \n",
+ "... ... ... ... ... \n",
+ "10905 7 Personal Auto Personal L1 Offer3 \n",
+ "10906 6 Personal Auto Personal L3 Offer2 \n",
+ "10907 2 Corporate Auto Corporate L3 Offer1 \n",
+ "10908 6 Personal Auto Personal L1 Offer1 \n",
+ "10909 3 Personal Auto Personal L1 Offer4 \n",
+ "\n",
+ " sales_channel total_claim_amount vehicle_class vehicle_size \\\n",
+ "0 Agent 292.800000 Four-Door Car Medsize \n",
+ "1 Call Center 744.924331 Four-Door Car Medsize \n",
+ "2 Call Center 480.000000 SUV Medsize \n",
+ "3 Branch 484.013411 Four-Door Car Medsize \n",
+ "4 Branch 707.925645 Four-Door Car Medsize \n",
+ "... ... ... ... ... \n",
+ "10905 Web 1214.400000 Luxury Car Medsize \n",
+ "10906 Branch 273.018929 Four-Door Car Medsize \n",
+ "10907 Web 381.306996 Luxury SUV Medsize \n",
+ "10908 Branch 618.288849 SUV Medsize \n",
+ "10909 Web 1021.719397 SUV Medsize \n",
+ "\n",
+ " vehicle_type month \n",
+ "0 A 2 \n",
+ "1 A 1 \n",
+ "2 A 2 \n",
+ "3 A 1 \n",
+ "4 A 1 \n",
+ "... ... ... \n",
+ "10905 A 1 \n",
+ "10906 A 1 \n",
+ "10907 A 2 \n",
+ "10908 A 2 \n",
+ "10909 A 1 \n",
+ "\n",
+ "[10910 rows x 27 columns]"
+ ]
+ },
+ "execution_count": 32,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Preview the loaded frame; pandas shows only the first and last rows of a frame this large.\n",
+ "marketing_customer_analysis"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 33,
+ "id": "0d23888a-a380-4d87-9532-a4f699391488",
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Original columns: ['unnamed:_0', 'customer', 'state', 'customer_lifetime_value', 'response', 'coverage', 'education', 'effective_to_date', 'employmentstatus', 'gender', 'income', 'location_code', 'marital_status', 'monthly_premium_auto', 'months_since_last_claim', 'months_since_policy_inception', 'number_of_open_complaints', 'number_of_policies', 'policy_type', 'policy', 'renew_offer_type', 'sales_channel', 'total_claim_amount', 'vehicle_class', 'vehicle_size', 'vehicle_type', 'month']\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Print the column labels as loaded, before they are renamed.\n",
+ "print(\"Original columns:\", marketing_customer_analysis.columns.tolist())"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 37,
+ "id": "8677783d-31c0-40fb-a5f6-264712e2b07a",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Function to clean column names\n",
+ "def clean_column_names_1(columns):\n",
+ "    \"\"\"Return the labels upper-cased, trimmed, and with spaces replaced by underscores.\n",
+ "\n",
+ "    `columns` is any iterable of strings; a new list is returned in the same order.\n",
+ "    \"\"\"\n",
+ "    return [label.upper().strip().replace(\" \", \"_\") for label in columns]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 38,
+ "id": "da4ee899-45da-4eeb-8424-9bd4e0ba643e",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Overwrite the frame's column labels with their cleaned (upper-case, underscore-separated) form.\n",
+ "marketing_customer_analysis.columns = clean_column_names_1(marketing_customer_analysis.columns)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 39,
+ "id": "aa5f9d49-2c73-4fb9-958d-7a44b7484882",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " UNNAMED:_0 | \n",
+ " CUSTOMER | \n",
+ " STATE | \n",
+ " CUSTOMER_LIFETIME_VALUE | \n",
+ " RESPONSE | \n",
+ " COVERAGE | \n",
+ " EDUCATION | \n",
+ " EFFECTIVE_TO_DATE | \n",
+ " EMPLOYMENTSTATUS | \n",
+ " GENDER | \n",
+ " ... | \n",
+ " NUMBER_OF_POLICIES | \n",
+ " POLICY_TYPE | \n",
+ " POLICY | \n",
+ " RENEW_OFFER_TYPE | \n",
+ " SALES_CHANNEL | \n",
+ " TOTAL_CLAIM_AMOUNT | \n",
+ " VEHICLE_CLASS | \n",
+ " VEHICLE_SIZE | \n",
+ " VEHICLE_TYPE | \n",
+ " MONTH | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 0 | \n",
+ " DK49336 | \n",
+ " Arizona | \n",
+ " 4809.216960 | \n",
+ " No | \n",
+ " Basic | \n",
+ " College | \n",
+ " 2011-02-18 | \n",
+ " Employed | \n",
+ " M | \n",
+ " ... | \n",
+ " 9 | \n",
+ " Corporate Auto | \n",
+ " Corporate L3 | \n",
+ " Offer3 | \n",
+ " Agent | \n",
+ " 292.800000 | \n",
+ " Four-Door Car | \n",
+ " Medsize | \n",
+ " A | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 1 | \n",
+ " KX64629 | \n",
+ " California | \n",
+ " 2228.525238 | \n",
+ " No | \n",
+ " Basic | \n",
+ " College | \n",
+ " 2011-01-18 | \n",
+ " Unemployed | \n",
+ " F | \n",
+ " ... | \n",
+ " 1 | \n",
+ " Personal Auto | \n",
+ " Personal L3 | \n",
+ " Offer4 | \n",
+ " Call Center | \n",
+ " 744.924331 | \n",
+ " Four-Door Car | \n",
+ " Medsize | \n",
+ " A | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 2 | \n",
+ " LZ68649 | \n",
+ " Washington | \n",
+ " 14947.917300 | \n",
+ " No | \n",
+ " Basic | \n",
+ " Bachelor | \n",
+ " 2011-02-10 | \n",
+ " Employed | \n",
+ " M | \n",
+ " ... | \n",
+ " 2 | \n",
+ " Personal Auto | \n",
+ " Personal L3 | \n",
+ " Offer3 | \n",
+ " Call Center | \n",
+ " 480.000000 | \n",
+ " SUV | \n",
+ " Medsize | \n",
+ " A | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 3 | \n",
+ " XL78013 | \n",
+ " Oregon | \n",
+ " 22332.439460 | \n",
+ " Yes | \n",
+ " Extended | \n",
+ " College | \n",
+ " 2011-01-11 | \n",
+ " Employed | \n",
+ " M | \n",
+ " ... | \n",
+ " 2 | \n",
+ " Corporate Auto | \n",
+ " Corporate L3 | \n",
+ " Offer2 | \n",
+ " Branch | \n",
+ " 484.013411 | \n",
+ " Four-Door Car | \n",
+ " Medsize | \n",
+ " A | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 4 | \n",
+ " QA50777 | \n",
+ " Oregon | \n",
+ " 9025.067525 | \n",
+ " No | \n",
+ " Premium | \n",
+ " Bachelor | \n",
+ " 2011-01-17 | \n",
+ " Medical Leave | \n",
+ " F | \n",
+ " ... | \n",
+ " 7 | \n",
+ " Personal Auto | \n",
+ " Personal L2 | \n",
+ " Offer1 | \n",
+ " Branch | \n",
+ " 707.925645 | \n",
+ " Four-Door Car | \n",
+ " Medsize | \n",
+ " A | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 10905 | \n",
+ " 10905 | \n",
+ " FE99816 | \n",
+ " Nevada | \n",
+ " 15563.369440 | \n",
+ " No | \n",
+ " Premium | \n",
+ " Bachelor | \n",
+ " 2011-01-19 | \n",
+ " Unemployed | \n",
+ " F | \n",
+ " ... | \n",
+ " 7 | \n",
+ " Personal Auto | \n",
+ " Personal L1 | \n",
+ " Offer3 | \n",
+ " Web | \n",
+ " 1214.400000 | \n",
+ " Luxury Car | \n",
+ " Medsize | \n",
+ " A | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 10906 | \n",
+ " 10906 | \n",
+ " KX53892 | \n",
+ " Oregon | \n",
+ " 5259.444853 | \n",
+ " No | \n",
+ " Basic | \n",
+ " College | \n",
+ " 2011-01-06 | \n",
+ " Employed | \n",
+ " F | \n",
+ " ... | \n",
+ " 6 | \n",
+ " Personal Auto | \n",
+ " Personal L3 | \n",
+ " Offer2 | \n",
+ " Branch | \n",
+ " 273.018929 | \n",
+ " Four-Door Car | \n",
+ " Medsize | \n",
+ " A | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 10907 | \n",
+ " 10907 | \n",
+ " TL39050 | \n",
+ " Arizona | \n",
+ " 23893.304100 | \n",
+ " No | \n",
+ " Extended | \n",
+ " Bachelor | \n",
+ " 2011-02-06 | \n",
+ " Employed | \n",
+ " F | \n",
+ " ... | \n",
+ " 2 | \n",
+ " Corporate Auto | \n",
+ " Corporate L3 | \n",
+ " Offer1 | \n",
+ " Web | \n",
+ " 381.306996 | \n",
+ " Luxury SUV | \n",
+ " Medsize | \n",
+ " A | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " 10908 | \n",
+ " 10908 | \n",
+ " WA60547 | \n",
+ " California | \n",
+ " 11971.977650 | \n",
+ " No | \n",
+ " Premium | \n",
+ " College | \n",
+ " 2011-02-13 | \n",
+ " Employed | \n",
+ " F | \n",
+ " ... | \n",
+ " 6 | \n",
+ " Personal Auto | \n",
+ " Personal L1 | \n",
+ " Offer1 | \n",
+ " Branch | \n",
+ " 618.288849 | \n",
+ " SUV | \n",
+ " Medsize | \n",
+ " A | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " 10909 | \n",
+ " 10909 | \n",
+ " IV32877 | \n",
+ " California | \n",
+ " 6857.519928 | \n",
+ " No | \n",
+ " Basic | \n",
+ " Bachelor | \n",
+ " 2011-01-08 | \n",
+ " Unemployed | \n",
+ " M | \n",
+ " ... | \n",
+ " 3 | \n",
+ " Personal Auto | \n",
+ " Personal L1 | \n",
+ " Offer4 | \n",
+ " Web | \n",
+ " 1021.719397 | \n",
+ " SUV | \n",
+ " Medsize | \n",
+ " A | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
10910 rows × 27 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " UNNAMED:_0 CUSTOMER STATE CUSTOMER_LIFETIME_VALUE RESPONSE \\\n",
+ "0 0 DK49336 Arizona 4809.216960 No \n",
+ "1 1 KX64629 California 2228.525238 No \n",
+ "2 2 LZ68649 Washington 14947.917300 No \n",
+ "3 3 XL78013 Oregon 22332.439460 Yes \n",
+ "4 4 QA50777 Oregon 9025.067525 No \n",
+ "... ... ... ... ... ... \n",
+ "10905 10905 FE99816 Nevada 15563.369440 No \n",
+ "10906 10906 KX53892 Oregon 5259.444853 No \n",
+ "10907 10907 TL39050 Arizona 23893.304100 No \n",
+ "10908 10908 WA60547 California 11971.977650 No \n",
+ "10909 10909 IV32877 California 6857.519928 No \n",
+ "\n",
+ " COVERAGE EDUCATION EFFECTIVE_TO_DATE EMPLOYMENTSTATUS GENDER ... \\\n",
+ "0 Basic College 2011-02-18 Employed M ... \n",
+ "1 Basic College 2011-01-18 Unemployed F ... \n",
+ "2 Basic Bachelor 2011-02-10 Employed M ... \n",
+ "3 Extended College 2011-01-11 Employed M ... \n",
+ "4 Premium Bachelor 2011-01-17 Medical Leave F ... \n",
+ "... ... ... ... ... ... ... \n",
+ "10905 Premium Bachelor 2011-01-19 Unemployed F ... \n",
+ "10906 Basic College 2011-01-06 Employed F ... \n",
+ "10907 Extended Bachelor 2011-02-06 Employed F ... \n",
+ "10908 Premium College 2011-02-13 Employed F ... \n",
+ "10909 Basic Bachelor 2011-01-08 Unemployed M ... \n",
+ "\n",
+ " NUMBER_OF_POLICIES POLICY_TYPE POLICY RENEW_OFFER_TYPE \\\n",
+ "0 9 Corporate Auto Corporate L3 Offer3 \n",
+ "1 1 Personal Auto Personal L3 Offer4 \n",
+ "2 2 Personal Auto Personal L3 Offer3 \n",
+ "3 2 Corporate Auto Corporate L3 Offer2 \n",
+ "4 7 Personal Auto Personal L2 Offer1 \n",
+ "... ... ... ... ... \n",
+ "10905 7 Personal Auto Personal L1 Offer3 \n",
+ "10906 6 Personal Auto Personal L3 Offer2 \n",
+ "10907 2 Corporate Auto Corporate L3 Offer1 \n",
+ "10908 6 Personal Auto Personal L1 Offer1 \n",
+ "10909 3 Personal Auto Personal L1 Offer4 \n",
+ "\n",
+ " SALES_CHANNEL TOTAL_CLAIM_AMOUNT VEHICLE_CLASS VEHICLE_SIZE \\\n",
+ "0 Agent 292.800000 Four-Door Car Medsize \n",
+ "1 Call Center 744.924331 Four-Door Car Medsize \n",
+ "2 Call Center 480.000000 SUV Medsize \n",
+ "3 Branch 484.013411 Four-Door Car Medsize \n",
+ "4 Branch 707.925645 Four-Door Car Medsize \n",
+ "... ... ... ... ... \n",
+ "10905 Web 1214.400000 Luxury Car Medsize \n",
+ "10906 Branch 273.018929 Four-Door Car Medsize \n",
+ "10907 Web 381.306996 Luxury SUV Medsize \n",
+ "10908 Branch 618.288849 SUV Medsize \n",
+ "10909 Web 1021.719397 SUV Medsize \n",
+ "\n",
+ " VEHICLE_TYPE MONTH \n",
+ "0 A 2 \n",
+ "1 A 1 \n",
+ "2 A 2 \n",
+ "3 A 1 \n",
+ "4 A 1 \n",
+ "... ... ... \n",
+ "10905 A 1 \n",
+ "10906 A 1 \n",
+ "10907 A 2 \n",
+ "10908 A 2 \n",
+ "10909 A 1 \n",
+ "\n",
+ "[10910 rows x 27 columns]"
+ ]
+ },
+ "execution_count": 39,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Preview the frame again now that the column labels are upper-cased.\n",
+ "marketing_customer_analysis"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 40,
+ "id": "2eec22a8-4bbb-4655-90ed-a24025c3eaa5",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " UNNAMED:_0 | \n",
+ " CUSTOMER | \n",
+ " STATE | \n",
+ " CUSTOMER_LIFETIME_VALUE | \n",
+ " RESPONSE | \n",
+ " COVERAGE | \n",
+ " EDUCATION | \n",
+ " EFFECTIVE_TO_DATE | \n",
+ " EMPLOYMENT_STATUS | \n",
+ " GENDER | \n",
+ " ... | \n",
+ " NUMBER_OF_POLICIES | \n",
+ " POLICY_TYPE | \n",
+ " POLICY | \n",
+ " RENEW_OFFER_TYPE | \n",
+ " SALES_CHANNEL | \n",
+ " TOTAL_CLAIM_AMOUNT | \n",
+ " VEHICLE_CLASS | \n",
+ " VEHICLE_SIZE | \n",
+ " VEHICLE_TYPE | \n",
+ " MONTH | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 0 | \n",
+ " DK49336 | \n",
+ " Arizona | \n",
+ " 4809.216960 | \n",
+ " No | \n",
+ " Basic | \n",
+ " College | \n",
+ " 2011-02-18 | \n",
+ " Employed | \n",
+ " M | \n",
+ " ... | \n",
+ " 9 | \n",
+ " Corporate Auto | \n",
+ " Corporate L3 | \n",
+ " Offer3 | \n",
+ " Agent | \n",
+ " 292.800000 | \n",
+ " Four-Door Car | \n",
+ " Medsize | \n",
+ " A | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 1 | \n",
+ " KX64629 | \n",
+ " California | \n",
+ " 2228.525238 | \n",
+ " No | \n",
+ " Basic | \n",
+ " College | \n",
+ " 2011-01-18 | \n",
+ " Unemployed | \n",
+ " F | \n",
+ " ... | \n",
+ " 1 | \n",
+ " Personal Auto | \n",
+ " Personal L3 | \n",
+ " Offer4 | \n",
+ " Call Center | \n",
+ " 744.924331 | \n",
+ " Four-Door Car | \n",
+ " Medsize | \n",
+ " A | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 2 | \n",
+ " LZ68649 | \n",
+ " Washington | \n",
+ " 14947.917300 | \n",
+ " No | \n",
+ " Basic | \n",
+ " Bachelor | \n",
+ " 2011-02-10 | \n",
+ " Employed | \n",
+ " M | \n",
+ " ... | \n",
+ " 2 | \n",
+ " Personal Auto | \n",
+ " Personal L3 | \n",
+ " Offer3 | \n",
+ " Call Center | \n",
+ " 480.000000 | \n",
+ " SUV | \n",
+ " Medsize | \n",
+ " A | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 3 | \n",
+ " XL78013 | \n",
+ " Oregon | \n",
+ " 22332.439460 | \n",
+ " Yes | \n",
+ " Extended | \n",
+ " College | \n",
+ " 2011-01-11 | \n",
+ " Employed | \n",
+ " M | \n",
+ " ... | \n",
+ " 2 | \n",
+ " Corporate Auto | \n",
+ " Corporate L3 | \n",
+ " Offer2 | \n",
+ " Branch | \n",
+ " 484.013411 | \n",
+ " Four-Door Car | \n",
+ " Medsize | \n",
+ " A | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 4 | \n",
+ " QA50777 | \n",
+ " Oregon | \n",
+ " 9025.067525 | \n",
+ " No | \n",
+ " Premium | \n",
+ " Bachelor | \n",
+ " 2011-01-17 | \n",
+ " Medical Leave | \n",
+ " F | \n",
+ " ... | \n",
+ " 7 | \n",
+ " Personal Auto | \n",
+ " Personal L2 | \n",
+ " Offer1 | \n",
+ " Branch | \n",
+ " 707.925645 | \n",
+ " Four-Door Car | \n",
+ " Medsize | \n",
+ " A | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 10905 | \n",
+ " 10905 | \n",
+ " FE99816 | \n",
+ " Nevada | \n",
+ " 15563.369440 | \n",
+ " No | \n",
+ " Premium | \n",
+ " Bachelor | \n",
+ " 2011-01-19 | \n",
+ " Unemployed | \n",
+ " F | \n",
+ " ... | \n",
+ " 7 | \n",
+ " Personal Auto | \n",
+ " Personal L1 | \n",
+ " Offer3 | \n",
+ " Web | \n",
+ " 1214.400000 | \n",
+ " Luxury Car | \n",
+ " Medsize | \n",
+ " A | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 10906 | \n",
+ " 10906 | \n",
+ " KX53892 | \n",
+ " Oregon | \n",
+ " 5259.444853 | \n",
+ " No | \n",
+ " Basic | \n",
+ " College | \n",
+ " 2011-01-06 | \n",
+ " Employed | \n",
+ " F | \n",
+ " ... | \n",
+ " 6 | \n",
+ " Personal Auto | \n",
+ " Personal L3 | \n",
+ " Offer2 | \n",
+ " Branch | \n",
+ " 273.018929 | \n",
+ " Four-Door Car | \n",
+ " Medsize | \n",
+ " A | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 10907 | \n",
+ " 10907 | \n",
+ " TL39050 | \n",
+ " Arizona | \n",
+ " 23893.304100 | \n",
+ " No | \n",
+ " Extended | \n",
+ " Bachelor | \n",
+ " 2011-02-06 | \n",
+ " Employed | \n",
+ " F | \n",
+ " ... | \n",
+ " 2 | \n",
+ " Corporate Auto | \n",
+ " Corporate L3 | \n",
+ " Offer1 | \n",
+ " Web | \n",
+ " 381.306996 | \n",
+ " Luxury SUV | \n",
+ " Medsize | \n",
+ " A | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " 10908 | \n",
+ " 10908 | \n",
+ " WA60547 | \n",
+ " California | \n",
+ " 11971.977650 | \n",
+ " No | \n",
+ " Premium | \n",
+ " College | \n",
+ " 2011-02-13 | \n",
+ " Employed | \n",
+ " F | \n",
+ " ... | \n",
+ " 6 | \n",
+ " Personal Auto | \n",
+ " Personal L1 | \n",
+ " Offer1 | \n",
+ " Branch | \n",
+ " 618.288849 | \n",
+ " SUV | \n",
+ " Medsize | \n",
+ " A | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " 10909 | \n",
+ " 10909 | \n",
+ " IV32877 | \n",
+ " California | \n",
+ " 6857.519928 | \n",
+ " No | \n",
+ " Basic | \n",
+ " Bachelor | \n",
+ " 2011-01-08 | \n",
+ " Unemployed | \n",
+ " M | \n",
+ " ... | \n",
+ " 3 | \n",
+ " Personal Auto | \n",
+ " Personal L1 | \n",
+ " Offer4 | \n",
+ " Web | \n",
+ " 1021.719397 | \n",
+ " SUV | \n",
+ " Medsize | \n",
+ " A | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
10910 rows × 27 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " UNNAMED:_0 CUSTOMER STATE CUSTOMER_LIFETIME_VALUE RESPONSE \\\n",
+ "0 0 DK49336 Arizona 4809.216960 No \n",
+ "1 1 KX64629 California 2228.525238 No \n",
+ "2 2 LZ68649 Washington 14947.917300 No \n",
+ "3 3 XL78013 Oregon 22332.439460 Yes \n",
+ "4 4 QA50777 Oregon 9025.067525 No \n",
+ "... ... ... ... ... ... \n",
+ "10905 10905 FE99816 Nevada 15563.369440 No \n",
+ "10906 10906 KX53892 Oregon 5259.444853 No \n",
+ "10907 10907 TL39050 Arizona 23893.304100 No \n",
+ "10908 10908 WA60547 California 11971.977650 No \n",
+ "10909 10909 IV32877 California 6857.519928 No \n",
+ "\n",
+ " COVERAGE EDUCATION EFFECTIVE_TO_DATE EMPLOYMENT_STATUS GENDER ... \\\n",
+ "0 Basic College 2011-02-18 Employed M ... \n",
+ "1 Basic College 2011-01-18 Unemployed F ... \n",
+ "2 Basic Bachelor 2011-02-10 Employed M ... \n",
+ "3 Extended College 2011-01-11 Employed M ... \n",
+ "4 Premium Bachelor 2011-01-17 Medical Leave F ... \n",
+ "... ... ... ... ... ... ... \n",
+ "10905 Premium Bachelor 2011-01-19 Unemployed F ... \n",
+ "10906 Basic College 2011-01-06 Employed F ... \n",
+ "10907 Extended Bachelor 2011-02-06 Employed F ... \n",
+ "10908 Premium College 2011-02-13 Employed F ... \n",
+ "10909 Basic Bachelor 2011-01-08 Unemployed M ... \n",
+ "\n",
+ " NUMBER_OF_POLICIES POLICY_TYPE POLICY RENEW_OFFER_TYPE \\\n",
+ "0 9 Corporate Auto Corporate L3 Offer3 \n",
+ "1 1 Personal Auto Personal L3 Offer4 \n",
+ "2 2 Personal Auto Personal L3 Offer3 \n",
+ "3 2 Corporate Auto Corporate L3 Offer2 \n",
+ "4 7 Personal Auto Personal L2 Offer1 \n",
+ "... ... ... ... ... \n",
+ "10905 7 Personal Auto Personal L1 Offer3 \n",
+ "10906 6 Personal Auto Personal L3 Offer2 \n",
+ "10907 2 Corporate Auto Corporate L3 Offer1 \n",
+ "10908 6 Personal Auto Personal L1 Offer1 \n",
+ "10909 3 Personal Auto Personal L1 Offer4 \n",
+ "\n",
+ " SALES_CHANNEL TOTAL_CLAIM_AMOUNT VEHICLE_CLASS VEHICLE_SIZE \\\n",
+ "0 Agent 292.800000 Four-Door Car Medsize \n",
+ "1 Call Center 744.924331 Four-Door Car Medsize \n",
+ "2 Call Center 480.000000 SUV Medsize \n",
+ "3 Branch 484.013411 Four-Door Car Medsize \n",
+ "4 Branch 707.925645 Four-Door Car Medsize \n",
+ "... ... ... ... ... \n",
+ "10905 Web 1214.400000 Luxury Car Medsize \n",
+ "10906 Branch 273.018929 Four-Door Car Medsize \n",
+ "10907 Web 381.306996 Luxury SUV Medsize \n",
+ "10908 Branch 618.288849 SUV Medsize \n",
+ "10909 Web 1021.719397 SUV Medsize \n",
+ "\n",
+ " VEHICLE_TYPE MONTH \n",
+ "0 A 2 \n",
+ "1 A 1 \n",
+ "2 A 2 \n",
+ "3 A 1 \n",
+ "4 A 1 \n",
+ "... ... ... \n",
+ "10905 A 1 \n",
+ "10906 A 1 \n",
+ "10907 A 2 \n",
+ "10908 A 2 \n",
+ "10909 A 1 \n",
+ "\n",
+ "[10910 rows x 27 columns]"
+ ]
+ },
+ "execution_count": 40,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Give the employment column the same underscore-separated name style as the other headers.\n",
+ "marketing_customer_analysis = marketing_customer_analysis.rename(\n",
+ "    columns={'EMPLOYMENTSTATUS': 'EMPLOYMENT_STATUS'},\n",
+ ")\n",
+ "marketing_customer_analysis"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 41,
+ "id": "891a398b-70c8-4243-be8d-ce16faa7ebad",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " CUSTOMER | \n",
+ " STATE | \n",
+ " CUSTOMER_LIFETIME_VALUE | \n",
+ " RESPONSE | \n",
+ " COVERAGE | \n",
+ " EDUCATION | \n",
+ " EFFECTIVE_TO_DATE | \n",
+ " EMPLOYMENT_STATUS | \n",
+ " GENDER | \n",
+ " INCOME | \n",
+ " ... | \n",
+ " NUMBER_OF_POLICIES | \n",
+ " POLICY_TYPE | \n",
+ " POLICY | \n",
+ " RENEW_OFFER_TYPE | \n",
+ " SALES_CHANNEL | \n",
+ " TOTAL_CLAIM_AMOUNT | \n",
+ " VEHICLE_CLASS | \n",
+ " VEHICLE_SIZE | \n",
+ " VEHICLE_TYPE | \n",
+ " MONTH | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " DK49336 | \n",
+ " Arizona | \n",
+ " 4809.216960 | \n",
+ " No | \n",
+ " Basic | \n",
+ " College | \n",
+ " 2011-02-18 | \n",
+ " Employed | \n",
+ " M | \n",
+ " 48029 | \n",
+ " ... | \n",
+ " 9 | \n",
+ " Corporate Auto | \n",
+ " Corporate L3 | \n",
+ " Offer3 | \n",
+ " Agent | \n",
+ " 292.800000 | \n",
+ " Four-Door Car | \n",
+ " Medsize | \n",
+ " A | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " KX64629 | \n",
+ " California | \n",
+ " 2228.525238 | \n",
+ " No | \n",
+ " Basic | \n",
+ " College | \n",
+ " 2011-01-18 | \n",
+ " Unemployed | \n",
+ " F | \n",
+ " 0 | \n",
+ " ... | \n",
+ " 1 | \n",
+ " Personal Auto | \n",
+ " Personal L3 | \n",
+ " Offer4 | \n",
+ " Call Center | \n",
+ " 744.924331 | \n",
+ " Four-Door Car | \n",
+ " Medsize | \n",
+ " A | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " LZ68649 | \n",
+ " Washington | \n",
+ " 14947.917300 | \n",
+ " No | \n",
+ " Basic | \n",
+ " Bachelor | \n",
+ " 2011-02-10 | \n",
+ " Employed | \n",
+ " M | \n",
+ " 22139 | \n",
+ " ... | \n",
+ " 2 | \n",
+ " Personal Auto | \n",
+ " Personal L3 | \n",
+ " Offer3 | \n",
+ " Call Center | \n",
+ " 480.000000 | \n",
+ " SUV | \n",
+ " Medsize | \n",
+ " A | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " XL78013 | \n",
+ " Oregon | \n",
+ " 22332.439460 | \n",
+ " Yes | \n",
+ " Extended | \n",
+ " College | \n",
+ " 2011-01-11 | \n",
+ " Employed | \n",
+ " M | \n",
+ " 49078 | \n",
+ " ... | \n",
+ " 2 | \n",
+ " Corporate Auto | \n",
+ " Corporate L3 | \n",
+ " Offer2 | \n",
+ " Branch | \n",
+ " 484.013411 | \n",
+ " Four-Door Car | \n",
+ " Medsize | \n",
+ " A | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " QA50777 | \n",
+ " Oregon | \n",
+ " 9025.067525 | \n",
+ " No | \n",
+ " Premium | \n",
+ " Bachelor | \n",
+ " 2011-01-17 | \n",
+ " Medical Leave | \n",
+ " F | \n",
+ " 23675 | \n",
+ " ... | \n",
+ " 7 | \n",
+ " Personal Auto | \n",
+ " Personal L2 | \n",
+ " Offer1 | \n",
+ " Branch | \n",
+ " 707.925645 | \n",
+ " Four-Door Car | \n",
+ " Medsize | \n",
+ " A | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 10905 | \n",
+ " FE99816 | \n",
+ " Nevada | \n",
+ " 15563.369440 | \n",
+ " No | \n",
+ " Premium | \n",
+ " Bachelor | \n",
+ " 2011-01-19 | \n",
+ " Unemployed | \n",
+ " F | \n",
+ " 0 | \n",
+ " ... | \n",
+ " 7 | \n",
+ " Personal Auto | \n",
+ " Personal L1 | \n",
+ " Offer3 | \n",
+ " Web | \n",
+ " 1214.400000 | \n",
+ " Luxury Car | \n",
+ " Medsize | \n",
+ " A | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 10906 | \n",
+ " KX53892 | \n",
+ " Oregon | \n",
+ " 5259.444853 | \n",
+ " No | \n",
+ " Basic | \n",
+ " College | \n",
+ " 2011-01-06 | \n",
+ " Employed | \n",
+ " F | \n",
+ " 61146 | \n",
+ " ... | \n",
+ " 6 | \n",
+ " Personal Auto | \n",
+ " Personal L3 | \n",
+ " Offer2 | \n",
+ " Branch | \n",
+ " 273.018929 | \n",
+ " Four-Door Car | \n",
+ " Medsize | \n",
+ " A | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 10907 | \n",
+ " TL39050 | \n",
+ " Arizona | \n",
+ " 23893.304100 | \n",
+ " No | \n",
+ " Extended | \n",
+ " Bachelor | \n",
+ " 2011-02-06 | \n",
+ " Employed | \n",
+ " F | \n",
+ " 39837 | \n",
+ " ... | \n",
+ " 2 | \n",
+ " Corporate Auto | \n",
+ " Corporate L3 | \n",
+ " Offer1 | \n",
+ " Web | \n",
+ " 381.306996 | \n",
+ " Luxury SUV | \n",
+ " Medsize | \n",
+ " A | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " 10908 | \n",
+ " WA60547 | \n",
+ " California | \n",
+ " 11971.977650 | \n",
+ " No | \n",
+ " Premium | \n",
+ " College | \n",
+ " 2011-02-13 | \n",
+ " Employed | \n",
+ " F | \n",
+ " 64195 | \n",
+ " ... | \n",
+ " 6 | \n",
+ " Personal Auto | \n",
+ " Personal L1 | \n",
+ " Offer1 | \n",
+ " Branch | \n",
+ " 618.288849 | \n",
+ " SUV | \n",
+ " Medsize | \n",
+ " A | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " 10909 | \n",
+ " IV32877 | \n",
+ " California | \n",
+ " 6857.519928 | \n",
+ " No | \n",
+ " Basic | \n",
+ " Bachelor | \n",
+ " 2011-01-08 | \n",
+ " Unemployed | \n",
+ " M | \n",
+ " 0 | \n",
+ " ... | \n",
+ " 3 | \n",
+ " Personal Auto | \n",
+ " Personal L1 | \n",
+ " Offer4 | \n",
+ " Web | \n",
+ " 1021.719397 | \n",
+ " SUV | \n",
+ " Medsize | \n",
+ " A | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
10910 rows × 26 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " CUSTOMER STATE CUSTOMER_LIFETIME_VALUE RESPONSE COVERAGE \\\n",
+ "0 DK49336 Arizona 4809.216960 No Basic \n",
+ "1 KX64629 California 2228.525238 No Basic \n",
+ "2 LZ68649 Washington 14947.917300 No Basic \n",
+ "3 XL78013 Oregon 22332.439460 Yes Extended \n",
+ "4 QA50777 Oregon 9025.067525 No Premium \n",
+ "... ... ... ... ... ... \n",
+ "10905 FE99816 Nevada 15563.369440 No Premium \n",
+ "10906 KX53892 Oregon 5259.444853 No Basic \n",
+ "10907 TL39050 Arizona 23893.304100 No Extended \n",
+ "10908 WA60547 California 11971.977650 No Premium \n",
+ "10909 IV32877 California 6857.519928 No Basic \n",
+ "\n",
+ " EDUCATION EFFECTIVE_TO_DATE EMPLOYMENT_STATUS GENDER INCOME ... \\\n",
+ "0 College 2011-02-18 Employed M 48029 ... \n",
+ "1 College 2011-01-18 Unemployed F 0 ... \n",
+ "2 Bachelor 2011-02-10 Employed M 22139 ... \n",
+ "3 College 2011-01-11 Employed M 49078 ... \n",
+ "4 Bachelor 2011-01-17 Medical Leave F 23675 ... \n",
+ "... ... ... ... ... ... ... \n",
+ "10905 Bachelor 2011-01-19 Unemployed F 0 ... \n",
+ "10906 College 2011-01-06 Employed F 61146 ... \n",
+ "10907 Bachelor 2011-02-06 Employed F 39837 ... \n",
+ "10908 College 2011-02-13 Employed F 64195 ... \n",
+ "10909 Bachelor 2011-01-08 Unemployed M 0 ... \n",
+ "\n",
+ " NUMBER_OF_POLICIES POLICY_TYPE POLICY RENEW_OFFER_TYPE \\\n",
+ "0 9 Corporate Auto Corporate L3 Offer3 \n",
+ "1 1 Personal Auto Personal L3 Offer4 \n",
+ "2 2 Personal Auto Personal L3 Offer3 \n",
+ "3 2 Corporate Auto Corporate L3 Offer2 \n",
+ "4 7 Personal Auto Personal L2 Offer1 \n",
+ "... ... ... ... ... \n",
+ "10905 7 Personal Auto Personal L1 Offer3 \n",
+ "10906 6 Personal Auto Personal L3 Offer2 \n",
+ "10907 2 Corporate Auto Corporate L3 Offer1 \n",
+ "10908 6 Personal Auto Personal L1 Offer1 \n",
+ "10909 3 Personal Auto Personal L1 Offer4 \n",
+ "\n",
+ " SALES_CHANNEL TOTAL_CLAIM_AMOUNT VEHICLE_CLASS VEHICLE_SIZE \\\n",
+ "0 Agent 292.800000 Four-Door Car Medsize \n",
+ "1 Call Center 744.924331 Four-Door Car Medsize \n",
+ "2 Call Center 480.000000 SUV Medsize \n",
+ "3 Branch 484.013411 Four-Door Car Medsize \n",
+ "4 Branch 707.925645 Four-Door Car Medsize \n",
+ "... ... ... ... ... \n",
+ "10905 Web 1214.400000 Luxury Car Medsize \n",
+ "10906 Branch 273.018929 Four-Door Car Medsize \n",
+ "10907 Web 381.306996 Luxury SUV Medsize \n",
+ "10908 Branch 618.288849 SUV Medsize \n",
+ "10909 Web 1021.719397 SUV Medsize \n",
+ "\n",
+ " VEHICLE_TYPE MONTH \n",
+ "0 A 2 \n",
+ "1 A 1 \n",
+ "2 A 2 \n",
+ "3 A 1 \n",
+ "4 A 1 \n",
+ "... ... ... \n",
+ "10905 A 1 \n",
+ "10906 A 1 \n",
+ "10907 A 2 \n",
+ "10908 A 2 \n",
+ "10909 A 1 \n",
+ "\n",
+ "[10910 rows x 26 columns]"
+ ]
+ },
+ "execution_count": 41,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Drop UNNAMED:_0, which only repeats the row index (see the previous output).\n",
+ "# errors=\"ignore\" keeps this cell re-runnable: without it, executing the cell a\n",
+ "# second time raises KeyError because the column has already been removed.\n",
+ "marketing_customer_analysis = marketing_customer_analysis.drop(columns=[\"UNNAMED:_0\"], errors=\"ignore\")\n",
+ "marketing_customer_analysis"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 55,
+ "id": "d5a262a3-d76e-4001-97e4-42120f7cca1b",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# data_cleaning.py\n",
+ "\n",
+ "def standardize_gender(marketing_customer_analysis, column=\"GENDER\"):\n",
+ "    \"\"\"Standardize the gender column to the codes 'M' and 'F'.\n",
+ "\n",
+ "    Values are upper-cased, then the spellings FEMALE, FEMAL and MALE are\n",
+ "    replaced by their one-letter code; other values stay upper-cased as is.\n",
+ "\n",
+ "    Args:\n",
+ "        marketing_customer_analysis: DataFrame to clean; modified in place.\n",
+ "        column: Name of the gender column. If the frame has no such column,\n",
+ "            nothing is changed.\n",
+ "\n",
+ "    Returns:\n",
+ "        The same DataFrame object that was passed in.\n",
+ "    \"\"\"\n",
+ "    # Inspect the frame that was passed in. The previous check read the notebook\n",
+ "    # global `file_combined`, so the function depended on hidden kernel state.\n",
+ "    if column in marketing_customer_analysis.columns:\n",
+ "        marketing_customer_analysis[column] = marketing_customer_analysis[column].str.upper().replace({\n",
+ "            \"FEMALE\": \"F\",\n",
+ "            \"FEMAL\": \"F\",\n",
+ "            \"MALE\": \"M\"\n",
+ "        })\n",
+ "    return marketing_customer_analysis\n",
+ " \n",
+ "def standardize_states(marketing_customer_analysis, column=\"STATES\"):\n",
+ "    \"\"\"Replace state abbreviations (AZ, Cali, WA) with full state names.\n",
+ "\n",
+ "    Args:\n",
+ "        marketing_customer_analysis: DataFrame to clean; modified in place.\n",
+ "        column: Name of the state column. When the default \"STATES\" is not\n",
+ "            present, the singular \"STATE\" is tried instead. If neither\n",
+ "            exists, nothing is changed.\n",
+ "\n",
+ "    Returns:\n",
+ "        The same DataFrame object that was passed in.\n",
+ "    \"\"\"\n",
+ "    state_mapping = {\n",
+ "        \"AZ\": \"Arizona\",\n",
+ "        \"Cali\": \"California\",\n",
+ "        \"WA\": \"Washington\"\n",
+ "    }\n",
+ "    # The frames displayed in this notebook name the column \"STATE\", so the\n",
+ "    # default \"STATES\" never matched and the function silently did nothing.\n",
+ "    # Fall back to the singular name without changing the public default.\n",
+ "    if column == \"STATES\" and column not in marketing_customer_analysis.columns:\n",
+ "        column = \"STATE\"\n",
+ "    if column in marketing_customer_analysis.columns:\n",
+ "        marketing_customer_analysis[column] = marketing_customer_analysis[column].replace(state_mapping)\n",
+ "    return marketing_customer_analysis\n",
+ "\n",
+ "def standardize_education(marketing_customer_analysis, column=\"EDUCATION\"):\n",
+ "    \"\"\"Collapse the education label 'Bachelors' into 'Bachelor'.\n",
+ "\n",
+ "    The frame is modified in place and returned; a frame without the\n",
+ "    requested column is returned unchanged.\n",
+ "    \"\"\"\n",
+ "    frame = marketing_customer_analysis\n",
+ "    if column not in frame.columns:\n",
+ "        return frame\n",
+ "    education_fixes = {\"Bachelors\": \"Bachelor\"}\n",
+ "    frame[column] = frame[column].replace(education_fixes)\n",
+ "    return frame\n",
+ "\n",
+ "def clean_customer_lifetime_value(marketing_customer_analysis, column=\"CUSTOMER_LIFETIME_VALUE\"):\n",
+ "    \"\"\"Strip '%' and '$' from the column and convert it to numbers.\n",
+ "\n",
+ "    Args:\n",
+ "        marketing_customer_analysis: DataFrame to clean; modified in place.\n",
+ "        column: Name of the column to convert. If absent, nothing is changed.\n",
+ "\n",
+ "    Returns:\n",
+ "        The same DataFrame object. Entries that cannot be parsed as a number\n",
+ "        after stripping become NaN (errors='coerce').\n",
+ "    \"\"\"\n",
+ "    if column in marketing_customer_analysis.columns:\n",
+ "        as_text = marketing_customer_analysis[column].astype(str)\n",
+ "        # regex=False: '$' is a regular-expression anchor, so request literal\n",
+ "        # replacement explicitly instead of relying on the pandas-version default.\n",
+ "        as_text = as_text.str.replace(\"%\", \"\", regex=False).str.replace(\"$\", \"\", regex=False).str.strip()\n",
+ "        marketing_customer_analysis[column] = pd.to_numeric(as_text, errors='coerce')\n",
+ "    return marketing_customer_analysis\n",
+ "\n",
+ "def standardize_vehicle_class(marketing_customer_analysis, column=\"VEHICLE_CLASS\"):\n",
+ "    \"\"\"Group 'Sports Car', 'Luxury SUV' and 'Luxury Car' under 'Luxury'.\n",
+ "\n",
+ "    Args:\n",
+ "        marketing_customer_analysis: DataFrame to clean; modified in place.\n",
+ "        column: Name of the vehicle class column. If absent, nothing is changed.\n",
+ "\n",
+ "    Returns:\n",
+ "        The same DataFrame object that was passed in.\n",
+ "    \"\"\"\n",
+ "    # Inspect the frame that was passed in. The previous check read the notebook\n",
+ "    # global `file_combined`, so the function depended on hidden kernel state.\n",
+ "    if column in marketing_customer_analysis.columns:\n",
+ "        marketing_customer_analysis[column] = marketing_customer_analysis[column].replace({\n",
+ "            \"Sports Car\": \"Luxury\",\n",
+ "            \"Luxury SUV\": \"Luxury\",\n",
+ "            \"Luxury Car\": \"Luxury\"\n",
+ "        })\n",
+ "    return marketing_customer_analysis\n",
+ "\n",
+ "def clean_number_of_open_complaints(marketing_customer_analysis, column=\"NUMBER_OF_OPEN_COMPLAINTS\"):\n",
+ "    \"\"\"Convert open-complaint entries such as '1/5/00' to whole numbers.\n",
+ "\n",
+ "    For a slash-separated entry the second field is used; an entry without\n",
+ "    a slash is converted as a whole. Missing entries become 0.\n",
+ "\n",
+ "    Args:\n",
+ "        marketing_customer_analysis: DataFrame to clean; modified in place.\n",
+ "        column: Name of the complaints column. If absent, nothing is changed.\n",
+ "\n",
+ "    Returns:\n",
+ "        The same DataFrame object that was passed in.\n",
+ "\n",
+ "    Raises:\n",
+ "        ValueError: If the selected field is not numeric text.\n",
+ "    \"\"\"\n",
+ "    if column in marketing_customer_analysis.columns:\n",
+ "        def extract_middle(value):\n",
+ "            if pd.isna(value):\n",
+ "                return 0\n",
+ "            parts = str(value).split(\"/\")\n",
+ "            field = parts[1] if len(parts) >= 2 else parts[0]\n",
+ "            # Parse through float() so values already stored as floats\n",
+ "            # (str(2.0) == '2.0') convert instead of raising in int().\n",
+ "            return int(float(field))\n",
+ "        marketing_customer_analysis[column] = marketing_customer_analysis[column].apply(extract_middle)\n",
+ "    return marketing_customer_analysis\n",
+ "\n",
+ "def fill_missing_values(marketing_customer_analysis):\n",
+ "    \"\"\"Fill missing values: column mean for numeric columns, mode for categorical ones.\n",
+ "\n",
+ "    Numeric columns are those with dtype int64 or float64; categorical\n",
+ "    columns are those with dtype object or category. A categorical column\n",
+ "    with no non-missing value has no mode and is left untouched.\n",
+ "\n",
+ "    Args:\n",
+ "        marketing_customer_analysis: DataFrame to fill; modified in place.\n",
+ "\n",
+ "    Returns:\n",
+ "        The same DataFrame object that was passed in.\n",
+ "    \"\"\"\n",
+ "    num_cols = marketing_customer_analysis.select_dtypes(include=['int64', 'float64']).columns\n",
+ "    cat_cols = marketing_customer_analysis.select_dtypes(include=['object', 'category']).columns\n",
+ "\n",
+ "    for col in num_cols:\n",
+ "        # Assign the filled column back. fillna(inplace=True) on frame[col] is\n",
+ "        # chained assignment and does not update the frame under Copy-on-Write.\n",
+ "        marketing_customer_analysis[col] = marketing_customer_analysis[col].fillna(\n",
+ "            marketing_customer_analysis[col].mean()\n",
+ "        )\n",
+ "    for col in cat_cols:\n",
+ "        modes = marketing_customer_analysis[col].mode()\n",
+ "        # mode() is empty when every entry is missing; nothing to fill with.\n",
+ "        if modes.empty:\n",
+ "            continue\n",
+ "        marketing_customer_analysis[col] = marketing_customer_analysis[col].fillna(modes.iloc[0])\n",
+ "    return marketing_customer_analysis\n",
+ "\n",
+ "def drop_duplicates(marketing_customer_analysis, subset=None):\n",
+ "    \"\"\"Remove repeated rows in place, keeping the first occurrence of each.\n",
+ "\n",
+ "    Args:\n",
+ "        marketing_customer_analysis: DataFrame to deduplicate; modified in place.\n",
+ "        subset: Optional column label(s) used to decide whether two rows are\n",
+ "            duplicates. None compares all columns.\n",
+ "\n",
+ "    Returns:\n",
+ "        The same DataFrame object, re-indexed from 0 after the removal.\n",
+ "    \"\"\"\n",
+ "    frame = marketing_customer_analysis\n",
+ "    # First discard the repeats, then renumber the surviving rows.\n",
+ "    frame.drop_duplicates(subset=subset, inplace=True, keep='first')\n",
+ "    frame.reset_index(inplace=True, drop=True)\n",
+ "    return frame"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 56,
+ "id": "50beed86-9a87-49e6-8492-2bfeb8fe6488",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def main(marketing_customer_analysis):\n",
+ "    \"\"\"Run every cleaning and formatting step on the frame, in a fixed order.\n",
+ "\n",
+ "    Each step receives the frame returned by the previous one; the result of\n",
+ "    the last step (duplicate removal) is returned.\n",
+ "    \"\"\"\n",
+ "    cleaning_steps = (\n",
+ "        standardize_gender,\n",
+ "        standardize_states,\n",
+ "        standardize_education,\n",
+ "        clean_customer_lifetime_value,\n",
+ "        standardize_vehicle_class,\n",
+ "        clean_number_of_open_complaints,\n",
+ "        fill_missing_values,\n",
+ "        drop_duplicates,\n",
+ "    )\n",
+ "    for apply_step in cleaning_steps:\n",
+ "        marketing_customer_analysis = apply_step(marketing_customer_analysis)\n",
+ "    return marketing_customer_analysis"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 57,
+ "id": "52d73c45-533c-4fe7-a1dd-77f2de6ab8fa",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " CUSTOMER | \n",
+ " STATE | \n",
+ " CUSTOMER_LIFETIME_VALUE | \n",
+ " RESPONSE | \n",
+ " COVERAGE | \n",
+ " EDUCATION | \n",
+ " EFFECTIVE_TO_DATE | \n",
+ " EMPLOYMENT_STATUS | \n",
+ " GENDER | \n",
+ " INCOME | \n",
+ " ... | \n",
+ " NUMBER_OF_POLICIES | \n",
+ " POLICY_TYPE | \n",
+ " POLICY | \n",
+ " RENEW_OFFER_TYPE | \n",
+ " SALES_CHANNEL | \n",
+ " TOTAL_CLAIM_AMOUNT | \n",
+ " VEHICLE_CLASS | \n",
+ " VEHICLE_SIZE | \n",
+ " VEHICLE_TYPE | \n",
+ " MONTH | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " DK49336 | \n",
+ " Arizona | \n",
+ " 4809.216960 | \n",
+ " No | \n",
+ " Basic | \n",
+ " College | \n",
+ " 2011-02-18 | \n",
+ " Employed | \n",
+ " M | \n",
+ " 48029 | \n",
+ " ... | \n",
+ " 9 | \n",
+ " Corporate Auto | \n",
+ " Corporate L3 | \n",
+ " Offer3 | \n",
+ " Agent | \n",
+ " 292.800000 | \n",
+ " Four-Door Car | \n",
+ " Medsize | \n",
+ " A | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " KX64629 | \n",
+ " California | \n",
+ " 2228.525238 | \n",
+ " No | \n",
+ " Basic | \n",
+ " College | \n",
+ " 2011-01-18 | \n",
+ " Unemployed | \n",
+ " F | \n",
+ " 0 | \n",
+ " ... | \n",
+ " 1 | \n",
+ " Personal Auto | \n",
+ " Personal L3 | \n",
+ " Offer4 | \n",
+ " Call Center | \n",
+ " 744.924331 | \n",
+ " Four-Door Car | \n",
+ " Medsize | \n",
+ " A | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " LZ68649 | \n",
+ " Washington | \n",
+ " 14947.917300 | \n",
+ " No | \n",
+ " Basic | \n",
+ " Bachelor | \n",
+ " 2011-02-10 | \n",
+ " Employed | \n",
+ " M | \n",
+ " 22139 | \n",
+ " ... | \n",
+ " 2 | \n",
+ " Personal Auto | \n",
+ " Personal L3 | \n",
+ " Offer3 | \n",
+ " Call Center | \n",
+ " 480.000000 | \n",
+ " SUV | \n",
+ " Medsize | \n",
+ " A | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " XL78013 | \n",
+ " Oregon | \n",
+ " 22332.439460 | \n",
+ " Yes | \n",
+ " Extended | \n",
+ " College | \n",
+ " 2011-01-11 | \n",
+ " Employed | \n",
+ " M | \n",
+ " 49078 | \n",
+ " ... | \n",
+ " 2 | \n",
+ " Corporate Auto | \n",
+ " Corporate L3 | \n",
+ " Offer2 | \n",
+ " Branch | \n",
+ " 484.013411 | \n",
+ " Four-Door Car | \n",
+ " Medsize | \n",
+ " A | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " QA50777 | \n",
+ " Oregon | \n",
+ " 9025.067525 | \n",
+ " No | \n",
+ " Premium | \n",
+ " Bachelor | \n",
+ " 2011-01-17 | \n",
+ " Medical Leave | \n",
+ " F | \n",
+ " 23675 | \n",
+ " ... | \n",
+ " 7 | \n",
+ " Personal Auto | \n",
+ " Personal L2 | \n",
+ " Offer1 | \n",
+ " Branch | \n",
+ " 707.925645 | \n",
+ " Four-Door Car | \n",
+ " Medsize | \n",
+ " A | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 10905 | \n",
+ " FE99816 | \n",
+ " Nevada | \n",
+ " 15563.369440 | \n",
+ " No | \n",
+ " Premium | \n",
+ " Bachelor | \n",
+ " 2011-01-19 | \n",
+ " Unemployed | \n",
+ " F | \n",
+ " 0 | \n",
+ " ... | \n",
+ " 7 | \n",
+ " Personal Auto | \n",
+ " Personal L1 | \n",
+ " Offer3 | \n",
+ " Web | \n",
+ " 1214.400000 | \n",
+ " Luxury | \n",
+ " Medsize | \n",
+ " A | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 10906 | \n",
+ " KX53892 | \n",
+ " Oregon | \n",
+ " 5259.444853 | \n",
+ " No | \n",
+ " Basic | \n",
+ " College | \n",
+ " 2011-01-06 | \n",
+ " Employed | \n",
+ " F | \n",
+ " 61146 | \n",
+ " ... | \n",
+ " 6 | \n",
+ " Personal Auto | \n",
+ " Personal L3 | \n",
+ " Offer2 | \n",
+ " Branch | \n",
+ " 273.018929 | \n",
+ " Four-Door Car | \n",
+ " Medsize | \n",
+ " A | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 10907 | \n",
+ " TL39050 | \n",
+ " Arizona | \n",
+ " 23893.304100 | \n",
+ " No | \n",
+ " Extended | \n",
+ " Bachelor | \n",
+ " 2011-02-06 | \n",
+ " Employed | \n",
+ " F | \n",
+ " 39837 | \n",
+ " ... | \n",
+ " 2 | \n",
+ " Corporate Auto | \n",
+ " Corporate L3 | \n",
+ " Offer1 | \n",
+ " Web | \n",
+ " 381.306996 | \n",
+ " Luxury | \n",
+ " Medsize | \n",
+ " A | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " 10908 | \n",
+ " WA60547 | \n",
+ " California | \n",
+ " 11971.977650 | \n",
+ " No | \n",
+ " Premium | \n",
+ " College | \n",
+ " 2011-02-13 | \n",
+ " Employed | \n",
+ " F | \n",
+ " 64195 | \n",
+ " ... | \n",
+ " 6 | \n",
+ " Personal Auto | \n",
+ " Personal L1 | \n",
+ " Offer1 | \n",
+ " Branch | \n",
+ " 618.288849 | \n",
+ " SUV | \n",
+ " Medsize | \n",
+ " A | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " 10909 | \n",
+ " IV32877 | \n",
+ " California | \n",
+ " 6857.519928 | \n",
+ " No | \n",
+ " Basic | \n",
+ " Bachelor | \n",
+ " 2011-01-08 | \n",
+ " Unemployed | \n",
+ " M | \n",
+ " 0 | \n",
+ " ... | \n",
+ " 3 | \n",
+ " Personal Auto | \n",
+ " Personal L1 | \n",
+ " Offer4 | \n",
+ " Web | \n",
+ " 1021.719397 | \n",
+ " SUV | \n",
+ " Medsize | \n",
+ " A | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
10910 rows × 26 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " CUSTOMER STATE CUSTOMER_LIFETIME_VALUE RESPONSE COVERAGE \\\n",
+ "0 DK49336 Arizona 4809.216960 No Basic \n",
+ "1 KX64629 California 2228.525238 No Basic \n",
+ "2 LZ68649 Washington 14947.917300 No Basic \n",
+ "3 XL78013 Oregon 22332.439460 Yes Extended \n",
+ "4 QA50777 Oregon 9025.067525 No Premium \n",
+ "... ... ... ... ... ... \n",
+ "10905 FE99816 Nevada 15563.369440 No Premium \n",
+ "10906 KX53892 Oregon 5259.444853 No Basic \n",
+ "10907 TL39050 Arizona 23893.304100 No Extended \n",
+ "10908 WA60547 California 11971.977650 No Premium \n",
+ "10909 IV32877 California 6857.519928 No Basic \n",
+ "\n",
+ " EDUCATION EFFECTIVE_TO_DATE EMPLOYMENT_STATUS GENDER INCOME ... \\\n",
+ "0 College 2011-02-18 Employed M 48029 ... \n",
+ "1 College 2011-01-18 Unemployed F 0 ... \n",
+ "2 Bachelor 2011-02-10 Employed M 22139 ... \n",
+ "3 College 2011-01-11 Employed M 49078 ... \n",
+ "4 Bachelor 2011-01-17 Medical Leave F 23675 ... \n",
+ "... ... ... ... ... ... ... \n",
+ "10905 Bachelor 2011-01-19 Unemployed F 0 ... \n",
+ "10906 College 2011-01-06 Employed F 61146 ... \n",
+ "10907 Bachelor 2011-02-06 Employed F 39837 ... \n",
+ "10908 College 2011-02-13 Employed F 64195 ... \n",
+ "10909 Bachelor 2011-01-08 Unemployed M 0 ... \n",
+ "\n",
+ " NUMBER_OF_POLICIES POLICY_TYPE POLICY RENEW_OFFER_TYPE \\\n",
+ "0 9 Corporate Auto Corporate L3 Offer3 \n",
+ "1 1 Personal Auto Personal L3 Offer4 \n",
+ "2 2 Personal Auto Personal L3 Offer3 \n",
+ "3 2 Corporate Auto Corporate L3 Offer2 \n",
+ "4 7 Personal Auto Personal L2 Offer1 \n",
+ "... ... ... ... ... \n",
+ "10905 7 Personal Auto Personal L1 Offer3 \n",
+ "10906 6 Personal Auto Personal L3 Offer2 \n",
+ "10907 2 Corporate Auto Corporate L3 Offer1 \n",
+ "10908 6 Personal Auto Personal L1 Offer1 \n",
+ "10909 3 Personal Auto Personal L1 Offer4 \n",
+ "\n",
+ " SALES_CHANNEL TOTAL_CLAIM_AMOUNT VEHICLE_CLASS VEHICLE_SIZE \\\n",
+ "0 Agent 292.800000 Four-Door Car Medsize \n",
+ "1 Call Center 744.924331 Four-Door Car Medsize \n",
+ "2 Call Center 480.000000 SUV Medsize \n",
+ "3 Branch 484.013411 Four-Door Car Medsize \n",
+ "4 Branch 707.925645 Four-Door Car Medsize \n",
+ "... ... ... ... ... \n",
+ "10905 Web 1214.400000 Luxury Medsize \n",
+ "10906 Branch 273.018929 Four-Door Car Medsize \n",
+ "10907 Web 381.306996 Luxury Medsize \n",
+ "10908 Branch 618.288849 SUV Medsize \n",
+ "10909 Web 1021.719397 SUV Medsize \n",
+ "\n",
+ " VEHICLE_TYPE MONTH \n",
+ "0 A 2 \n",
+ "1 A 1 \n",
+ "2 A 2 \n",
+ "3 A 1 \n",
+ "4 A 1 \n",
+ "... ... ... \n",
+ "10905 A 1 \n",
+ "10906 A 1 \n",
+ "10907 A 2 \n",
+ "10908 A 2 \n",
+ "10909 A 1 \n",
+ "\n",
+ "[10910 rows x 26 columns]"
+ ]
+ },
+ "execution_count": 57,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Apply the cleaning pipeline before displaying the frame. The stored output of\n",
+ "# this cell already shows cleaned values (VEHICLE_CLASS collapsed to \"Luxury\"),\n",
+ "# but no cell between the function definitions and this display calls main(),\n",
+ "# so a Restart & Run All would not reproduce it.\n",
+ "# NOTE(review): confirm that no other cell in the notebook already calls main().\n",
+ "marketing_customer_analysis = main(marketing_customer_analysis)\n",
+ "marketing_customer_analysis"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 58,
+ "id": "3c5da8e5-9d80-4c1e-9eea-3a41dcac8c8c",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "['No' 'Yes']\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Print the distinct values present in the RESPONSE column.\n",
+ "print(marketing_customer_analysis['RESPONSE'].unique())"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 59,
+ "id": "10480ed9-1d34-4c74-94af-8048cf85bad9",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "['Basic' 'Extended' 'Premium']\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Print the distinct values present in the COVERAGE column.\n",
+ "print(marketing_customer_analysis['COVERAGE'].unique())"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 60,
+ "id": "6bfedc9c-2b80-413b-b288-7dea6c4a323d",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "['2011-02-18' '2011-01-18' '2011-02-10' '2011-01-11' '2011-01-17'\n",
+ " '2011-02-14' '2011-02-24' '2011-01-19' '2011-01-04' '2011-01-02'\n",
+ " '2011-02-07' '2011-01-31' '2011-01-26' '2011-02-28' '2011-01-16'\n",
+ " '2011-02-26' '2011-02-23' '2011-01-15' '2011-02-02' '2011-02-15'\n",
+ " '2011-01-24' '2011-02-21' '2011-02-22' '2011-01-07' '2011-01-28'\n",
+ " '2011-02-08' '2011-02-12' '2011-02-20' '2011-01-05' '2011-02-19'\n",
+ " '2011-01-03' '2011-02-03' '2011-01-22' '2011-01-23' '2011-02-05'\n",
+ " '2011-02-13' '2011-01-25' '2011-02-16' '2011-02-01' '2011-01-27'\n",
+ " '2011-01-12' '2011-01-20' '2011-02-06' '2011-02-11' '2011-01-21'\n",
+ " '2011-01-29' '2011-01-09' '2011-02-09' '2011-02-27' '2011-01-01'\n",
+ " '2011-02-17' '2011-02-25' '2011-01-13' '2011-01-06' '2011-02-04'\n",
+ " '2011-01-14' '2011-01-10' '2011-01-08' '2011-01-30']\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Print the distinct values present in the EFFECTIVE_TO_DATE column.\n",
+ "print(marketing_customer_analysis['EFFECTIVE_TO_DATE'].unique())"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 61,
+ "id": "dd6e76b0-3764-4baa-aafd-39cf2deddd4b",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "['Employed' 'Unemployed' 'Medical Leave' 'Disabled' 'Retired']\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Print the distinct values present in the EMPLOYMENT_STATUS column.\n",
+ "print(marketing_customer_analysis['EMPLOYMENT_STATUS'].unique())"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 62,
+ "id": "ca38eb29-4265-46b7-8e30-caece75e461c",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "['Suburban' 'Urban' 'Rural']\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Print the distinct values present in the LOCATION_CODE column.\n",
+ "print(marketing_customer_analysis['LOCATION_CODE'].unique())"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 63,
+ "id": "7e46c178-f99c-4282-a7d0-d7c83d1031c3",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "['Married' 'Single' 'Divorced']\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Print the distinct values present in the MARITAL_STATUS column.\n",
+ "print(marketing_customer_analysis['MARITAL_STATUS'].unique())"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 64,
+ "id": "5bb5d03c-ddc3-4318-a2c6-01bbecaf7854",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "[ 7. 3. 34. 10. 15.14907074 2.\n",
+ " 8. 35. 33. 19. 13. 5.\n",
+ " 24. 25. 6. 20. 26. 14.\n",
+ " 9. 29. 11. 1. 18. 16.\n",
+ " 30. 12. 4. 22. 31. 21.\n",
+ " 28. 17. 15. 32. 23. 27.\n",
+ " 0. ]\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Print the distinct values present in the MONTHS_SINCE_LAST_CLAIM column.\n",
+ "print(marketing_customer_analysis['MONTHS_SINCE_LAST_CLAIM'].unique())"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 65,
+ "id": "9c220fb3-ae78-434e-8878-bef9fe305b9e",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "float64\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Print the current dtype of the MONTHS_SINCE_LAST_CLAIM column.\n",
+ "print(marketing_customer_analysis['MONTHS_SINCE_LAST_CLAIM'].dtype) "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 66,
+ "id": "7c9233d1-1f54-4b82-9e65-1478ebfb5894",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Change 'MONTHS_SINCE_LAST_CLAIM' column from float to int.\n",
+ "# astype(int) truncates any fractional part, so the non-integer 15.14907074\n",
+ "# listed among the unique values above becomes 15.\n",
+ "marketing_customer_analysis['MONTHS_SINCE_LAST_CLAIM'] = marketing_customer_analysis['MONTHS_SINCE_LAST_CLAIM'].astype(int)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 67,
+ "id": "6cc0b878-35a5-4bfc-a4a0-f9e8497340c7",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "int32\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Print the dtype of MONTHS_SINCE_LAST_CLAIM again to verify the integer cast.\n",
+ "print(marketing_customer_analysis['MONTHS_SINCE_LAST_CLAIM'].dtype) "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 68,
+ "id": "3c39f96e-8afa-4ff1-8fd4-96aa07ea5f60",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "[ 7 3 34 10 15 2 8 35 33 19 13 5 24 25 6 20 26 14 9 29 11 1 18 16\n",
+ " 30 12 4 22 31 21 28 17 32 23 27 0]\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Re-print the distinct MONTHS_SINCE_LAST_CLAIM values to check they are whole numbers after the cast.\n",
+ "print(marketing_customer_analysis['MONTHS_SINCE_LAST_CLAIM'].unique())"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 69,
+ "id": "04f07c50-a2b6-49eb-b0a9-0097f3efa78f",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "[52 26 31 3 73 99 45 24 8 29 32 25 28 87 10 74 1 38 58 37 7 80 95 78\n",
+ " 63 27 97 39 11 59 46 62 13 54 51 22 82 91 44 43 76 48 84 6 92 12 61 4\n",
+ " 18 66 70 16 75 34 35 64 9 89 0 60 71 23 55 93 2 67 81 40 57 86 19 72\n",
+ " 69 33 47 42 17 49 21 83 94 30 15 50 53 77 41 90 5 79 56 98 20 88 65 14\n",
+ " 85 96 36 68]\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Print the distinct values present in the MONTHS_SINCE_POLICY_INCEPTION column.\n",
+ "print(marketing_customer_analysis['MONTHS_SINCE_POLICY_INCEPTION'].unique())"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 70,
+ "id": "d7525908-455a-4248-bd87-952998f8c661",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "[9 1 2 7 4 3 6 8 5]\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Print the distinct values present in the NUMBER_OF_POLICIES column.\n",
+ "print(marketing_customer_analysis['NUMBER_OF_POLICIES'].unique())"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 71,
+ "id": "09ff1c28-658a-435a-9c79-a9cc1ef7331f",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "['Corporate L3' 'Personal L3' 'Personal L2' 'Corporate L2' 'Personal L1'\n",
+ " 'Special L1' 'Corporate L1' 'Special L3' 'Special L2']\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Print the distinct values present in the POLICY column.\n",
+ "print(marketing_customer_analysis['POLICY'].unique())"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 72,
+ "id": "f43942b6-3250-49f0-bced-b41b2f925941",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "['Offer3' 'Offer4' 'Offer2' 'Offer1']\n"
+ ]
+ }
+ ],
+ "source": [
+ "# List the distinct RENEW_OFFER_TYPE values\n",
+ "print(marketing_customer_analysis.loc[:, 'RENEW_OFFER_TYPE'].unique())"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 73,
+ "id": "749d6f3e-3b21-49a6-a0ad-79079f6586ed",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "['Agent' 'Call Center' 'Branch' 'Web']\n"
+ ]
+ }
+ ],
+ "source": [
+ "# List the distinct SALES_CHANNEL values\n",
+ "print(marketing_customer_analysis.loc[:, 'SALES_CHANNEL'].unique())"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 74,
+ "id": "f24f5304-385d-4542-a5d7-06a1af3d5f7f",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "['Medsize' 'Small' 'Large']\n"
+ ]
+ }
+ ],
+ "source": [
+ "# List the distinct VEHICLE_SIZE values\n",
+ "print(marketing_customer_analysis.loc[:, 'VEHICLE_SIZE'].unique())"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 75,
+ "id": "461455b6-e220-409b-a261-534232256532",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "['A']\n"
+ ]
+ }
+ ],
+ "source": [
+ "# List the distinct VEHICLE_TYPE values\n",
+ "print(marketing_customer_analysis.loc[:, 'VEHICLE_TYPE'].unique())"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 76,
+ "id": "0ecc63f2-323d-470c-ae8c-2326b88187f4",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "[2 1]\n"
+ ]
+ }
+ ],
+ "source": [
+ "# List the distinct MONTH values\n",
+ "print(marketing_customer_analysis.loc[:, 'MONTH'].unique())"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "df35fd0d-513e-4e77-867e-429da10a9cc7",
+ "metadata": {
+ "id": "df35fd0d-513e-4e77-867e-429da10a9cc7"
+ },
+ "source": [
+ "1. You work in the marketing department and you want to know which sales channel brought the most sales in terms of total revenue. Using a pivot table, create a summary table showing the total revenue for each sales channel (agent, branch, call center, and web).\n",
+ "Round the total revenue to 2 decimal places. Analyze the resulting table to draw insights."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 78,
+ "id": "9895fd97-4a20-4121-b910-d7b8868da7aa",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " TOTAL_CLAIM_AMOUNT\n",
+ "SALES_CHANNEL \n",
+ "Agent 1810226.82\n",
+ "Branch 1301204.00\n",
+ "Call Center 926600.82\n",
+ "Web 706600.04\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Sum TOTAL_CLAIM_AMOUNT (used here as the revenue measure) for each sales channel\n",
+ "pivot = marketing_customer_analysis.pivot_table(\n",
+ "    values=\"TOTAL_CLAIM_AMOUNT\",\n",
+ "    index=\"SALES_CHANNEL\",\n",
+ "    aggfunc=\"sum\",\n",
+ ")\n",
+ "# Keep two decimals in the reported totals\n",
+ "pivot = pivot.round(2)\n",
+ "\n",
+ "print(pivot)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "640993b2-a291-436c-a34d-a551144f8196",
+ "metadata": {
+ "id": "640993b2-a291-436c-a34d-a551144f8196"
+ },
+ "source": [
+ "2. Create a pivot table that shows the average customer lifetime value per gender and education level. Analyze the resulting table to draw insights."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 79,
+ "id": "75d4a1f7-2eb5-4ab7-9d7d-92f38be3b218",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "EDUCATION Bachelor College Doctor High School or Below Master\n",
+ "GENDER \n",
+ "F 7874.27 7748.82 7328.51 8675.22 8157.05\n",
+ "M 7703.60 8052.46 7415.33 8149.69 8168.83\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Mean customer lifetime value: one row per gender, one column per education level\n",
+ "pivot_clv = marketing_customer_analysis.pivot_table(\n",
+ "    values='CUSTOMER_LIFETIME_VALUE',\n",
+ "    index='GENDER',\n",
+ "    columns='EDUCATION',\n",
+ "    aggfunc='mean',\n",
+ ")\n",
+ "# Keep two decimals in the reported averages\n",
+ "pivot_clv = pivot_clv.round(2)\n",
+ "\n",
+ "print(pivot_clv)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "32c7f2e5-3d90-43e5-be33-9781b6069198",
+ "metadata": {
+ "id": "32c7f2e5-3d90-43e5-be33-9781b6069198"
+ },
+ "source": [
+ "## Bonus\n",
+ "\n",
+ "You work at the customer service department and you want to know which months had the highest number of complaints by policy type category. Create a summary table showing the number of complaints by policy type and month.\n",
+ "Show it in a long format table."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "e3d09a8f-953c-448a-a5f8-2e5a8cca7291",
+ "metadata": {
+ "id": "e3d09a8f-953c-448a-a5f8-2e5a8cca7291"
+ },
+ "source": [
+ "*In data analysis, a long format table is a way of structuring data in which each observation or measurement is stored in a separate row of the table. The key characteristic of a long format table is that each column represents a single variable, and each row represents a single observation of that variable.*\n",
+ "\n",
+ "*More information about long and wide format tables here: https://www.statology.org/long-vs-wide-data/*"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 80,
+ "id": "3a069e0b-b400-470e-904d-d17582191be4",
+ "metadata": {
+ "id": "3a069e0b-b400-470e-904d-d17582191be4"
+ },
+ "outputs": [],
+ "source": [
+ "# Add up the open complaints for every (policy type, month) pair.\n",
+ "# reset_index() turns the group keys back into columns, which yields a long-format table.\n",
+ "summary = (\n",
+ "    marketing_customer_analysis\n",
+ "    .groupby(['POLICY_TYPE', 'MONTH'])['NUMBER_OF_OPEN_COMPLAINTS']\n",
+ "    .sum()\n",
+ "    .reset_index()\n",
+ ")\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 81,
+ "id": "6f1f4d7a-2139-4335-8b98-3ca18d27715e",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Give the aggregated column a name that reflects what it now holds\n",
+ "summary = summary.rename(\n",
+ "    columns={'NUMBER_OF_OPEN_COMPLAINTS': 'TOTAL_COMPLAINTS'},\n",
+ ")\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 82,
+ "id": "29818885-526f-450e-8efc-48a2a7085f42",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " POLICY_TYPE | \n",
+ " MONTH | \n",
+ " TOTAL_COMPLAINTS | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " Corporate Auto | \n",
+ " 1 | \n",
+ " 443.434952 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " Corporate Auto | \n",
+ " 2 | \n",
+ " 385.208135 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " Personal Auto | \n",
+ " 1 | \n",
+ " 1727.605722 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " Personal Auto | \n",
+ " 2 | \n",
+ " 1453.684441 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " Special Auto | \n",
+ " 1 | \n",
+ " 87.074049 | \n",
+ "
\n",
+ " \n",
+ " 5 | \n",
+ " Special Auto | \n",
+ " 2 | \n",
+ " 95.226817 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " POLICY_TYPE MONTH TOTAL_COMPLAINTS\n",
+ "0 Corporate Auto 1 443.434952\n",
+ "1 Corporate Auto 2 385.208135\n",
+ "2 Personal Auto 1 1727.605722\n",
+ "3 Personal Auto 2 1453.684441\n",
+ "4 Special Auto 1 87.074049\n",
+ "5 Special Auto 2 95.226817"
+ ]
+ },
+ "execution_count": 82,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "summary"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "073e7590-4e38-4e99-946c-dfb1785ff3f6",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "colab": {
+ "provenance": []
+ },
+ "kernelspec": {
+ "display_name": "Python [conda env:base] *",
+ "language": "python",
+ "name": "conda-base-py"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.11.5"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/lab-dw-data-structuring-and-combining.ipynb b/lab-dw-data-structuring-and-combining.ipynb
index ec4e3f9..ed5767d 100644
--- a/lab-dw-data-structuring-and-combining.ipynb
+++ b/lab-dw-data-structuring-and-combining.ipynb
@@ -36,14 +36,1846 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 1,
"id": "492d06e3-92c7-4105-ac72-536db98d3244",
"metadata": {
"id": "492d06e3-92c7-4105-ac72-536db98d3244"
},
"outputs": [],
"source": [
- "# Your code goes here"
+ "import pandas as pd"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "id": "f16864ee-90a0-4e99-b966-c6fdcfaa1c61",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Load the first customer file straight from the course repository\n",
+ "url_1 = 'https://raw.githubusercontent.com/data-bootcamp-v4/data/main/file1.csv'\n",
+ "file_1 = pd.read_csv(filepath_or_buffer=url_1)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "id": "4061bf4b-2501-4342-a8a4-ba8a1c025ac7",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Customer | \n",
+ " ST | \n",
+ " GENDER | \n",
+ " Education | \n",
+ " Customer Lifetime Value | \n",
+ " Income | \n",
+ " Monthly Premium Auto | \n",
+ " Number of Open Complaints | \n",
+ " Policy Type | \n",
+ " Vehicle Class | \n",
+ " Total Claim Amount | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " RB50392 | \n",
+ " Washington | \n",
+ " NaN | \n",
+ " Master | \n",
+ " NaN | \n",
+ " 0.0 | \n",
+ " 1000.0 | \n",
+ " 1/0/00 | \n",
+ " Personal Auto | \n",
+ " Four-Door Car | \n",
+ " 2.704934 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " QZ44356 | \n",
+ " Arizona | \n",
+ " F | \n",
+ " Bachelor | \n",
+ " 697953.59% | \n",
+ " 0.0 | \n",
+ " 94.0 | \n",
+ " 1/0/00 | \n",
+ " Personal Auto | \n",
+ " Four-Door Car | \n",
+ " 1131.464935 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " AI49188 | \n",
+ " Nevada | \n",
+ " F | \n",
+ " Bachelor | \n",
+ " 1288743.17% | \n",
+ " 48767.0 | \n",
+ " 108.0 | \n",
+ " 1/0/00 | \n",
+ " Personal Auto | \n",
+ " Two-Door Car | \n",
+ " 566.472247 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " WW63253 | \n",
+ " California | \n",
+ " M | \n",
+ " Bachelor | \n",
+ " 764586.18% | \n",
+ " 0.0 | \n",
+ " 106.0 | \n",
+ " 1/0/00 | \n",
+ " Corporate Auto | \n",
+ " SUV | \n",
+ " 529.881344 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " GA49547 | \n",
+ " Washington | \n",
+ " M | \n",
+ " High School or Below | \n",
+ " 536307.65% | \n",
+ " 36357.0 | \n",
+ " 68.0 | \n",
+ " 1/0/00 | \n",
+ " Personal Auto | \n",
+ " Four-Door Car | \n",
+ " 17.269323 | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 4003 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 4004 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 4005 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 4006 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 4007 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
4008 rows × 11 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Customer ST GENDER Education \\\n",
+ "0 RB50392 Washington NaN Master \n",
+ "1 QZ44356 Arizona F Bachelor \n",
+ "2 AI49188 Nevada F Bachelor \n",
+ "3 WW63253 California M Bachelor \n",
+ "4 GA49547 Washington M High School or Below \n",
+ "... ... ... ... ... \n",
+ "4003 NaN NaN NaN NaN \n",
+ "4004 NaN NaN NaN NaN \n",
+ "4005 NaN NaN NaN NaN \n",
+ "4006 NaN NaN NaN NaN \n",
+ "4007 NaN NaN NaN NaN \n",
+ "\n",
+ " Customer Lifetime Value Income Monthly Premium Auto \\\n",
+ "0 NaN 0.0 1000.0 \n",
+ "1 697953.59% 0.0 94.0 \n",
+ "2 1288743.17% 48767.0 108.0 \n",
+ "3 764586.18% 0.0 106.0 \n",
+ "4 536307.65% 36357.0 68.0 \n",
+ "... ... ... ... \n",
+ "4003 NaN NaN NaN \n",
+ "4004 NaN NaN NaN \n",
+ "4005 NaN NaN NaN \n",
+ "4006 NaN NaN NaN \n",
+ "4007 NaN NaN NaN \n",
+ "\n",
+ " Number of Open Complaints Policy Type Vehicle Class \\\n",
+ "0 1/0/00 Personal Auto Four-Door Car \n",
+ "1 1/0/00 Personal Auto Four-Door Car \n",
+ "2 1/0/00 Personal Auto Two-Door Car \n",
+ "3 1/0/00 Corporate Auto SUV \n",
+ "4 1/0/00 Personal Auto Four-Door Car \n",
+ "... ... ... ... \n",
+ "4003 NaN NaN NaN \n",
+ "4004 NaN NaN NaN \n",
+ "4005 NaN NaN NaN \n",
+ "4006 NaN NaN NaN \n",
+ "4007 NaN NaN NaN \n",
+ "\n",
+ " Total Claim Amount \n",
+ "0 2.704934 \n",
+ "1 1131.464935 \n",
+ "2 566.472247 \n",
+ "3 529.881344 \n",
+ "4 17.269323 \n",
+ "... ... \n",
+ "4003 NaN \n",
+ "4004 NaN \n",
+ "4005 NaN \n",
+ "4006 NaN \n",
+ "4007 NaN \n",
+ "\n",
+ "[4008 rows x 11 columns]"
+ ]
+ },
+ "execution_count": 14,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "file_1"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 15,
+ "id": "4e7a952f-bcb2-4a36-84d2-fa0fe5865193",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Load the second customer file straight from the course repository\n",
+ "url_2 = 'https://raw.githubusercontent.com/data-bootcamp-v4/data/main/file2.csv'\n",
+ "file_2 = pd.read_csv(filepath_or_buffer=url_2)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 16,
+ "id": "e0781ace-fd1f-44ba-99dc-f8758d70b0a3",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Customer | \n",
+ " ST | \n",
+ " GENDER | \n",
+ " Education | \n",
+ " Customer Lifetime Value | \n",
+ " Income | \n",
+ " Monthly Premium Auto | \n",
+ " Number of Open Complaints | \n",
+ " Total Claim Amount | \n",
+ " Policy Type | \n",
+ " Vehicle Class | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " GS98873 | \n",
+ " Arizona | \n",
+ " F | \n",
+ " Bachelor | \n",
+ " 323912.47% | \n",
+ " 16061 | \n",
+ " 88 | \n",
+ " 1/0/00 | \n",
+ " 633.600000 | \n",
+ " Personal Auto | \n",
+ " Four-Door Car | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " CW49887 | \n",
+ " California | \n",
+ " F | \n",
+ " Master | \n",
+ " 462680.11% | \n",
+ " 79487 | \n",
+ " 114 | \n",
+ " 1/0/00 | \n",
+ " 547.200000 | \n",
+ " Special Auto | \n",
+ " SUV | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " MY31220 | \n",
+ " California | \n",
+ " F | \n",
+ " College | \n",
+ " 899704.02% | \n",
+ " 54230 | \n",
+ " 112 | \n",
+ " 1/0/00 | \n",
+ " 537.600000 | \n",
+ " Personal Auto | \n",
+ " Two-Door Car | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " UH35128 | \n",
+ " Oregon | \n",
+ " F | \n",
+ " College | \n",
+ " 2580706.30% | \n",
+ " 71210 | \n",
+ " 214 | \n",
+ " 1/1/00 | \n",
+ " 1027.200000 | \n",
+ " Personal Auto | \n",
+ " Luxury Car | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " WH52799 | \n",
+ " Arizona | \n",
+ " F | \n",
+ " College | \n",
+ " 380812.21% | \n",
+ " 94903 | \n",
+ " 94 | \n",
+ " 1/0/00 | \n",
+ " 451.200000 | \n",
+ " Corporate Auto | \n",
+ " Two-Door Car | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 991 | \n",
+ " HV85198 | \n",
+ " Arizona | \n",
+ " M | \n",
+ " Master | \n",
+ " 847141.75% | \n",
+ " 63513 | \n",
+ " 70 | \n",
+ " 1/0/00 | \n",
+ " 185.667213 | \n",
+ " Personal Auto | \n",
+ " Four-Door Car | \n",
+ "
\n",
+ " \n",
+ " 992 | \n",
+ " BS91566 | \n",
+ " Arizona | \n",
+ " F | \n",
+ " College | \n",
+ " 543121.91% | \n",
+ " 58161 | \n",
+ " 68 | \n",
+ " 1/0/00 | \n",
+ " 140.747286 | \n",
+ " Corporate Auto | \n",
+ " Four-Door Car | \n",
+ "
\n",
+ " \n",
+ " 993 | \n",
+ " IL40123 | \n",
+ " Nevada | \n",
+ " F | \n",
+ " College | \n",
+ " 568964.41% | \n",
+ " 83640 | \n",
+ " 70 | \n",
+ " 1/0/00 | \n",
+ " 471.050488 | \n",
+ " Corporate Auto | \n",
+ " Two-Door Car | \n",
+ "
\n",
+ " \n",
+ " 994 | \n",
+ " MY32149 | \n",
+ " California | \n",
+ " F | \n",
+ " Master | \n",
+ " 368672.38% | \n",
+ " 0 | \n",
+ " 96 | \n",
+ " 1/0/00 | \n",
+ " 28.460568 | \n",
+ " Personal Auto | \n",
+ " Two-Door Car | \n",
+ "
\n",
+ " \n",
+ " 995 | \n",
+ " SA91515 | \n",
+ " California | \n",
+ " M | \n",
+ " Bachelor | \n",
+ " 399258.39% | \n",
+ " 0 | \n",
+ " 111 | \n",
+ " 1/0/00 | \n",
+ " 700.349052 | \n",
+ " Personal Auto | \n",
+ " SUV | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
996 rows × 11 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Customer ST GENDER Education Customer Lifetime Value Income \\\n",
+ "0 GS98873 Arizona F Bachelor 323912.47% 16061 \n",
+ "1 CW49887 California F Master 462680.11% 79487 \n",
+ "2 MY31220 California F College 899704.02% 54230 \n",
+ "3 UH35128 Oregon F College 2580706.30% 71210 \n",
+ "4 WH52799 Arizona F College 380812.21% 94903 \n",
+ ".. ... ... ... ... ... ... \n",
+ "991 HV85198 Arizona M Master 847141.75% 63513 \n",
+ "992 BS91566 Arizona F College 543121.91% 58161 \n",
+ "993 IL40123 Nevada F College 568964.41% 83640 \n",
+ "994 MY32149 California F Master 368672.38% 0 \n",
+ "995 SA91515 California M Bachelor 399258.39% 0 \n",
+ "\n",
+ " Monthly Premium Auto Number of Open Complaints Total Claim Amount \\\n",
+ "0 88 1/0/00 633.600000 \n",
+ "1 114 1/0/00 547.200000 \n",
+ "2 112 1/0/00 537.600000 \n",
+ "3 214 1/1/00 1027.200000 \n",
+ "4 94 1/0/00 451.200000 \n",
+ ".. ... ... ... \n",
+ "991 70 1/0/00 185.667213 \n",
+ "992 68 1/0/00 140.747286 \n",
+ "993 70 1/0/00 471.050488 \n",
+ "994 96 1/0/00 28.460568 \n",
+ "995 111 1/0/00 700.349052 \n",
+ "\n",
+ " Policy Type Vehicle Class \n",
+ "0 Personal Auto Four-Door Car \n",
+ "1 Special Auto SUV \n",
+ "2 Personal Auto Two-Door Car \n",
+ "3 Personal Auto Luxury Car \n",
+ "4 Corporate Auto Two-Door Car \n",
+ ".. ... ... \n",
+ "991 Personal Auto Four-Door Car \n",
+ "992 Corporate Auto Four-Door Car \n",
+ "993 Corporate Auto Two-Door Car \n",
+ "994 Personal Auto Two-Door Car \n",
+ "995 Personal Auto SUV \n",
+ "\n",
+ "[996 rows x 11 columns]"
+ ]
+ },
+ "execution_count": 16,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "file_2"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 17,
+ "id": "b31d8c6f-6c64-49a6-964b-d509454b7af1",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Load the third customer file straight from the course repository\n",
+ "url_3 = 'https://raw.githubusercontent.com/data-bootcamp-v4/data/main/file3.csv'\n",
+ "file_3 = pd.read_csv(filepath_or_buffer=url_3)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 18,
+ "id": "4d9d81df-db75-47ba-8553-c748ee4c7d68",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Customer | \n",
+ " State | \n",
+ " Customer Lifetime Value | \n",
+ " Education | \n",
+ " Gender | \n",
+ " Income | \n",
+ " Monthly Premium Auto | \n",
+ " Number of Open Complaints | \n",
+ " Policy Type | \n",
+ " Total Claim Amount | \n",
+ " Vehicle Class | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " SA25987 | \n",
+ " Washington | \n",
+ " 3479.137523 | \n",
+ " High School or Below | \n",
+ " M | \n",
+ " 0 | \n",
+ " 104 | \n",
+ " 0 | \n",
+ " Personal Auto | \n",
+ " 499.200000 | \n",
+ " Two-Door Car | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " TB86706 | \n",
+ " Arizona | \n",
+ " 2502.637401 | \n",
+ " Master | \n",
+ " M | \n",
+ " 0 | \n",
+ " 66 | \n",
+ " 0 | \n",
+ " Personal Auto | \n",
+ " 3.468912 | \n",
+ " Two-Door Car | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " ZL73902 | \n",
+ " Nevada | \n",
+ " 3265.156348 | \n",
+ " Bachelor | \n",
+ " F | \n",
+ " 25820 | \n",
+ " 82 | \n",
+ " 0 | \n",
+ " Personal Auto | \n",
+ " 393.600000 | \n",
+ " Four-Door Car | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " KX23516 | \n",
+ " California | \n",
+ " 4455.843406 | \n",
+ " High School or Below | \n",
+ " F | \n",
+ " 0 | \n",
+ " 121 | \n",
+ " 0 | \n",
+ " Personal Auto | \n",
+ " 699.615192 | \n",
+ " SUV | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " FN77294 | \n",
+ " California | \n",
+ " 7704.958480 | \n",
+ " High School or Below | \n",
+ " M | \n",
+ " 30366 | \n",
+ " 101 | \n",
+ " 2 | \n",
+ " Personal Auto | \n",
+ " 484.800000 | \n",
+ " SUV | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 7065 | \n",
+ " LA72316 | \n",
+ " California | \n",
+ " 23405.987980 | \n",
+ " Bachelor | \n",
+ " M | \n",
+ " 71941 | \n",
+ " 73 | \n",
+ " 0 | \n",
+ " Personal Auto | \n",
+ " 198.234764 | \n",
+ " Four-Door Car | \n",
+ "
\n",
+ " \n",
+ " 7066 | \n",
+ " PK87824 | \n",
+ " California | \n",
+ " 3096.511217 | \n",
+ " College | \n",
+ " F | \n",
+ " 21604 | \n",
+ " 79 | \n",
+ " 0 | \n",
+ " Corporate Auto | \n",
+ " 379.200000 | \n",
+ " Four-Door Car | \n",
+ "
\n",
+ " \n",
+ " 7067 | \n",
+ " TD14365 | \n",
+ " California | \n",
+ " 8163.890428 | \n",
+ " Bachelor | \n",
+ " M | \n",
+ " 0 | \n",
+ " 85 | \n",
+ " 3 | \n",
+ " Corporate Auto | \n",
+ " 790.784983 | \n",
+ " Four-Door Car | \n",
+ "
\n",
+ " \n",
+ " 7068 | \n",
+ " UP19263 | \n",
+ " California | \n",
+ " 7524.442436 | \n",
+ " College | \n",
+ " M | \n",
+ " 21941 | \n",
+ " 96 | \n",
+ " 0 | \n",
+ " Personal Auto | \n",
+ " 691.200000 | \n",
+ " Four-Door Car | \n",
+ "
\n",
+ " \n",
+ " 7069 | \n",
+ " Y167826 | \n",
+ " California | \n",
+ " 2611.836866 | \n",
+ " College | \n",
+ " M | \n",
+ " 0 | \n",
+ " 77 | \n",
+ " 0 | \n",
+ " Corporate Auto | \n",
+ " 369.600000 | \n",
+ " Two-Door Car | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
7070 rows × 11 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Customer State Customer Lifetime Value Education \\\n",
+ "0 SA25987 Washington 3479.137523 High School or Below \n",
+ "1 TB86706 Arizona 2502.637401 Master \n",
+ "2 ZL73902 Nevada 3265.156348 Bachelor \n",
+ "3 KX23516 California 4455.843406 High School or Below \n",
+ "4 FN77294 California 7704.958480 High School or Below \n",
+ "... ... ... ... ... \n",
+ "7065 LA72316 California 23405.987980 Bachelor \n",
+ "7066 PK87824 California 3096.511217 College \n",
+ "7067 TD14365 California 8163.890428 Bachelor \n",
+ "7068 UP19263 California 7524.442436 College \n",
+ "7069 Y167826 California 2611.836866 College \n",
+ "\n",
+ " Gender Income Monthly Premium Auto Number of Open Complaints \\\n",
+ "0 M 0 104 0 \n",
+ "1 M 0 66 0 \n",
+ "2 F 25820 82 0 \n",
+ "3 F 0 121 0 \n",
+ "4 M 30366 101 2 \n",
+ "... ... ... ... ... \n",
+ "7065 M 71941 73 0 \n",
+ "7066 F 21604 79 0 \n",
+ "7067 M 0 85 3 \n",
+ "7068 M 21941 96 0 \n",
+ "7069 M 0 77 0 \n",
+ "\n",
+ " Policy Type Total Claim Amount Vehicle Class \n",
+ "0 Personal Auto 499.200000 Two-Door Car \n",
+ "1 Personal Auto 3.468912 Two-Door Car \n",
+ "2 Personal Auto 393.600000 Four-Door Car \n",
+ "3 Personal Auto 699.615192 SUV \n",
+ "4 Personal Auto 484.800000 SUV \n",
+ "... ... ... ... \n",
+ "7065 Personal Auto 198.234764 Four-Door Car \n",
+ "7066 Corporate Auto 379.200000 Four-Door Car \n",
+ "7067 Corporate Auto 790.784983 Four-Door Car \n",
+ "7068 Personal Auto 691.200000 Four-Door Car \n",
+ "7069 Corporate Auto 369.600000 Two-Door Car \n",
+ "\n",
+ "[7070 rows x 11 columns]"
+ ]
+ },
+ "execution_count": 18,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "file_3"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 44,
+ "id": "80fde7a0-3405-491f-bc14-5d8d9ba026a4",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Stack the three files row-wise and renumber the rows from 0.\n",
+ "# NOTE(review): file_3 labels its columns 'State'/'Gender' while file_1 and file_2 use\n",
+ "# 'ST'/'GENDER', so the combined frame has 13 columns with NaN-padded duplicates.\n",
+ "# TODO confirm these are reconciled in a later cell.\n",
+ "file_combined = pd.concat(\n",
+ "    objs=[file_1, file_2, file_3],\n",
+ "    axis=0,\n",
+ "    ignore_index=True,\n",
+ ")\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 45,
+ "id": "94add22a-b471-4cc4-9710-b2a6e26ecf97",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Customer | \n",
+ " ST | \n",
+ " GENDER | \n",
+ " Education | \n",
+ " Customer Lifetime Value | \n",
+ " Income | \n",
+ " Monthly Premium Auto | \n",
+ " Number of Open Complaints | \n",
+ " Policy Type | \n",
+ " Vehicle Class | \n",
+ " Total Claim Amount | \n",
+ " State | \n",
+ " Gender | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " RB50392 | \n",
+ " Washington | \n",
+ " NaN | \n",
+ " Master | \n",
+ " NaN | \n",
+ " 0.0 | \n",
+ " 1000.0 | \n",
+ " 1/0/00 | \n",
+ " Personal Auto | \n",
+ " Four-Door Car | \n",
+ " 2.704934 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " QZ44356 | \n",
+ " Arizona | \n",
+ " F | \n",
+ " Bachelor | \n",
+ " 697953.59% | \n",
+ " 0.0 | \n",
+ " 94.0 | \n",
+ " 1/0/00 | \n",
+ " Personal Auto | \n",
+ " Four-Door Car | \n",
+ " 1131.464935 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " AI49188 | \n",
+ " Nevada | \n",
+ " F | \n",
+ " Bachelor | \n",
+ " 1288743.17% | \n",
+ " 48767.0 | \n",
+ " 108.0 | \n",
+ " 1/0/00 | \n",
+ " Personal Auto | \n",
+ " Two-Door Car | \n",
+ " 566.472247 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " WW63253 | \n",
+ " California | \n",
+ " M | \n",
+ " Bachelor | \n",
+ " 764586.18% | \n",
+ " 0.0 | \n",
+ " 106.0 | \n",
+ " 1/0/00 | \n",
+ " Corporate Auto | \n",
+ " SUV | \n",
+ " 529.881344 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " GA49547 | \n",
+ " Washington | \n",
+ " M | \n",
+ " High School or Below | \n",
+ " 536307.65% | \n",
+ " 36357.0 | \n",
+ " 68.0 | \n",
+ " 1/0/00 | \n",
+ " Personal Auto | \n",
+ " Four-Door Car | \n",
+ " 17.269323 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 12069 | \n",
+ " LA72316 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " Bachelor | \n",
+ " 23405.98798 | \n",
+ " 71941.0 | \n",
+ " 73.0 | \n",
+ " 0 | \n",
+ " Personal Auto | \n",
+ " Four-Door Car | \n",
+ " 198.234764 | \n",
+ " California | \n",
+ " M | \n",
+ "
\n",
+ " \n",
+ " 12070 | \n",
+ " PK87824 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " College | \n",
+ " 3096.511217 | \n",
+ " 21604.0 | \n",
+ " 79.0 | \n",
+ " 0 | \n",
+ " Corporate Auto | \n",
+ " Four-Door Car | \n",
+ " 379.200000 | \n",
+ " California | \n",
+ " F | \n",
+ "
\n",
+ " \n",
+ " 12071 | \n",
+ " TD14365 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " Bachelor | \n",
+ " 8163.890428 | \n",
+ " 0.0 | \n",
+ " 85.0 | \n",
+ " 3 | \n",
+ " Corporate Auto | \n",
+ " Four-Door Car | \n",
+ " 790.784983 | \n",
+ " California | \n",
+ " M | \n",
+ "
\n",
+ " \n",
+ " 12072 | \n",
+ " UP19263 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " College | \n",
+ " 7524.442436 | \n",
+ " 21941.0 | \n",
+ " 96.0 | \n",
+ " 0 | \n",
+ " Personal Auto | \n",
+ " Four-Door Car | \n",
+ " 691.200000 | \n",
+ " California | \n",
+ " M | \n",
+ "
\n",
+ " \n",
+ " 12073 | \n",
+ " Y167826 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " College | \n",
+ " 2611.836866 | \n",
+ " 0.0 | \n",
+ " 77.0 | \n",
+ " 0 | \n",
+ " Corporate Auto | \n",
+ " Two-Door Car | \n",
+ " 369.600000 | \n",
+ " California | \n",
+ " M | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
12074 rows × 13 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Customer ST GENDER Education \\\n",
+ "0 RB50392 Washington NaN Master \n",
+ "1 QZ44356 Arizona F Bachelor \n",
+ "2 AI49188 Nevada F Bachelor \n",
+ "3 WW63253 California M Bachelor \n",
+ "4 GA49547 Washington M High School or Below \n",
+ "... ... ... ... ... \n",
+ "12069 LA72316 NaN NaN Bachelor \n",
+ "12070 PK87824 NaN NaN College \n",
+ "12071 TD14365 NaN NaN Bachelor \n",
+ "12072 UP19263 NaN NaN College \n",
+ "12073 Y167826 NaN NaN College \n",
+ "\n",
+ " Customer Lifetime Value Income Monthly Premium Auto \\\n",
+ "0 NaN 0.0 1000.0 \n",
+ "1 697953.59% 0.0 94.0 \n",
+ "2 1288743.17% 48767.0 108.0 \n",
+ "3 764586.18% 0.0 106.0 \n",
+ "4 536307.65% 36357.0 68.0 \n",
+ "... ... ... ... \n",
+ "12069 23405.98798 71941.0 73.0 \n",
+ "12070 3096.511217 21604.0 79.0 \n",
+ "12071 8163.890428 0.0 85.0 \n",
+ "12072 7524.442436 21941.0 96.0 \n",
+ "12073 2611.836866 0.0 77.0 \n",
+ "\n",
+ " Number of Open Complaints Policy Type Vehicle Class \\\n",
+ "0 1/0/00 Personal Auto Four-Door Car \n",
+ "1 1/0/00 Personal Auto Four-Door Car \n",
+ "2 1/0/00 Personal Auto Two-Door Car \n",
+ "3 1/0/00 Corporate Auto SUV \n",
+ "4 1/0/00 Personal Auto Four-Door Car \n",
+ "... ... ... ... \n",
+ "12069 0 Personal Auto Four-Door Car \n",
+ "12070 0 Corporate Auto Four-Door Car \n",
+ "12071 3 Corporate Auto Four-Door Car \n",
+ "12072 0 Personal Auto Four-Door Car \n",
+ "12073 0 Corporate Auto Two-Door Car \n",
+ "\n",
+ " Total Claim Amount State Gender \n",
+ "0 2.704934 NaN NaN \n",
+ "1 1131.464935 NaN NaN \n",
+ "2 566.472247 NaN NaN \n",
+ "3 529.881344 NaN NaN \n",
+ "4 17.269323 NaN NaN \n",
+ "... ... ... ... \n",
+ "12069 198.234764 California M \n",
+ "12070 379.200000 California F \n",
+ "12071 790.784983 California M \n",
+ "12072 691.200000 California M \n",
+ "12073 369.600000 California M \n",
+ "\n",
+ "[12074 rows x 13 columns]"
+ ]
+ },
+ "execution_count": 45,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Persist the combined (not yet cleaned) frame without the row index, then show it\n",
+ "file_combined.to_csv(path_or_buf=\"file_combined.csv\", index=False)\n",
+ "file_combined"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 46,
+ "id": "caab6731-9b5d-471f-8cd9-368402168c3f",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Original columns: ['Customer', 'ST', 'GENDER', 'Education', 'Customer Lifetime Value', 'Income', 'Monthly Premium Auto', 'Number of Open Complaints', 'Policy Type', 'Vehicle Class', 'Total Claim Amount', 'State', 'Gender']\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Record the headers as they are before any renaming\n",
+ "print(\"Original columns:\", list(file_combined.columns))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 47,
+ "id": "dea296b8-efcd-45e4-8f21-3b0313c0ae46",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Function to clean column names\n",
+ "def clean_column_names(columns):\n",
+ " new_cols = []\n",
+ " for col in columns:\n",
+ " col = col.upper() \n",
+ " col = col.strip() \n",
+ " col = col.replace(\" \", \"_\") \n",
+ " new_cols.append(col)\n",
+ " return new_cols"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 48,
+ "id": "b18a1916-8122-4b89-9e32-0e4095492698",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Swap the headers for their normalised form.\n",
+ "# NOTE(review): 'GENDER' and 'Gender' both become 'GENDER', so the frame ends up with two\n",
+ "# columns carrying that label - verify this is resolved before selecting by name.\n",
+ "file_combined.columns = clean_column_names(file_combined.columns.tolist())"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 49,
+ "id": "62a01a94-850b-42ca-876b-96b2246b87cf",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " CUSTOMER | \n",
+ " ST | \n",
+ " GENDER | \n",
+ " EDUCATION | \n",
+ " CUSTOMER_LIFETIME_VALUE | \n",
+ " INCOME | \n",
+ " MONTHLY_PREMIUM_AUTO | \n",
+ " NUMBER_OF_OPEN_COMPLAINTS | \n",
+ " POLICY_TYPE | \n",
+ " VEHICLE_CLASS | \n",
+ " TOTAL_CLAIM_AMOUNT | \n",
+ " STATE | \n",
+ " GENDER | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " RB50392 | \n",
+ " Washington | \n",
+ " NaN | \n",
+ " Master | \n",
+ " NaN | \n",
+ " 0.0 | \n",
+ " 1000.0 | \n",
+ " 1/0/00 | \n",
+ " Personal Auto | \n",
+ " Four-Door Car | \n",
+ " 2.704934 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " QZ44356 | \n",
+ " Arizona | \n",
+ " F | \n",
+ " Bachelor | \n",
+ " 697953.59% | \n",
+ " 0.0 | \n",
+ " 94.0 | \n",
+ " 1/0/00 | \n",
+ " Personal Auto | \n",
+ " Four-Door Car | \n",
+ " 1131.464935 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " AI49188 | \n",
+ " Nevada | \n",
+ " F | \n",
+ " Bachelor | \n",
+ " 1288743.17% | \n",
+ " 48767.0 | \n",
+ " 108.0 | \n",
+ " 1/0/00 | \n",
+ " Personal Auto | \n",
+ " Two-Door Car | \n",
+ " 566.472247 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " WW63253 | \n",
+ " California | \n",
+ " M | \n",
+ " Bachelor | \n",
+ " 764586.18% | \n",
+ " 0.0 | \n",
+ " 106.0 | \n",
+ " 1/0/00 | \n",
+ " Corporate Auto | \n",
+ " SUV | \n",
+ " 529.881344 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " GA49547 | \n",
+ " Washington | \n",
+ " M | \n",
+ " High School or Below | \n",
+ " 536307.65% | \n",
+ " 36357.0 | \n",
+ " 68.0 | \n",
+ " 1/0/00 | \n",
+ " Personal Auto | \n",
+ " Four-Door Car | \n",
+ " 17.269323 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 12069 | \n",
+ " LA72316 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " Bachelor | \n",
+ " 23405.98798 | \n",
+ " 71941.0 | \n",
+ " 73.0 | \n",
+ " 0 | \n",
+ " Personal Auto | \n",
+ " Four-Door Car | \n",
+ " 198.234764 | \n",
+ " California | \n",
+ " M | \n",
+ "
\n",
+ " \n",
+ " 12070 | \n",
+ " PK87824 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " College | \n",
+ " 3096.511217 | \n",
+ " 21604.0 | \n",
+ " 79.0 | \n",
+ " 0 | \n",
+ " Corporate Auto | \n",
+ " Four-Door Car | \n",
+ " 379.200000 | \n",
+ " California | \n",
+ " F | \n",
+ "
\n",
+ " \n",
+ " 12071 | \n",
+ " TD14365 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " Bachelor | \n",
+ " 8163.890428 | \n",
+ " 0.0 | \n",
+ " 85.0 | \n",
+ " 3 | \n",
+ " Corporate Auto | \n",
+ " Four-Door Car | \n",
+ " 790.784983 | \n",
+ " California | \n",
+ " M | \n",
+ "
\n",
+ " \n",
+ " 12072 | \n",
+ " UP19263 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " College | \n",
+ " 7524.442436 | \n",
+ " 21941.0 | \n",
+ " 96.0 | \n",
+ " 0 | \n",
+ " Personal Auto | \n",
+ " Four-Door Car | \n",
+ " 691.200000 | \n",
+ " California | \n",
+ " M | \n",
+ "
\n",
+ " \n",
+ " 12073 | \n",
+ " Y167826 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " College | \n",
+ " 2611.836866 | \n",
+ " 0.0 | \n",
+ " 77.0 | \n",
+ " 0 | \n",
+ " Corporate Auto | \n",
+ " Two-Door Car | \n",
+ " 369.600000 | \n",
+ " California | \n",
+ " M | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
12074 rows × 13 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " CUSTOMER ST GENDER EDUCATION \\\n",
+ "0 RB50392 Washington NaN Master \n",
+ "1 QZ44356 Arizona F Bachelor \n",
+ "2 AI49188 Nevada F Bachelor \n",
+ "3 WW63253 California M Bachelor \n",
+ "4 GA49547 Washington M High School or Below \n",
+ "... ... ... ... ... \n",
+ "12069 LA72316 NaN NaN Bachelor \n",
+ "12070 PK87824 NaN NaN College \n",
+ "12071 TD14365 NaN NaN Bachelor \n",
+ "12072 UP19263 NaN NaN College \n",
+ "12073 Y167826 NaN NaN College \n",
+ "\n",
+ " CUSTOMER_LIFETIME_VALUE INCOME MONTHLY_PREMIUM_AUTO \\\n",
+ "0 NaN 0.0 1000.0 \n",
+ "1 697953.59% 0.0 94.0 \n",
+ "2 1288743.17% 48767.0 108.0 \n",
+ "3 764586.18% 0.0 106.0 \n",
+ "4 536307.65% 36357.0 68.0 \n",
+ "... ... ... ... \n",
+ "12069 23405.98798 71941.0 73.0 \n",
+ "12070 3096.511217 21604.0 79.0 \n",
+ "12071 8163.890428 0.0 85.0 \n",
+ "12072 7524.442436 21941.0 96.0 \n",
+ "12073 2611.836866 0.0 77.0 \n",
+ "\n",
+ " NUMBER_OF_OPEN_COMPLAINTS POLICY_TYPE VEHICLE_CLASS \\\n",
+ "0 1/0/00 Personal Auto Four-Door Car \n",
+ "1 1/0/00 Personal Auto Four-Door Car \n",
+ "2 1/0/00 Personal Auto Two-Door Car \n",
+ "3 1/0/00 Corporate Auto SUV \n",
+ "4 1/0/00 Personal Auto Four-Door Car \n",
+ "... ... ... ... \n",
+ "12069 0 Personal Auto Four-Door Car \n",
+ "12070 0 Corporate Auto Four-Door Car \n",
+ "12071 3 Corporate Auto Four-Door Car \n",
+ "12072 0 Personal Auto Four-Door Car \n",
+ "12073 0 Corporate Auto Two-Door Car \n",
+ "\n",
+ " TOTAL_CLAIM_AMOUNT STATE GENDER \n",
+ "0 2.704934 NaN NaN \n",
+ "1 1131.464935 NaN NaN \n",
+ "2 566.472247 NaN NaN \n",
+ "3 529.881344 NaN NaN \n",
+ "4 17.269323 NaN NaN \n",
+ "... ... ... ... \n",
+ "12069 198.234764 California M \n",
+ "12070 379.200000 California F \n",
+ "12071 790.784983 California M \n",
+ "12072 691.200000 California M \n",
+ "12073 369.600000 California M \n",
+ "\n",
+ "[12074 rows x 13 columns]"
+ ]
+ },
+ "execution_count": 49,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "file_combined"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 50,
+ "id": "8793b688-af31-4938-aea3-3f7bd12412e3",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Data-cleaning helper functions\n",
+ "\n",
+ "def standardize_gender(file_combined, column=\"GENDER\"):\n",
+ "    \"\"\"Collapse spelled-out gender labels to the codes 'M' and 'F'.\n",
+ "\n",
+ "    Values are upper-cased before matching, so the comparison is\n",
+ "    case-insensitive. The frame is modified in place and returned; it is\n",
+ "    returned untouched when `column` is not one of its columns.\n",
+ "    \"\"\"\n",
+ "    if column not in file_combined.columns:\n",
+ "        return file_combined\n",
+ "    gender_codes = {\"FEMALE\": \"F\", \"FEMAL\": \"F\", \"MALE\": \"M\"}\n",
+ "    upper_cased = file_combined[column].str.upper()\n",
+ "    file_combined[column] = upper_cased.replace(gender_codes)\n",
+ "    return file_combined\n",
+ " \n",
+ "def standardize_states(file_combined, column=\"STATES\"):\n",
+ "    \"\"\"Replace state abbreviations with full names.\n",
+ "\n",
+ "    Args:\n",
+ "        file_combined: DataFrame to clean; it is modified in place.\n",
+ "        column: Label of the state column. When this label is absent, the\n",
+ "            function falls back to whichever of \"STATE\" / \"ST\" is present.\n",
+ "\n",
+ "    Returns:\n",
+ "        The same DataFrame object that was passed in.\n",
+ "    \"\"\"\n",
+ "    state_mapping = {\n",
+ "        \"AZ\": \"Arizona\",\n",
+ "        \"Cali\": \"California\",\n",
+ "        \"WA\": \"Washington\"\n",
+ "    }\n",
+ "    if column in file_combined.columns:\n",
+ "        targets = [column]\n",
+ "    else:\n",
+ "        # The default label \"STATES\" does not occur in the combined frame, whose\n",
+ "        # state columns are labelled \"ST\" and \"STATE\"; without this fallback the\n",
+ "        # function silently did nothing.\n",
+ "        targets = [name for name in (\"STATE\", \"ST\") if name in file_combined.columns]\n",
+ "    for name in targets:\n",
+ "        file_combined[name] = file_combined[name].replace(state_mapping)\n",
+ "    return file_combined\n",
+ "\n",
+ "def standardize_education(file_combined, column=\"EDUCATION\"):\n",
+ "    \"\"\"Rename the education level 'Bachelors' to 'Bachelor'.\n",
+ "\n",
+ "    The frame is modified in place and returned; it is returned untouched when\n",
+ "    `column` is not one of its columns.\n",
+ "    \"\"\"\n",
+ "    if column not in file_combined.columns:\n",
+ "        return file_combined\n",
+ "    education_fixes = {\"Bachelors\": \"Bachelor\"}\n",
+ "    file_combined[column] = file_combined[column].replace(education_fixes)\n",
+ "    return file_combined\n",
+ "\n",
+ "def clean_customer_lifetime_value(file_combined, column=\"CUSTOMER_LIFETIME_VALUE\"):\n",
+ "    \"\"\"Strip '%' and '$' from the column and convert it to numbers.\n",
+ "\n",
+ "    Values that still cannot be parsed after stripping (including missing\n",
+ "    values) become NaN. The frame is modified in place and returned; it is\n",
+ "    returned untouched when `column` is not one of its columns.\n",
+ "    \"\"\"\n",
+ "    if column in file_combined.columns:\n",
+ "        as_text = file_combined[column].astype(str)\n",
+ "        # regex=False: \"$\" must be removed literally. As a regular expression it\n",
+ "        # is the end-of-string anchor, and the default of `regex` differs between\n",
+ "        # pandas versions.\n",
+ "        for symbol in (\"%\", \"$\"):\n",
+ "            as_text = as_text.str.replace(symbol, \"\", regex=False)\n",
+ "        file_combined[column] = pd.to_numeric(as_text.str.strip(), errors='coerce')\n",
+ "    return file_combined\n",
+ "\n",
+ "def standardize_vehicle_class(file_combined, column=\"VEHICLE_CLASS\"):\n",
+ "    \"\"\"Group the sports and luxury vehicle classes under the label 'Luxury'.\n",
+ "\n",
+ "    'Sports Car', 'Luxury SUV' and 'Luxury Car' are all replaced by 'Luxury';\n",
+ "    other values are kept. The frame is modified in place and returned; it is\n",
+ "    returned untouched when `column` is not one of its columns.\n",
+ "    \"\"\"\n",
+ "    if column not in file_combined.columns:\n",
+ "        return file_combined\n",
+ "    luxury_classes = (\"Sports Car\", \"Luxury SUV\", \"Luxury Car\")\n",
+ "    class_mapping = dict.fromkeys(luxury_classes, \"Luxury\")\n",
+ "    file_combined[column] = file_combined[column].replace(class_mapping)\n",
+ "    return file_combined\n",
+ "\n",
+ "def clean_number_of_open_complaints(file_combined, column=\"NUMBER_OF_OPEN_COMPLAINTS\"):\n",
+ "    \"\"\"Convert the complaints column to integers.\n",
+ "\n",
+ "    Strings such as '1/5/00' contribute their middle field (5); values without\n",
+ "    a '/' are converted directly. Missing values become 0. The frame is\n",
+ "    modified in place and returned; it is returned untouched when `column` is\n",
+ "    not one of its columns.\n",
+ "\n",
+ "    Raises:\n",
+ "        ValueError: If a non-missing value is not numeric after splitting.\n",
+ "    \"\"\"\n",
+ "    def extract_middle(value):\n",
+ "        if pd.isna(value):\n",
+ "            return 0\n",
+ "        parts = str(value).split(\"/\")\n",
+ "        token = parts[1] if len(parts) >= 2 else parts[0]\n",
+ "        # Go through float() so float-typed counts such as 0.0 (rendered as\n",
+ "        # \"0.0\") are accepted; int(\"0.0\") raises ValueError.\n",
+ "        return int(float(token))\n",
+ "\n",
+ "    if column in file_combined.columns:\n",
+ "        file_combined[column] = file_combined[column].apply(extract_middle)\n",
+ "    return file_combined\n",
+ "\n",
+ "def fill_missing_values(file_combined):\n",
+ "    \"\"\"Fill missing numeric values with the mean, categorical with the mode.\n",
+ "\n",
+ "    Numeric columns are those of dtype int64/float64; categorical columns are\n",
+ "    those of dtype object/category. A categorical column with no non-missing\n",
+ "    value has no mode and is left unchanged. The frame is modified in place\n",
+ "    and returned.\n",
+ "    \"\"\"\n",
+ "    # NOTE(review): assumes column labels are unique. The combined frame shown\n",
+ "    # in this notebook has two GENDER columns; confirm before relying on this.\n",
+ "    num_cols = file_combined.select_dtypes(include=['int64', 'float64']).columns\n",
+ "    cat_cols = file_combined.select_dtypes(include=['object', 'category']).columns\n",
+ "\n",
+ "    # Assign the filled column back instead of calling fillna(inplace=True) on\n",
+ "    # file_combined[col]: that form acts on an intermediate object and is not\n",
+ "    # guaranteed to update the frame.\n",
+ "    for col in num_cols:\n",
+ "        file_combined[col] = file_combined[col].fillna(file_combined[col].mean())\n",
+ "    for col in cat_cols:\n",
+ "        modes = file_combined[col].mode()\n",
+ "        # mode() is empty when every value is missing; indexing it would raise.\n",
+ "        if not modes.empty:\n",
+ "            file_combined[col] = file_combined[col].fillna(modes.iloc[0])\n",
+ "    return file_combined\n",
+ "\n",
+ "def drop_duplicates(file_combined, subset=None):\n",
+ "    \"\"\"Remove duplicate rows in place, keeping the first occurrence.\n",
+ "\n",
+ "    `subset` optionally limits the columns compared. The surviving rows are\n",
+ "    renumbered from 0 and the same frame object is returned.\n",
+ "    \"\"\"\n",
+ "    file_combined.drop_duplicates(\n",
+ "        subset=subset, keep='first', inplace=True, ignore_index=True\n",
+ "    )\n",
+ "    return file_combined\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 53,
+ "id": "b8b5114a-311a-4d7a-9f36-6d2813470031",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def main(file_combined):\n",
+ "    \"\"\"Run every cleaning and formatting step on the frame, in order.\n",
+ "\n",
+ "    Each step receives the frame returned by the previous one; the result of\n",
+ "    the last step is returned.\n",
+ "    \"\"\"\n",
+ "    cleaning_steps = (\n",
+ "        standardize_gender,\n",
+ "        standardize_states,\n",
+ "        standardize_education,\n",
+ "        clean_customer_lifetime_value,\n",
+ "        standardize_vehicle_class,\n",
+ "        clean_number_of_open_complaints,\n",
+ "        fill_missing_values,\n",
+ "        drop_duplicates,\n",
+ "    )\n",
+ "    for step in cleaning_steps:\n",
+ "        file_combined = step(file_combined)\n",
+ "    return file_combined"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 54,
+ "id": "e068e8c9-bbc8-4238-8312-a0caf55e20bb",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " CUSTOMER | \n",
+ " ST | \n",
+ " GENDER | \n",
+ " EDUCATION | \n",
+ " CUSTOMER_LIFETIME_VALUE | \n",
+ " INCOME | \n",
+ " MONTHLY_PREMIUM_AUTO | \n",
+ " NUMBER_OF_OPEN_COMPLAINTS | \n",
+ " POLICY_TYPE | \n",
+ " VEHICLE_CLASS | \n",
+ " TOTAL_CLAIM_AMOUNT | \n",
+ " STATE | \n",
+ " GENDER | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " RB50392 | \n",
+ " Washington | \n",
+ " NaN | \n",
+ " Master | \n",
+ " NaN | \n",
+ " 0.0 | \n",
+ " 1000.0 | \n",
+ " 1/0/00 | \n",
+ " Personal Auto | \n",
+ " Four-Door Car | \n",
+ " 2.704934 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " QZ44356 | \n",
+ " Arizona | \n",
+ " F | \n",
+ " Bachelor | \n",
+ " 697953.59% | \n",
+ " 0.0 | \n",
+ " 94.0 | \n",
+ " 1/0/00 | \n",
+ " Personal Auto | \n",
+ " Four-Door Car | \n",
+ " 1131.464935 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " AI49188 | \n",
+ " Nevada | \n",
+ " F | \n",
+ " Bachelor | \n",
+ " 1288743.17% | \n",
+ " 48767.0 | \n",
+ " 108.0 | \n",
+ " 1/0/00 | \n",
+ " Personal Auto | \n",
+ " Two-Door Car | \n",
+ " 566.472247 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " WW63253 | \n",
+ " California | \n",
+ " M | \n",
+ " Bachelor | \n",
+ " 764586.18% | \n",
+ " 0.0 | \n",
+ " 106.0 | \n",
+ " 1/0/00 | \n",
+ " Corporate Auto | \n",
+ " SUV | \n",
+ " 529.881344 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " GA49547 | \n",
+ " Washington | \n",
+ " M | \n",
+ " High School or Below | \n",
+ " 536307.65% | \n",
+ " 36357.0 | \n",
+ " 68.0 | \n",
+ " 1/0/00 | \n",
+ " Personal Auto | \n",
+ " Four-Door Car | \n",
+ " 17.269323 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 12069 | \n",
+ " LA72316 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " Bachelor | \n",
+ " 23405.98798 | \n",
+ " 71941.0 | \n",
+ " 73.0 | \n",
+ " 0 | \n",
+ " Personal Auto | \n",
+ " Four-Door Car | \n",
+ " 198.234764 | \n",
+ " California | \n",
+ " M | \n",
+ "
\n",
+ " \n",
+ " 12070 | \n",
+ " PK87824 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " College | \n",
+ " 3096.511217 | \n",
+ " 21604.0 | \n",
+ " 79.0 | \n",
+ " 0 | \n",
+ " Corporate Auto | \n",
+ " Four-Door Car | \n",
+ " 379.200000 | \n",
+ " California | \n",
+ " F | \n",
+ "
\n",
+ " \n",
+ " 12071 | \n",
+ " TD14365 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " Bachelor | \n",
+ " 8163.890428 | \n",
+ " 0.0 | \n",
+ " 85.0 | \n",
+ " 3 | \n",
+ " Corporate Auto | \n",
+ " Four-Door Car | \n",
+ " 790.784983 | \n",
+ " California | \n",
+ " M | \n",
+ "
\n",
+ " \n",
+ " 12072 | \n",
+ " UP19263 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " College | \n",
+ " 7524.442436 | \n",
+ " 21941.0 | \n",
+ " 96.0 | \n",
+ " 0 | \n",
+ " Personal Auto | \n",
+ " Four-Door Car | \n",
+ " 691.200000 | \n",
+ " California | \n",
+ " M | \n",
+ "
\n",
+ " \n",
+ " 12073 | \n",
+ " Y167826 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " College | \n",
+ " 2611.836866 | \n",
+ " 0.0 | \n",
+ " 77.0 | \n",
+ " 0 | \n",
+ " Corporate Auto | \n",
+ " Two-Door Car | \n",
+ " 369.600000 | \n",
+ " California | \n",
+ " M | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
12074 rows × 13 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " CUSTOMER ST GENDER EDUCATION \\\n",
+ "0 RB50392 Washington NaN Master \n",
+ "1 QZ44356 Arizona F Bachelor \n",
+ "2 AI49188 Nevada F Bachelor \n",
+ "3 WW63253 California M Bachelor \n",
+ "4 GA49547 Washington M High School or Below \n",
+ "... ... ... ... ... \n",
+ "12069 LA72316 NaN NaN Bachelor \n",
+ "12070 PK87824 NaN NaN College \n",
+ "12071 TD14365 NaN NaN Bachelor \n",
+ "12072 UP19263 NaN NaN College \n",
+ "12073 Y167826 NaN NaN College \n",
+ "\n",
+ " CUSTOMER_LIFETIME_VALUE INCOME MONTHLY_PREMIUM_AUTO \\\n",
+ "0 NaN 0.0 1000.0 \n",
+ "1 697953.59% 0.0 94.0 \n",
+ "2 1288743.17% 48767.0 108.0 \n",
+ "3 764586.18% 0.0 106.0 \n",
+ "4 536307.65% 36357.0 68.0 \n",
+ "... ... ... ... \n",
+ "12069 23405.98798 71941.0 73.0 \n",
+ "12070 3096.511217 21604.0 79.0 \n",
+ "12071 8163.890428 0.0 85.0 \n",
+ "12072 7524.442436 21941.0 96.0 \n",
+ "12073 2611.836866 0.0 77.0 \n",
+ "\n",
+ " NUMBER_OF_OPEN_COMPLAINTS POLICY_TYPE VEHICLE_CLASS \\\n",
+ "0 1/0/00 Personal Auto Four-Door Car \n",
+ "1 1/0/00 Personal Auto Four-Door Car \n",
+ "2 1/0/00 Personal Auto Two-Door Car \n",
+ "3 1/0/00 Corporate Auto SUV \n",
+ "4 1/0/00 Personal Auto Four-Door Car \n",
+ "... ... ... ... \n",
+ "12069 0 Personal Auto Four-Door Car \n",
+ "12070 0 Corporate Auto Four-Door Car \n",
+ "12071 3 Corporate Auto Four-Door Car \n",
+ "12072 0 Personal Auto Four-Door Car \n",
+ "12073 0 Corporate Auto Two-Door Car \n",
+ "\n",
+ " TOTAL_CLAIM_AMOUNT STATE GENDER \n",
+ "0 2.704934 NaN NaN \n",
+ "1 1131.464935 NaN NaN \n",
+ "2 566.472247 NaN NaN \n",
+ "3 529.881344 NaN NaN \n",
+ "4 17.269323 NaN NaN \n",
+ "... ... ... ... \n",
+ "12069 198.234764 California M \n",
+ "12070 379.200000 California F \n",
+ "12071 790.784983 California M \n",
+ "12072 691.200000 California M \n",
+ "12073 369.600000 California M \n",
+ "\n",
+ "[12074 rows x 13 columns]"
+ ]
+ },
+ "execution_count": 54,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "file_combined"
]
},
{
@@ -72,14 +1904,2533 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 31,
"id": "aa10d9b0-1c27-4d3f-a8e4-db6ab73bfd26",
"metadata": {
"id": "aa10d9b0-1c27-4d3f-a8e4-db6ab73bfd26"
},
"outputs": [],
"source": [
- "# Your code goes here"
+ "url_4 = 'https://raw.githubusercontent.com/data-bootcamp-v4/data/main/marketing_customer_analysis_clean.csv'\n",
+ "marketing_customer_analysis = pd.read_csv(url_4)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 32,
+ "id": "92d101fa-4da1-41ff-8e28-59e295975fa4",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " unnamed:_0 | \n",
+ " customer | \n",
+ " state | \n",
+ " customer_lifetime_value | \n",
+ " response | \n",
+ " coverage | \n",
+ " education | \n",
+ " effective_to_date | \n",
+ " employmentstatus | \n",
+ " gender | \n",
+ " ... | \n",
+ " number_of_policies | \n",
+ " policy_type | \n",
+ " policy | \n",
+ " renew_offer_type | \n",
+ " sales_channel | \n",
+ " total_claim_amount | \n",
+ " vehicle_class | \n",
+ " vehicle_size | \n",
+ " vehicle_type | \n",
+ " month | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 0 | \n",
+ " DK49336 | \n",
+ " Arizona | \n",
+ " 4809.216960 | \n",
+ " No | \n",
+ " Basic | \n",
+ " College | \n",
+ " 2011-02-18 | \n",
+ " Employed | \n",
+ " M | \n",
+ " ... | \n",
+ " 9 | \n",
+ " Corporate Auto | \n",
+ " Corporate L3 | \n",
+ " Offer3 | \n",
+ " Agent | \n",
+ " 292.800000 | \n",
+ " Four-Door Car | \n",
+ " Medsize | \n",
+ " A | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 1 | \n",
+ " KX64629 | \n",
+ " California | \n",
+ " 2228.525238 | \n",
+ " No | \n",
+ " Basic | \n",
+ " College | \n",
+ " 2011-01-18 | \n",
+ " Unemployed | \n",
+ " F | \n",
+ " ... | \n",
+ " 1 | \n",
+ " Personal Auto | \n",
+ " Personal L3 | \n",
+ " Offer4 | \n",
+ " Call Center | \n",
+ " 744.924331 | \n",
+ " Four-Door Car | \n",
+ " Medsize | \n",
+ " A | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 2 | \n",
+ " LZ68649 | \n",
+ " Washington | \n",
+ " 14947.917300 | \n",
+ " No | \n",
+ " Basic | \n",
+ " Bachelor | \n",
+ " 2011-02-10 | \n",
+ " Employed | \n",
+ " M | \n",
+ " ... | \n",
+ " 2 | \n",
+ " Personal Auto | \n",
+ " Personal L3 | \n",
+ " Offer3 | \n",
+ " Call Center | \n",
+ " 480.000000 | \n",
+ " SUV | \n",
+ " Medsize | \n",
+ " A | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 3 | \n",
+ " XL78013 | \n",
+ " Oregon | \n",
+ " 22332.439460 | \n",
+ " Yes | \n",
+ " Extended | \n",
+ " College | \n",
+ " 2011-01-11 | \n",
+ " Employed | \n",
+ " M | \n",
+ " ... | \n",
+ " 2 | \n",
+ " Corporate Auto | \n",
+ " Corporate L3 | \n",
+ " Offer2 | \n",
+ " Branch | \n",
+ " 484.013411 | \n",
+ " Four-Door Car | \n",
+ " Medsize | \n",
+ " A | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 4 | \n",
+ " QA50777 | \n",
+ " Oregon | \n",
+ " 9025.067525 | \n",
+ " No | \n",
+ " Premium | \n",
+ " Bachelor | \n",
+ " 2011-01-17 | \n",
+ " Medical Leave | \n",
+ " F | \n",
+ " ... | \n",
+ " 7 | \n",
+ " Personal Auto | \n",
+ " Personal L2 | \n",
+ " Offer1 | \n",
+ " Branch | \n",
+ " 707.925645 | \n",
+ " Four-Door Car | \n",
+ " Medsize | \n",
+ " A | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 10905 | \n",
+ " 10905 | \n",
+ " FE99816 | \n",
+ " Nevada | \n",
+ " 15563.369440 | \n",
+ " No | \n",
+ " Premium | \n",
+ " Bachelor | \n",
+ " 2011-01-19 | \n",
+ " Unemployed | \n",
+ " F | \n",
+ " ... | \n",
+ " 7 | \n",
+ " Personal Auto | \n",
+ " Personal L1 | \n",
+ " Offer3 | \n",
+ " Web | \n",
+ " 1214.400000 | \n",
+ " Luxury Car | \n",
+ " Medsize | \n",
+ " A | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 10906 | \n",
+ " 10906 | \n",
+ " KX53892 | \n",
+ " Oregon | \n",
+ " 5259.444853 | \n",
+ " No | \n",
+ " Basic | \n",
+ " College | \n",
+ " 2011-01-06 | \n",
+ " Employed | \n",
+ " F | \n",
+ " ... | \n",
+ " 6 | \n",
+ " Personal Auto | \n",
+ " Personal L3 | \n",
+ " Offer2 | \n",
+ " Branch | \n",
+ " 273.018929 | \n",
+ " Four-Door Car | \n",
+ " Medsize | \n",
+ " A | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 10907 | \n",
+ " 10907 | \n",
+ " TL39050 | \n",
+ " Arizona | \n",
+ " 23893.304100 | \n",
+ " No | \n",
+ " Extended | \n",
+ " Bachelor | \n",
+ " 2011-02-06 | \n",
+ " Employed | \n",
+ " F | \n",
+ " ... | \n",
+ " 2 | \n",
+ " Corporate Auto | \n",
+ " Corporate L3 | \n",
+ " Offer1 | \n",
+ " Web | \n",
+ " 381.306996 | \n",
+ " Luxury SUV | \n",
+ " Medsize | \n",
+ " A | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " 10908 | \n",
+ " 10908 | \n",
+ " WA60547 | \n",
+ " California | \n",
+ " 11971.977650 | \n",
+ " No | \n",
+ " Premium | \n",
+ " College | \n",
+ " 2011-02-13 | \n",
+ " Employed | \n",
+ " F | \n",
+ " ... | \n",
+ " 6 | \n",
+ " Personal Auto | \n",
+ " Personal L1 | \n",
+ " Offer1 | \n",
+ " Branch | \n",
+ " 618.288849 | \n",
+ " SUV | \n",
+ " Medsize | \n",
+ " A | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " 10909 | \n",
+ " 10909 | \n",
+ " IV32877 | \n",
+ " California | \n",
+ " 6857.519928 | \n",
+ " No | \n",
+ " Basic | \n",
+ " Bachelor | \n",
+ " 2011-01-08 | \n",
+ " Unemployed | \n",
+ " M | \n",
+ " ... | \n",
+ " 3 | \n",
+ " Personal Auto | \n",
+ " Personal L1 | \n",
+ " Offer4 | \n",
+ " Web | \n",
+ " 1021.719397 | \n",
+ " SUV | \n",
+ " Medsize | \n",
+ " A | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
10910 rows × 27 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " unnamed:_0 customer state customer_lifetime_value response \\\n",
+ "0 0 DK49336 Arizona 4809.216960 No \n",
+ "1 1 KX64629 California 2228.525238 No \n",
+ "2 2 LZ68649 Washington 14947.917300 No \n",
+ "3 3 XL78013 Oregon 22332.439460 Yes \n",
+ "4 4 QA50777 Oregon 9025.067525 No \n",
+ "... ... ... ... ... ... \n",
+ "10905 10905 FE99816 Nevada 15563.369440 No \n",
+ "10906 10906 KX53892 Oregon 5259.444853 No \n",
+ "10907 10907 TL39050 Arizona 23893.304100 No \n",
+ "10908 10908 WA60547 California 11971.977650 No \n",
+ "10909 10909 IV32877 California 6857.519928 No \n",
+ "\n",
+ " coverage education effective_to_date employmentstatus gender ... \\\n",
+ "0 Basic College 2011-02-18 Employed M ... \n",
+ "1 Basic College 2011-01-18 Unemployed F ... \n",
+ "2 Basic Bachelor 2011-02-10 Employed M ... \n",
+ "3 Extended College 2011-01-11 Employed M ... \n",
+ "4 Premium Bachelor 2011-01-17 Medical Leave F ... \n",
+ "... ... ... ... ... ... ... \n",
+ "10905 Premium Bachelor 2011-01-19 Unemployed F ... \n",
+ "10906 Basic College 2011-01-06 Employed F ... \n",
+ "10907 Extended Bachelor 2011-02-06 Employed F ... \n",
+ "10908 Premium College 2011-02-13 Employed F ... \n",
+ "10909 Basic Bachelor 2011-01-08 Unemployed M ... \n",
+ "\n",
+ " number_of_policies policy_type policy renew_offer_type \\\n",
+ "0 9 Corporate Auto Corporate L3 Offer3 \n",
+ "1 1 Personal Auto Personal L3 Offer4 \n",
+ "2 2 Personal Auto Personal L3 Offer3 \n",
+ "3 2 Corporate Auto Corporate L3 Offer2 \n",
+ "4 7 Personal Auto Personal L2 Offer1 \n",
+ "... ... ... ... ... \n",
+ "10905 7 Personal Auto Personal L1 Offer3 \n",
+ "10906 6 Personal Auto Personal L3 Offer2 \n",
+ "10907 2 Corporate Auto Corporate L3 Offer1 \n",
+ "10908 6 Personal Auto Personal L1 Offer1 \n",
+ "10909 3 Personal Auto Personal L1 Offer4 \n",
+ "\n",
+ " sales_channel total_claim_amount vehicle_class vehicle_size \\\n",
+ "0 Agent 292.800000 Four-Door Car Medsize \n",
+ "1 Call Center 744.924331 Four-Door Car Medsize \n",
+ "2 Call Center 480.000000 SUV Medsize \n",
+ "3 Branch 484.013411 Four-Door Car Medsize \n",
+ "4 Branch 707.925645 Four-Door Car Medsize \n",
+ "... ... ... ... ... \n",
+ "10905 Web 1214.400000 Luxury Car Medsize \n",
+ "10906 Branch 273.018929 Four-Door Car Medsize \n",
+ "10907 Web 381.306996 Luxury SUV Medsize \n",
+ "10908 Branch 618.288849 SUV Medsize \n",
+ "10909 Web 1021.719397 SUV Medsize \n",
+ "\n",
+ " vehicle_type month \n",
+ "0 A 2 \n",
+ "1 A 1 \n",
+ "2 A 2 \n",
+ "3 A 1 \n",
+ "4 A 1 \n",
+ "... ... ... \n",
+ "10905 A 1 \n",
+ "10906 A 1 \n",
+ "10907 A 2 \n",
+ "10908 A 2 \n",
+ "10909 A 1 \n",
+ "\n",
+ "[10910 rows x 27 columns]"
+ ]
+ },
+ "execution_count": 32,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "marketing_customer_analysis"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 33,
+ "id": "0d23888a-a380-4d87-9532-a4f699391488",
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Original columns: ['unnamed:_0', 'customer', 'state', 'customer_lifetime_value', 'response', 'coverage', 'education', 'effective_to_date', 'employmentstatus', 'gender', 'income', 'location_code', 'marital_status', 'monthly_premium_auto', 'months_since_last_claim', 'months_since_policy_inception', 'number_of_open_complaints', 'number_of_policies', 'policy_type', 'policy', 'renew_offer_type', 'sales_channel', 'total_claim_amount', 'vehicle_class', 'vehicle_size', 'vehicle_type', 'month']\n"
+ ]
+ }
+ ],
+ "source": [
+ "print(\"Original columns:\", marketing_customer_analysis.columns.tolist())"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 37,
+ "id": "8677783d-31c0-40fb-a5f6-264712e2b07a",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Function to clean column names\n",
+ "def clean_column_names_1(columns):\n",
+ "    \"\"\"Return normalised copies of the given column labels.\n",
+ "\n",
+ "    Each label is upper-cased, stripped of surrounding whitespace, and has its\n",
+ "    remaining spaces replaced by underscores. Input order is preserved and a\n",
+ "    new list is returned.\n",
+ "    \"\"\"\n",
+ "    def normalise(label):\n",
+ "        return label.upper().strip().replace(\" \", \"_\")\n",
+ "\n",
+ "    return list(map(normalise, columns))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 38,
+ "id": "da4ee899-45da-4eeb-8424-9bd4e0ba643e",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "marketing_customer_analysis.columns = clean_column_names_1(marketing_customer_analysis.columns)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 39,
+ "id": "aa5f9d49-2c73-4fb9-958d-7a44b7484882",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " UNNAMED:_0 | \n",
+ " CUSTOMER | \n",
+ " STATE | \n",
+ " CUSTOMER_LIFETIME_VALUE | \n",
+ " RESPONSE | \n",
+ " COVERAGE | \n",
+ " EDUCATION | \n",
+ " EFFECTIVE_TO_DATE | \n",
+ " EMPLOYMENTSTATUS | \n",
+ " GENDER | \n",
+ " ... | \n",
+ " NUMBER_OF_POLICIES | \n",
+ " POLICY_TYPE | \n",
+ " POLICY | \n",
+ " RENEW_OFFER_TYPE | \n",
+ " SALES_CHANNEL | \n",
+ " TOTAL_CLAIM_AMOUNT | \n",
+ " VEHICLE_CLASS | \n",
+ " VEHICLE_SIZE | \n",
+ " VEHICLE_TYPE | \n",
+ " MONTH | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 0 | \n",
+ " DK49336 | \n",
+ " Arizona | \n",
+ " 4809.216960 | \n",
+ " No | \n",
+ " Basic | \n",
+ " College | \n",
+ " 2011-02-18 | \n",
+ " Employed | \n",
+ " M | \n",
+ " ... | \n",
+ " 9 | \n",
+ " Corporate Auto | \n",
+ " Corporate L3 | \n",
+ " Offer3 | \n",
+ " Agent | \n",
+ " 292.800000 | \n",
+ " Four-Door Car | \n",
+ " Medsize | \n",
+ " A | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 1 | \n",
+ " KX64629 | \n",
+ " California | \n",
+ " 2228.525238 | \n",
+ " No | \n",
+ " Basic | \n",
+ " College | \n",
+ " 2011-01-18 | \n",
+ " Unemployed | \n",
+ " F | \n",
+ " ... | \n",
+ " 1 | \n",
+ " Personal Auto | \n",
+ " Personal L3 | \n",
+ " Offer4 | \n",
+ " Call Center | \n",
+ " 744.924331 | \n",
+ " Four-Door Car | \n",
+ " Medsize | \n",
+ " A | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 2 | \n",
+ " LZ68649 | \n",
+ " Washington | \n",
+ " 14947.917300 | \n",
+ " No | \n",
+ " Basic | \n",
+ " Bachelor | \n",
+ " 2011-02-10 | \n",
+ " Employed | \n",
+ " M | \n",
+ " ... | \n",
+ " 2 | \n",
+ " Personal Auto | \n",
+ " Personal L3 | \n",
+ " Offer3 | \n",
+ " Call Center | \n",
+ " 480.000000 | \n",
+ " SUV | \n",
+ " Medsize | \n",
+ " A | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 3 | \n",
+ " XL78013 | \n",
+ " Oregon | \n",
+ " 22332.439460 | \n",
+ " Yes | \n",
+ " Extended | \n",
+ " College | \n",
+ " 2011-01-11 | \n",
+ " Employed | \n",
+ " M | \n",
+ " ... | \n",
+ " 2 | \n",
+ " Corporate Auto | \n",
+ " Corporate L3 | \n",
+ " Offer2 | \n",
+ " Branch | \n",
+ " 484.013411 | \n",
+ " Four-Door Car | \n",
+ " Medsize | \n",
+ " A | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 4 | \n",
+ " QA50777 | \n",
+ " Oregon | \n",
+ " 9025.067525 | \n",
+ " No | \n",
+ " Premium | \n",
+ " Bachelor | \n",
+ " 2011-01-17 | \n",
+ " Medical Leave | \n",
+ " F | \n",
+ " ... | \n",
+ " 7 | \n",
+ " Personal Auto | \n",
+ " Personal L2 | \n",
+ " Offer1 | \n",
+ " Branch | \n",
+ " 707.925645 | \n",
+ " Four-Door Car | \n",
+ " Medsize | \n",
+ " A | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 10905 | \n",
+ " 10905 | \n",
+ " FE99816 | \n",
+ " Nevada | \n",
+ " 15563.369440 | \n",
+ " No | \n",
+ " Premium | \n",
+ " Bachelor | \n",
+ " 2011-01-19 | \n",
+ " Unemployed | \n",
+ " F | \n",
+ " ... | \n",
+ " 7 | \n",
+ " Personal Auto | \n",
+ " Personal L1 | \n",
+ " Offer3 | \n",
+ " Web | \n",
+ " 1214.400000 | \n",
+ " Luxury Car | \n",
+ " Medsize | \n",
+ " A | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 10906 | \n",
+ " 10906 | \n",
+ " KX53892 | \n",
+ " Oregon | \n",
+ " 5259.444853 | \n",
+ " No | \n",
+ " Basic | \n",
+ " College | \n",
+ " 2011-01-06 | \n",
+ " Employed | \n",
+ " F | \n",
+ " ... | \n",
+ " 6 | \n",
+ " Personal Auto | \n",
+ " Personal L3 | \n",
+ " Offer2 | \n",
+ " Branch | \n",
+ " 273.018929 | \n",
+ " Four-Door Car | \n",
+ " Medsize | \n",
+ " A | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 10907 | \n",
+ " 10907 | \n",
+ " TL39050 | \n",
+ " Arizona | \n",
+ " 23893.304100 | \n",
+ " No | \n",
+ " Extended | \n",
+ " Bachelor | \n",
+ " 2011-02-06 | \n",
+ " Employed | \n",
+ " F | \n",
+ " ... | \n",
+ " 2 | \n",
+ " Corporate Auto | \n",
+ " Corporate L3 | \n",
+ " Offer1 | \n",
+ " Web | \n",
+ " 381.306996 | \n",
+ " Luxury SUV | \n",
+ " Medsize | \n",
+ " A | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " 10908 | \n",
+ " 10908 | \n",
+ " WA60547 | \n",
+ " California | \n",
+ " 11971.977650 | \n",
+ " No | \n",
+ " Premium | \n",
+ " College | \n",
+ " 2011-02-13 | \n",
+ " Employed | \n",
+ " F | \n",
+ " ... | \n",
+ " 6 | \n",
+ " Personal Auto | \n",
+ " Personal L1 | \n",
+ " Offer1 | \n",
+ " Branch | \n",
+ " 618.288849 | \n",
+ " SUV | \n",
+ " Medsize | \n",
+ " A | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " 10909 | \n",
+ " 10909 | \n",
+ " IV32877 | \n",
+ " California | \n",
+ " 6857.519928 | \n",
+ " No | \n",
+ " Basic | \n",
+ " Bachelor | \n",
+ " 2011-01-08 | \n",
+ " Unemployed | \n",
+ " M | \n",
+ " ... | \n",
+ " 3 | \n",
+ " Personal Auto | \n",
+ " Personal L1 | \n",
+ " Offer4 | \n",
+ " Web | \n",
+ " 1021.719397 | \n",
+ " SUV | \n",
+ " Medsize | \n",
+ " A | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
10910 rows × 27 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " UNNAMED:_0 CUSTOMER STATE CUSTOMER_LIFETIME_VALUE RESPONSE \\\n",
+ "0 0 DK49336 Arizona 4809.216960 No \n",
+ "1 1 KX64629 California 2228.525238 No \n",
+ "2 2 LZ68649 Washington 14947.917300 No \n",
+ "3 3 XL78013 Oregon 22332.439460 Yes \n",
+ "4 4 QA50777 Oregon 9025.067525 No \n",
+ "... ... ... ... ... ... \n",
+ "10905 10905 FE99816 Nevada 15563.369440 No \n",
+ "10906 10906 KX53892 Oregon 5259.444853 No \n",
+ "10907 10907 TL39050 Arizona 23893.304100 No \n",
+ "10908 10908 WA60547 California 11971.977650 No \n",
+ "10909 10909 IV32877 California 6857.519928 No \n",
+ "\n",
+ " COVERAGE EDUCATION EFFECTIVE_TO_DATE EMPLOYMENTSTATUS GENDER ... \\\n",
+ "0 Basic College 2011-02-18 Employed M ... \n",
+ "1 Basic College 2011-01-18 Unemployed F ... \n",
+ "2 Basic Bachelor 2011-02-10 Employed M ... \n",
+ "3 Extended College 2011-01-11 Employed M ... \n",
+ "4 Premium Bachelor 2011-01-17 Medical Leave F ... \n",
+ "... ... ... ... ... ... ... \n",
+ "10905 Premium Bachelor 2011-01-19 Unemployed F ... \n",
+ "10906 Basic College 2011-01-06 Employed F ... \n",
+ "10907 Extended Bachelor 2011-02-06 Employed F ... \n",
+ "10908 Premium College 2011-02-13 Employed F ... \n",
+ "10909 Basic Bachelor 2011-01-08 Unemployed M ... \n",
+ "\n",
+ " NUMBER_OF_POLICIES POLICY_TYPE POLICY RENEW_OFFER_TYPE \\\n",
+ "0 9 Corporate Auto Corporate L3 Offer3 \n",
+ "1 1 Personal Auto Personal L3 Offer4 \n",
+ "2 2 Personal Auto Personal L3 Offer3 \n",
+ "3 2 Corporate Auto Corporate L3 Offer2 \n",
+ "4 7 Personal Auto Personal L2 Offer1 \n",
+ "... ... ... ... ... \n",
+ "10905 7 Personal Auto Personal L1 Offer3 \n",
+ "10906 6 Personal Auto Personal L3 Offer2 \n",
+ "10907 2 Corporate Auto Corporate L3 Offer1 \n",
+ "10908 6 Personal Auto Personal L1 Offer1 \n",
+ "10909 3 Personal Auto Personal L1 Offer4 \n",
+ "\n",
+ " SALES_CHANNEL TOTAL_CLAIM_AMOUNT VEHICLE_CLASS VEHICLE_SIZE \\\n",
+ "0 Agent 292.800000 Four-Door Car Medsize \n",
+ "1 Call Center 744.924331 Four-Door Car Medsize \n",
+ "2 Call Center 480.000000 SUV Medsize \n",
+ "3 Branch 484.013411 Four-Door Car Medsize \n",
+ "4 Branch 707.925645 Four-Door Car Medsize \n",
+ "... ... ... ... ... \n",
+ "10905 Web 1214.400000 Luxury Car Medsize \n",
+ "10906 Branch 273.018929 Four-Door Car Medsize \n",
+ "10907 Web 381.306996 Luxury SUV Medsize \n",
+ "10908 Branch 618.288849 SUV Medsize \n",
+ "10909 Web 1021.719397 SUV Medsize \n",
+ "\n",
+ " VEHICLE_TYPE MONTH \n",
+ "0 A 2 \n",
+ "1 A 1 \n",
+ "2 A 2 \n",
+ "3 A 1 \n",
+ "4 A 1 \n",
+ "... ... ... \n",
+ "10905 A 1 \n",
+ "10906 A 1 \n",
+ "10907 A 2 \n",
+ "10908 A 2 \n",
+ "10909 A 1 \n",
+ "\n",
+ "[10910 rows x 27 columns]"
+ ]
+ },
+ "execution_count": 39,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "marketing_customer_analysis"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 40,
+ "id": "2eec22a8-4bbb-4655-90ed-a24025c3eaa5",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " UNNAMED:_0 | \n",
+ " CUSTOMER | \n",
+ " STATE | \n",
+ " CUSTOMER_LIFETIME_VALUE | \n",
+ " RESPONSE | \n",
+ " COVERAGE | \n",
+ " EDUCATION | \n",
+ " EFFECTIVE_TO_DATE | \n",
+ " EMPLOYMENT_STATUS | \n",
+ " GENDER | \n",
+ " ... | \n",
+ " NUMBER_OF_POLICIES | \n",
+ " POLICY_TYPE | \n",
+ " POLICY | \n",
+ " RENEW_OFFER_TYPE | \n",
+ " SALES_CHANNEL | \n",
+ " TOTAL_CLAIM_AMOUNT | \n",
+ " VEHICLE_CLASS | \n",
+ " VEHICLE_SIZE | \n",
+ " VEHICLE_TYPE | \n",
+ " MONTH | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 0 | \n",
+ " DK49336 | \n",
+ " Arizona | \n",
+ " 4809.216960 | \n",
+ " No | \n",
+ " Basic | \n",
+ " College | \n",
+ " 2011-02-18 | \n",
+ " Employed | \n",
+ " M | \n",
+ " ... | \n",
+ " 9 | \n",
+ " Corporate Auto | \n",
+ " Corporate L3 | \n",
+ " Offer3 | \n",
+ " Agent | \n",
+ " 292.800000 | \n",
+ " Four-Door Car | \n",
+ " Medsize | \n",
+ " A | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 1 | \n",
+ " KX64629 | \n",
+ " California | \n",
+ " 2228.525238 | \n",
+ " No | \n",
+ " Basic | \n",
+ " College | \n",
+ " 2011-01-18 | \n",
+ " Unemployed | \n",
+ " F | \n",
+ " ... | \n",
+ " 1 | \n",
+ " Personal Auto | \n",
+ " Personal L3 | \n",
+ " Offer4 | \n",
+ " Call Center | \n",
+ " 744.924331 | \n",
+ " Four-Door Car | \n",
+ " Medsize | \n",
+ " A | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 2 | \n",
+ " LZ68649 | \n",
+ " Washington | \n",
+ " 14947.917300 | \n",
+ " No | \n",
+ " Basic | \n",
+ " Bachelor | \n",
+ " 2011-02-10 | \n",
+ " Employed | \n",
+ " M | \n",
+ " ... | \n",
+ " 2 | \n",
+ " Personal Auto | \n",
+ " Personal L3 | \n",
+ " Offer3 | \n",
+ " Call Center | \n",
+ " 480.000000 | \n",
+ " SUV | \n",
+ " Medsize | \n",
+ " A | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 3 | \n",
+ " XL78013 | \n",
+ " Oregon | \n",
+ " 22332.439460 | \n",
+ " Yes | \n",
+ " Extended | \n",
+ " College | \n",
+ " 2011-01-11 | \n",
+ " Employed | \n",
+ " M | \n",
+ " ... | \n",
+ " 2 | \n",
+ " Corporate Auto | \n",
+ " Corporate L3 | \n",
+ " Offer2 | \n",
+ " Branch | \n",
+ " 484.013411 | \n",
+ " Four-Door Car | \n",
+ " Medsize | \n",
+ " A | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 4 | \n",
+ " QA50777 | \n",
+ " Oregon | \n",
+ " 9025.067525 | \n",
+ " No | \n",
+ " Premium | \n",
+ " Bachelor | \n",
+ " 2011-01-17 | \n",
+ " Medical Leave | \n",
+ " F | \n",
+ " ... | \n",
+ " 7 | \n",
+ " Personal Auto | \n",
+ " Personal L2 | \n",
+ " Offer1 | \n",
+ " Branch | \n",
+ " 707.925645 | \n",
+ " Four-Door Car | \n",
+ " Medsize | \n",
+ " A | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 10905 | \n",
+ " 10905 | \n",
+ " FE99816 | \n",
+ " Nevada | \n",
+ " 15563.369440 | \n",
+ " No | \n",
+ " Premium | \n",
+ " Bachelor | \n",
+ " 2011-01-19 | \n",
+ " Unemployed | \n",
+ " F | \n",
+ " ... | \n",
+ " 7 | \n",
+ " Personal Auto | \n",
+ " Personal L1 | \n",
+ " Offer3 | \n",
+ " Web | \n",
+ " 1214.400000 | \n",
+ " Luxury Car | \n",
+ " Medsize | \n",
+ " A | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 10906 | \n",
+ " 10906 | \n",
+ " KX53892 | \n",
+ " Oregon | \n",
+ " 5259.444853 | \n",
+ " No | \n",
+ " Basic | \n",
+ " College | \n",
+ " 2011-01-06 | \n",
+ " Employed | \n",
+ " F | \n",
+ " ... | \n",
+ " 6 | \n",
+ " Personal Auto | \n",
+ " Personal L3 | \n",
+ " Offer2 | \n",
+ " Branch | \n",
+ " 273.018929 | \n",
+ " Four-Door Car | \n",
+ " Medsize | \n",
+ " A | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 10907 | \n",
+ " 10907 | \n",
+ " TL39050 | \n",
+ " Arizona | \n",
+ " 23893.304100 | \n",
+ " No | \n",
+ " Extended | \n",
+ " Bachelor | \n",
+ " 2011-02-06 | \n",
+ " Employed | \n",
+ " F | \n",
+ " ... | \n",
+ " 2 | \n",
+ " Corporate Auto | \n",
+ " Corporate L3 | \n",
+ " Offer1 | \n",
+ " Web | \n",
+ " 381.306996 | \n",
+ " Luxury SUV | \n",
+ " Medsize | \n",
+ " A | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " 10908 | \n",
+ " 10908 | \n",
+ " WA60547 | \n",
+ " California | \n",
+ " 11971.977650 | \n",
+ " No | \n",
+ " Premium | \n",
+ " College | \n",
+ " 2011-02-13 | \n",
+ " Employed | \n",
+ " F | \n",
+ " ... | \n",
+ " 6 | \n",
+ " Personal Auto | \n",
+ " Personal L1 | \n",
+ " Offer1 | \n",
+ " Branch | \n",
+ " 618.288849 | \n",
+ " SUV | \n",
+ " Medsize | \n",
+ " A | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " 10909 | \n",
+ " 10909 | \n",
+ " IV32877 | \n",
+ " California | \n",
+ " 6857.519928 | \n",
+ " No | \n",
+ " Basic | \n",
+ " Bachelor | \n",
+ " 2011-01-08 | \n",
+ " Unemployed | \n",
+ " M | \n",
+ " ... | \n",
+ " 3 | \n",
+ " Personal Auto | \n",
+ " Personal L1 | \n",
+ " Offer4 | \n",
+ " Web | \n",
+ " 1021.719397 | \n",
+ " SUV | \n",
+ " Medsize | \n",
+ " A | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
10910 rows × 27 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " UNNAMED:_0 CUSTOMER STATE CUSTOMER_LIFETIME_VALUE RESPONSE \\\n",
+ "0 0 DK49336 Arizona 4809.216960 No \n",
+ "1 1 KX64629 California 2228.525238 No \n",
+ "2 2 LZ68649 Washington 14947.917300 No \n",
+ "3 3 XL78013 Oregon 22332.439460 Yes \n",
+ "4 4 QA50777 Oregon 9025.067525 No \n",
+ "... ... ... ... ... ... \n",
+ "10905 10905 FE99816 Nevada 15563.369440 No \n",
+ "10906 10906 KX53892 Oregon 5259.444853 No \n",
+ "10907 10907 TL39050 Arizona 23893.304100 No \n",
+ "10908 10908 WA60547 California 11971.977650 No \n",
+ "10909 10909 IV32877 California 6857.519928 No \n",
+ "\n",
+ " COVERAGE EDUCATION EFFECTIVE_TO_DATE EMPLOYMENT_STATUS GENDER ... \\\n",
+ "0 Basic College 2011-02-18 Employed M ... \n",
+ "1 Basic College 2011-01-18 Unemployed F ... \n",
+ "2 Basic Bachelor 2011-02-10 Employed M ... \n",
+ "3 Extended College 2011-01-11 Employed M ... \n",
+ "4 Premium Bachelor 2011-01-17 Medical Leave F ... \n",
+ "... ... ... ... ... ... ... \n",
+ "10905 Premium Bachelor 2011-01-19 Unemployed F ... \n",
+ "10906 Basic College 2011-01-06 Employed F ... \n",
+ "10907 Extended Bachelor 2011-02-06 Employed F ... \n",
+ "10908 Premium College 2011-02-13 Employed F ... \n",
+ "10909 Basic Bachelor 2011-01-08 Unemployed M ... \n",
+ "\n",
+ " NUMBER_OF_POLICIES POLICY_TYPE POLICY RENEW_OFFER_TYPE \\\n",
+ "0 9 Corporate Auto Corporate L3 Offer3 \n",
+ "1 1 Personal Auto Personal L3 Offer4 \n",
+ "2 2 Personal Auto Personal L3 Offer3 \n",
+ "3 2 Corporate Auto Corporate L3 Offer2 \n",
+ "4 7 Personal Auto Personal L2 Offer1 \n",
+ "... ... ... ... ... \n",
+ "10905 7 Personal Auto Personal L1 Offer3 \n",
+ "10906 6 Personal Auto Personal L3 Offer2 \n",
+ "10907 2 Corporate Auto Corporate L3 Offer1 \n",
+ "10908 6 Personal Auto Personal L1 Offer1 \n",
+ "10909 3 Personal Auto Personal L1 Offer4 \n",
+ "\n",
+ " SALES_CHANNEL TOTAL_CLAIM_AMOUNT VEHICLE_CLASS VEHICLE_SIZE \\\n",
+ "0 Agent 292.800000 Four-Door Car Medsize \n",
+ "1 Call Center 744.924331 Four-Door Car Medsize \n",
+ "2 Call Center 480.000000 SUV Medsize \n",
+ "3 Branch 484.013411 Four-Door Car Medsize \n",
+ "4 Branch 707.925645 Four-Door Car Medsize \n",
+ "... ... ... ... ... \n",
+ "10905 Web 1214.400000 Luxury Car Medsize \n",
+ "10906 Branch 273.018929 Four-Door Car Medsize \n",
+ "10907 Web 381.306996 Luxury SUV Medsize \n",
+ "10908 Branch 618.288849 SUV Medsize \n",
+ "10909 Web 1021.719397 SUV Medsize \n",
+ "\n",
+ " VEHICLE_TYPE MONTH \n",
+ "0 A 2 \n",
+ "1 A 1 \n",
+ "2 A 2 \n",
+ "3 A 1 \n",
+ "4 A 1 \n",
+ "... ... ... \n",
+ "10905 A 1 \n",
+ "10906 A 1 \n",
+ "10907 A 2 \n",
+ "10908 A 2 \n",
+ "10909 A 1 \n",
+ "\n",
+ "[10910 rows x 27 columns]"
+ ]
+ },
+ "execution_count": 40,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Give the column the same underscore-separated naming as its neighbours\n",
+ "# (e.g. EFFECTIVE_TO_DATE); rename() skips labels that are not present.\n",
+ "marketing_customer_analysis = marketing_customer_analysis.rename(\n",
+ "    columns={\"EMPLOYMENTSTATUS\": \"EMPLOYMENT_STATUS\"}\n",
+ ")\n",
+ "marketing_customer_analysis"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 41,
+ "id": "891a398b-70c8-4243-be8d-ce16faa7ebad",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " CUSTOMER | \n",
+ " STATE | \n",
+ " CUSTOMER_LIFETIME_VALUE | \n",
+ " RESPONSE | \n",
+ " COVERAGE | \n",
+ " EDUCATION | \n",
+ " EFFECTIVE_TO_DATE | \n",
+ " EMPLOYMENT_STATUS | \n",
+ " GENDER | \n",
+ " INCOME | \n",
+ " ... | \n",
+ " NUMBER_OF_POLICIES | \n",
+ " POLICY_TYPE | \n",
+ " POLICY | \n",
+ " RENEW_OFFER_TYPE | \n",
+ " SALES_CHANNEL | \n",
+ " TOTAL_CLAIM_AMOUNT | \n",
+ " VEHICLE_CLASS | \n",
+ " VEHICLE_SIZE | \n",
+ " VEHICLE_TYPE | \n",
+ " MONTH | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " DK49336 | \n",
+ " Arizona | \n",
+ " 4809.216960 | \n",
+ " No | \n",
+ " Basic | \n",
+ " College | \n",
+ " 2011-02-18 | \n",
+ " Employed | \n",
+ " M | \n",
+ " 48029 | \n",
+ " ... | \n",
+ " 9 | \n",
+ " Corporate Auto | \n",
+ " Corporate L3 | \n",
+ " Offer3 | \n",
+ " Agent | \n",
+ " 292.800000 | \n",
+ " Four-Door Car | \n",
+ " Medsize | \n",
+ " A | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " KX64629 | \n",
+ " California | \n",
+ " 2228.525238 | \n",
+ " No | \n",
+ " Basic | \n",
+ " College | \n",
+ " 2011-01-18 | \n",
+ " Unemployed | \n",
+ " F | \n",
+ " 0 | \n",
+ " ... | \n",
+ " 1 | \n",
+ " Personal Auto | \n",
+ " Personal L3 | \n",
+ " Offer4 | \n",
+ " Call Center | \n",
+ " 744.924331 | \n",
+ " Four-Door Car | \n",
+ " Medsize | \n",
+ " A | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " LZ68649 | \n",
+ " Washington | \n",
+ " 14947.917300 | \n",
+ " No | \n",
+ " Basic | \n",
+ " Bachelor | \n",
+ " 2011-02-10 | \n",
+ " Employed | \n",
+ " M | \n",
+ " 22139 | \n",
+ " ... | \n",
+ " 2 | \n",
+ " Personal Auto | \n",
+ " Personal L3 | \n",
+ " Offer3 | \n",
+ " Call Center | \n",
+ " 480.000000 | \n",
+ " SUV | \n",
+ " Medsize | \n",
+ " A | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " XL78013 | \n",
+ " Oregon | \n",
+ " 22332.439460 | \n",
+ " Yes | \n",
+ " Extended | \n",
+ " College | \n",
+ " 2011-01-11 | \n",
+ " Employed | \n",
+ " M | \n",
+ " 49078 | \n",
+ " ... | \n",
+ " 2 | \n",
+ " Corporate Auto | \n",
+ " Corporate L3 | \n",
+ " Offer2 | \n",
+ " Branch | \n",
+ " 484.013411 | \n",
+ " Four-Door Car | \n",
+ " Medsize | \n",
+ " A | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " QA50777 | \n",
+ " Oregon | \n",
+ " 9025.067525 | \n",
+ " No | \n",
+ " Premium | \n",
+ " Bachelor | \n",
+ " 2011-01-17 | \n",
+ " Medical Leave | \n",
+ " F | \n",
+ " 23675 | \n",
+ " ... | \n",
+ " 7 | \n",
+ " Personal Auto | \n",
+ " Personal L2 | \n",
+ " Offer1 | \n",
+ " Branch | \n",
+ " 707.925645 | \n",
+ " Four-Door Car | \n",
+ " Medsize | \n",
+ " A | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 10905 | \n",
+ " FE99816 | \n",
+ " Nevada | \n",
+ " 15563.369440 | \n",
+ " No | \n",
+ " Premium | \n",
+ " Bachelor | \n",
+ " 2011-01-19 | \n",
+ " Unemployed | \n",
+ " F | \n",
+ " 0 | \n",
+ " ... | \n",
+ " 7 | \n",
+ " Personal Auto | \n",
+ " Personal L1 | \n",
+ " Offer3 | \n",
+ " Web | \n",
+ " 1214.400000 | \n",
+ " Luxury Car | \n",
+ " Medsize | \n",
+ " A | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 10906 | \n",
+ " KX53892 | \n",
+ " Oregon | \n",
+ " 5259.444853 | \n",
+ " No | \n",
+ " Basic | \n",
+ " College | \n",
+ " 2011-01-06 | \n",
+ " Employed | \n",
+ " F | \n",
+ " 61146 | \n",
+ " ... | \n",
+ " 6 | \n",
+ " Personal Auto | \n",
+ " Personal L3 | \n",
+ " Offer2 | \n",
+ " Branch | \n",
+ " 273.018929 | \n",
+ " Four-Door Car | \n",
+ " Medsize | \n",
+ " A | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 10907 | \n",
+ " TL39050 | \n",
+ " Arizona | \n",
+ " 23893.304100 | \n",
+ " No | \n",
+ " Extended | \n",
+ " Bachelor | \n",
+ " 2011-02-06 | \n",
+ " Employed | \n",
+ " F | \n",
+ " 39837 | \n",
+ " ... | \n",
+ " 2 | \n",
+ " Corporate Auto | \n",
+ " Corporate L3 | \n",
+ " Offer1 | \n",
+ " Web | \n",
+ " 381.306996 | \n",
+ " Luxury SUV | \n",
+ " Medsize | \n",
+ " A | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " 10908 | \n",
+ " WA60547 | \n",
+ " California | \n",
+ " 11971.977650 | \n",
+ " No | \n",
+ " Premium | \n",
+ " College | \n",
+ " 2011-02-13 | \n",
+ " Employed | \n",
+ " F | \n",
+ " 64195 | \n",
+ " ... | \n",
+ " 6 | \n",
+ " Personal Auto | \n",
+ " Personal L1 | \n",
+ " Offer1 | \n",
+ " Branch | \n",
+ " 618.288849 | \n",
+ " SUV | \n",
+ " Medsize | \n",
+ " A | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " 10909 | \n",
+ " IV32877 | \n",
+ " California | \n",
+ " 6857.519928 | \n",
+ " No | \n",
+ " Basic | \n",
+ " Bachelor | \n",
+ " 2011-01-08 | \n",
+ " Unemployed | \n",
+ " M | \n",
+ " 0 | \n",
+ " ... | \n",
+ " 3 | \n",
+ " Personal Auto | \n",
+ " Personal L1 | \n",
+ " Offer4 | \n",
+ " Web | \n",
+ " 1021.719397 | \n",
+ " SUV | \n",
+ " Medsize | \n",
+ " A | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
10910 rows × 26 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " CUSTOMER STATE CUSTOMER_LIFETIME_VALUE RESPONSE COVERAGE \\\n",
+ "0 DK49336 Arizona 4809.216960 No Basic \n",
+ "1 KX64629 California 2228.525238 No Basic \n",
+ "2 LZ68649 Washington 14947.917300 No Basic \n",
+ "3 XL78013 Oregon 22332.439460 Yes Extended \n",
+ "4 QA50777 Oregon 9025.067525 No Premium \n",
+ "... ... ... ... ... ... \n",
+ "10905 FE99816 Nevada 15563.369440 No Premium \n",
+ "10906 KX53892 Oregon 5259.444853 No Basic \n",
+ "10907 TL39050 Arizona 23893.304100 No Extended \n",
+ "10908 WA60547 California 11971.977650 No Premium \n",
+ "10909 IV32877 California 6857.519928 No Basic \n",
+ "\n",
+ " EDUCATION EFFECTIVE_TO_DATE EMPLOYMENT_STATUS GENDER INCOME ... \\\n",
+ "0 College 2011-02-18 Employed M 48029 ... \n",
+ "1 College 2011-01-18 Unemployed F 0 ... \n",
+ "2 Bachelor 2011-02-10 Employed M 22139 ... \n",
+ "3 College 2011-01-11 Employed M 49078 ... \n",
+ "4 Bachelor 2011-01-17 Medical Leave F 23675 ... \n",
+ "... ... ... ... ... ... ... \n",
+ "10905 Bachelor 2011-01-19 Unemployed F 0 ... \n",
+ "10906 College 2011-01-06 Employed F 61146 ... \n",
+ "10907 Bachelor 2011-02-06 Employed F 39837 ... \n",
+ "10908 College 2011-02-13 Employed F 64195 ... \n",
+ "10909 Bachelor 2011-01-08 Unemployed M 0 ... \n",
+ "\n",
+ " NUMBER_OF_POLICIES POLICY_TYPE POLICY RENEW_OFFER_TYPE \\\n",
+ "0 9 Corporate Auto Corporate L3 Offer3 \n",
+ "1 1 Personal Auto Personal L3 Offer4 \n",
+ "2 2 Personal Auto Personal L3 Offer3 \n",
+ "3 2 Corporate Auto Corporate L3 Offer2 \n",
+ "4 7 Personal Auto Personal L2 Offer1 \n",
+ "... ... ... ... ... \n",
+ "10905 7 Personal Auto Personal L1 Offer3 \n",
+ "10906 6 Personal Auto Personal L3 Offer2 \n",
+ "10907 2 Corporate Auto Corporate L3 Offer1 \n",
+ "10908 6 Personal Auto Personal L1 Offer1 \n",
+ "10909 3 Personal Auto Personal L1 Offer4 \n",
+ "\n",
+ " SALES_CHANNEL TOTAL_CLAIM_AMOUNT VEHICLE_CLASS VEHICLE_SIZE \\\n",
+ "0 Agent 292.800000 Four-Door Car Medsize \n",
+ "1 Call Center 744.924331 Four-Door Car Medsize \n",
+ "2 Call Center 480.000000 SUV Medsize \n",
+ "3 Branch 484.013411 Four-Door Car Medsize \n",
+ "4 Branch 707.925645 Four-Door Car Medsize \n",
+ "... ... ... ... ... \n",
+ "10905 Web 1214.400000 Luxury Car Medsize \n",
+ "10906 Branch 273.018929 Four-Door Car Medsize \n",
+ "10907 Web 381.306996 Luxury SUV Medsize \n",
+ "10908 Branch 618.288849 SUV Medsize \n",
+ "10909 Web 1021.719397 SUV Medsize \n",
+ "\n",
+ " VEHICLE_TYPE MONTH \n",
+ "0 A 2 \n",
+ "1 A 1 \n",
+ "2 A 2 \n",
+ "3 A 1 \n",
+ "4 A 1 \n",
+ "... ... ... \n",
+ "10905 A 1 \n",
+ "10906 A 1 \n",
+ "10907 A 2 \n",
+ "10908 A 2 \n",
+ "10909 A 1 \n",
+ "\n",
+ "[10910 rows x 26 columns]"
+ ]
+ },
+ "execution_count": 41,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Remove the UNNAMED:_0 column. errors=\"ignore\" keeps this cell re-runnable:\n",
+ "# without it a second run raises KeyError because the column is already gone.\n",
+ "marketing_customer_analysis = marketing_customer_analysis.drop(\n",
+ "    columns=[\"UNNAMED:_0\"], errors=\"ignore\"\n",
+ ")\n",
+ "marketing_customer_analysis"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 55,
+ "id": "d5a262a3-d76e-4001-97e4-42120f7cca1b",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# data_cleaning.py\n",
+ "\n",
+ "def standardize_gender(marketing_customer_analysis, column=\"GENDER\"):\n",
+ "    \"\"\"Standardize the gender column to the codes 'M' and 'F'.\n",
+ "\n",
+ "    Values are upper-cased, then 'FEMALE', 'FEMAL' and 'MALE' are mapped to\n",
+ "    'F' / 'M'. Any other value is kept in its upper-cased form.\n",
+ "\n",
+ "    Args:\n",
+ "        marketing_customer_analysis: DataFrame to clean; the column is\n",
+ "            reassigned on this object.\n",
+ "        column: Name of the gender column. When the frame has no such\n",
+ "            column it is returned untouched.\n",
+ "\n",
+ "    Returns:\n",
+ "        The same DataFrame object that was passed in.\n",
+ "    \"\"\"\n",
+ "    # Inspect the frame that was passed in. The previous check read the\n",
+ "    # name `file_combined`, which is not a parameter of this function.\n",
+ "    if column in marketing_customer_analysis.columns:\n",
+ "        marketing_customer_analysis[column] = (\n",
+ "            marketing_customer_analysis[column]\n",
+ "            .str.upper()\n",
+ "            .replace({\"FEMALE\": \"F\", \"FEMAL\": \"F\", \"MALE\": \"M\"})\n",
+ "        )\n",
+ "    return marketing_customer_analysis\n",
+ " \n",
+ "def standardize_states(marketing_customer_analysis, column=\"STATES\"):\n",
+ "    \"\"\"Replace the state abbreviations 'AZ', 'Cali' and 'WA' with full names.\n",
+ "\n",
+ "    Args:\n",
+ "        marketing_customer_analysis: DataFrame to clean; the column is\n",
+ "            reassigned on this object.\n",
+ "        column: Name of the state column. The default \"STATES\" falls back\n",
+ "            to \"STATE\" when only the singular name exists in the frame.\n",
+ "\n",
+ "    Returns:\n",
+ "        The same DataFrame object that was passed in; it is unchanged when\n",
+ "        no matching column is found.\n",
+ "    \"\"\"\n",
+ "    state_mapping = {\n",
+ "        \"AZ\": \"Arizona\",\n",
+ "        \"Cali\": \"California\",\n",
+ "        \"WA\": \"Washington\",\n",
+ "    }\n",
+ "    available = marketing_customer_analysis.columns\n",
+ "    # The notebook's frame names this column \"STATE\", so a default call used to\n",
+ "    # be a silent no-op. Keep the default for existing callers, but accept the\n",
+ "    # singular name when the plural one is missing.\n",
+ "    if column == \"STATES\" and column not in available and \"STATE\" in available:\n",
+ "        column = \"STATE\"\n",
+ "    if column in available:\n",
+ "        marketing_customer_analysis[column] = (\n",
+ "            marketing_customer_analysis[column].replace(state_mapping)\n",
+ "        )\n",
+ "    return marketing_customer_analysis\n",
+ "\n",
+ "def standardize_education(marketing_customer_analysis, column=\"EDUCATION\"):\n",
+ "    \"\"\"Normalise the education column by turning 'Bachelors' into 'Bachelor'.\n",
+ "\n",
+ "    Args:\n",
+ "        marketing_customer_analysis: DataFrame to clean; the column is\n",
+ "            reassigned on this object.\n",
+ "        column: Name of the education column.\n",
+ "\n",
+ "    Returns:\n",
+ "        The same DataFrame object; unchanged when `column` is absent.\n",
+ "    \"\"\"\n",
+ "    # Guard clause: nothing to do when the frame lacks the column.\n",
+ "    if column not in marketing_customer_analysis.columns:\n",
+ "        return marketing_customer_analysis\n",
+ "\n",
+ "    spelling_fixes = {\"Bachelors\": \"Bachelor\"}\n",
+ "    normalised = marketing_customer_analysis[column].replace(spelling_fixes)\n",
+ "    marketing_customer_analysis[column] = normalised\n",
+ "    return marketing_customer_analysis\n",
+ "\n",
+ "def clean_customer_lifetime_value(marketing_customer_analysis, column=\"CUSTOMER_LIFETIME_VALUE\"):\n",
+ "    \"\"\"Strip '%' and '$' from the column and convert it to numbers.\n",
+ "\n",
+ "    Each value is cast to text, the two symbols and surrounding whitespace\n",
+ "    are removed, and the result is parsed with `pd.to_numeric`. Entries that\n",
+ "    cannot be parsed become NaN (errors='coerce').\n",
+ "\n",
+ "    Args:\n",
+ "        marketing_customer_analysis: DataFrame to clean; the column is\n",
+ "            reassigned on this object.\n",
+ "        column: Name of the column to convert.\n",
+ "\n",
+ "    Returns:\n",
+ "        The same DataFrame object; unchanged when `column` is absent.\n",
+ "    \"\"\"\n",
+ "    if column in marketing_customer_analysis.columns:\n",
+ "        as_text = marketing_customer_analysis[column].astype(str)\n",
+ "        # regex=False forces literal matching: '$' is a regex end-of-string\n",
+ "        # anchor and the default of `regex` differs between pandas versions.\n",
+ "        for symbol in (\"%\", \"$\"):\n",
+ "            as_text = as_text.str.replace(symbol, \"\", regex=False)\n",
+ "        marketing_customer_analysis[column] = pd.to_numeric(\n",
+ "            as_text.str.strip(), errors=\"coerce\"\n",
+ "        )\n",
+ "    return marketing_customer_analysis\n",
+ "\n",
+ "def standardize_vehicle_class(marketing_customer_analysis, column=\"VEHICLE_CLASS\"):\n",
+ "    \"\"\"Collapse 'Sports Car', 'Luxury SUV' and 'Luxury Car' into 'Luxury'.\n",
+ "\n",
+ "    Args:\n",
+ "        marketing_customer_analysis: DataFrame to clean; the column is\n",
+ "            reassigned on this object.\n",
+ "        column: Name of the vehicle-class column.\n",
+ "\n",
+ "    Returns:\n",
+ "        The same DataFrame object; unchanged when `column` is absent.\n",
+ "    \"\"\"\n",
+ "    # Inspect the frame that was passed in. The previous check read the\n",
+ "    # name `file_combined`, which is not a parameter of this function.\n",
+ "    if column in marketing_customer_analysis.columns:\n",
+ "        marketing_customer_analysis[column] = marketing_customer_analysis[column].replace({\n",
+ "            \"Sports Car\": \"Luxury\",\n",
+ "            \"Luxury SUV\": \"Luxury\",\n",
+ "            \"Luxury Car\": \"Luxury\",\n",
+ "        })\n",
+ "    return marketing_customer_analysis\n",
+ "\n",
+ "def clean_number_of_open_complaints(marketing_customer_analysis, column=\"NUMBER_OF_OPEN_COMPLAINTS\"):\n",
+ "    \"\"\"Turn complaint entries such as '1/5/00' into integer counts.\n",
+ "\n",
+ "    For a slash-separated entry the second field is used; an entry without\n",
+ "    a slash is converted as a whole. Missing entries become 0.\n",
+ "\n",
+ "    Args:\n",
+ "        marketing_customer_analysis: DataFrame to clean; the column is\n",
+ "            reassigned on this object.\n",
+ "        column: Name of the complaints column.\n",
+ "\n",
+ "    Returns:\n",
+ "        The same DataFrame object; unchanged when `column` is absent.\n",
+ "\n",
+ "    Raises:\n",
+ "        ValueError: If a selected field is not numeric text.\n",
+ "    \"\"\"\n",
+ "    if column in marketing_customer_analysis.columns:\n",
+ "        def extract_middle(value):\n",
+ "            if pd.isna(value):\n",
+ "                return 0\n",
+ "            parts = str(value).split(\"/\")\n",
+ "            field = parts[1] if len(parts) >= 2 else parts[0]\n",
+ "            # Go through float first: a float-typed count stringifies as\n",
+ "            # '2.0', which int() alone rejects with ValueError.\n",
+ "            return int(float(field))\n",
+ "        marketing_customer_analysis[column] = marketing_customer_analysis[column].apply(extract_middle)\n",
+ "    return marketing_customer_analysis\n",
+ "\n",
+ "def fill_missing_values(marketing_customer_analysis):\n",
+ "    \"\"\"Fill missing values: column mean for numeric, mode for categorical.\n",
+ "\n",
+ "    Numeric means the int64 / float64 columns; categorical means the object /\n",
+ "    category columns. A categorical column with no non-missing value has no\n",
+ "    mode and is left as it is.\n",
+ "\n",
+ "    Args:\n",
+ "        marketing_customer_analysis: DataFrame to fill; its columns are\n",
+ "            reassigned on this object.\n",
+ "\n",
+ "    Returns:\n",
+ "        The same DataFrame object that was passed in.\n",
+ "    \"\"\"\n",
+ "    num_cols = marketing_customer_analysis.select_dtypes(include=[\"int64\", \"float64\"]).columns\n",
+ "    cat_cols = marketing_customer_analysis.select_dtypes(include=[\"object\", \"category\"]).columns\n",
+ "\n",
+ "    # Assign the filled column back instead of `df[col].fillna(..., inplace=True)`:\n",
+ "    # that form is chained assignment, which pandas warns about and which does\n",
+ "    # not update the frame once copy-on-write is enabled.\n",
+ "    for col in num_cols:\n",
+ "        column_mean = marketing_customer_analysis[col].mean()\n",
+ "        marketing_customer_analysis[col] = marketing_customer_analysis[col].fillna(column_mean)\n",
+ "    for col in cat_cols:\n",
+ "        modes = marketing_customer_analysis[col].mode()\n",
+ "        # mode() is empty for an all-missing column; indexing it would raise.\n",
+ "        if not modes.empty:\n",
+ "            marketing_customer_analysis[col] = marketing_customer_analysis[col].fillna(modes.iloc[0])\n",
+ "    return marketing_customer_analysis\n",
+ "\n",
+ "def drop_duplicates(marketing_customer_analysis, subset=None):\n",
+ "    \"\"\"Remove duplicate rows in place, keeping the first occurrence.\n",
+ "\n",
+ "    Args:\n",
+ "        marketing_customer_analysis: DataFrame to de-duplicate; modified in\n",
+ "            place.\n",
+ "        subset: Optional column label(s) used to decide what counts as a\n",
+ "            duplicate; None compares all columns.\n",
+ "\n",
+ "    Returns:\n",
+ "        The same DataFrame object, re-indexed 0..n-1.\n",
+ "    \"\"\"\n",
+ "    # ignore_index=True renumbers the surviving rows, which is what a\n",
+ "    # separate reset_index(drop=True) call would do.\n",
+ "    marketing_customer_analysis.drop_duplicates(\n",
+ "        subset=subset,\n",
+ "        keep=\"first\",\n",
+ "        inplace=True,\n",
+ "        ignore_index=True,\n",
+ "    )\n",
+ "    return marketing_customer_analysis"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 56,
+ "id": "50beed86-9a87-49e6-8492-2bfeb8fe6488",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def main(marketing_customer_analysis):\n",
+ "    \"\"\"Run every cleaning helper over the frame, in a fixed order.\n",
+ "\n",
+ "    The column-specific cleaners run first, then missing values are filled,\n",
+ "    and duplicate rows are dropped last.\n",
+ "\n",
+ "    Args:\n",
+ "        marketing_customer_analysis: DataFrame to clean.\n",
+ "\n",
+ "    Returns:\n",
+ "        The DataFrame returned by the final step.\n",
+ "    \"\"\"\n",
+ "    cleaning_steps = (\n",
+ "        standardize_gender,\n",
+ "        standardize_states,\n",
+ "        standardize_education,\n",
+ "        clean_customer_lifetime_value,\n",
+ "        standardize_vehicle_class,\n",
+ "        clean_number_of_open_complaints,\n",
+ "        fill_missing_values,\n",
+ "        drop_duplicates,\n",
+ "    )\n",
+ "    # Each helper takes the frame and returns it, so thread it through in turn.\n",
+ "    for step in cleaning_steps:\n",
+ "        marketing_customer_analysis = step(marketing_customer_analysis)\n",
+ "    return marketing_customer_analysis"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 57,
+ "id": "52d73c45-533c-4fe7-a1dd-77f2de6ab8fa",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " CUSTOMER | \n",
+ " STATE | \n",
+ " CUSTOMER_LIFETIME_VALUE | \n",
+ " RESPONSE | \n",
+ " COVERAGE | \n",
+ " EDUCATION | \n",
+ " EFFECTIVE_TO_DATE | \n",
+ " EMPLOYMENT_STATUS | \n",
+ " GENDER | \n",
+ " INCOME | \n",
+ " ... | \n",
+ " NUMBER_OF_POLICIES | \n",
+ " POLICY_TYPE | \n",
+ " POLICY | \n",
+ " RENEW_OFFER_TYPE | \n",
+ " SALES_CHANNEL | \n",
+ " TOTAL_CLAIM_AMOUNT | \n",
+ " VEHICLE_CLASS | \n",
+ " VEHICLE_SIZE | \n",
+ " VEHICLE_TYPE | \n",
+ " MONTH | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " DK49336 | \n",
+ " Arizona | \n",
+ " 4809.216960 | \n",
+ " No | \n",
+ " Basic | \n",
+ " College | \n",
+ " 2011-02-18 | \n",
+ " Employed | \n",
+ " M | \n",
+ " 48029 | \n",
+ " ... | \n",
+ " 9 | \n",
+ " Corporate Auto | \n",
+ " Corporate L3 | \n",
+ " Offer3 | \n",
+ " Agent | \n",
+ " 292.800000 | \n",
+ " Four-Door Car | \n",
+ " Medsize | \n",
+ " A | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " KX64629 | \n",
+ " California | \n",
+ " 2228.525238 | \n",
+ " No | \n",
+ " Basic | \n",
+ " College | \n",
+ " 2011-01-18 | \n",
+ " Unemployed | \n",
+ " F | \n",
+ " 0 | \n",
+ " ... | \n",
+ " 1 | \n",
+ " Personal Auto | \n",
+ " Personal L3 | \n",
+ " Offer4 | \n",
+ " Call Center | \n",
+ " 744.924331 | \n",
+ " Four-Door Car | \n",
+ " Medsize | \n",
+ " A | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " LZ68649 | \n",
+ " Washington | \n",
+ " 14947.917300 | \n",
+ " No | \n",
+ " Basic | \n",
+ " Bachelor | \n",
+ " 2011-02-10 | \n",
+ " Employed | \n",
+ " M | \n",
+ " 22139 | \n",
+ " ... | \n",
+ " 2 | \n",
+ " Personal Auto | \n",
+ " Personal L3 | \n",
+ " Offer3 | \n",
+ " Call Center | \n",
+ " 480.000000 | \n",
+ " SUV | \n",
+ " Medsize | \n",
+ " A | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " XL78013 | \n",
+ " Oregon | \n",
+ " 22332.439460 | \n",
+ " Yes | \n",
+ " Extended | \n",
+ " College | \n",
+ " 2011-01-11 | \n",
+ " Employed | \n",
+ " M | \n",
+ " 49078 | \n",
+ " ... | \n",
+ " 2 | \n",
+ " Corporate Auto | \n",
+ " Corporate L3 | \n",
+ " Offer2 | \n",
+ " Branch | \n",
+ " 484.013411 | \n",
+ " Four-Door Car | \n",
+ " Medsize | \n",
+ " A | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " QA50777 | \n",
+ " Oregon | \n",
+ " 9025.067525 | \n",
+ " No | \n",
+ " Premium | \n",
+ " Bachelor | \n",
+ " 2011-01-17 | \n",
+ " Medical Leave | \n",
+ " F | \n",
+ " 23675 | \n",
+ " ... | \n",
+ " 7 | \n",
+ " Personal Auto | \n",
+ " Personal L2 | \n",
+ " Offer1 | \n",
+ " Branch | \n",
+ " 707.925645 | \n",
+ " Four-Door Car | \n",
+ " Medsize | \n",
+ " A | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 10905 | \n",
+ " FE99816 | \n",
+ " Nevada | \n",
+ " 15563.369440 | \n",
+ " No | \n",
+ " Premium | \n",
+ " Bachelor | \n",
+ " 2011-01-19 | \n",
+ " Unemployed | \n",
+ " F | \n",
+ " 0 | \n",
+ " ... | \n",
+ " 7 | \n",
+ " Personal Auto | \n",
+ " Personal L1 | \n",
+ " Offer3 | \n",
+ " Web | \n",
+ " 1214.400000 | \n",
+ " Luxury | \n",
+ " Medsize | \n",
+ " A | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 10906 | \n",
+ " KX53892 | \n",
+ " Oregon | \n",
+ " 5259.444853 | \n",
+ " No | \n",
+ " Basic | \n",
+ " College | \n",
+ " 2011-01-06 | \n",
+ " Employed | \n",
+ " F | \n",
+ " 61146 | \n",
+ " ... | \n",
+ " 6 | \n",
+ " Personal Auto | \n",
+ " Personal L3 | \n",
+ " Offer2 | \n",
+ " Branch | \n",
+ " 273.018929 | \n",
+ " Four-Door Car | \n",
+ " Medsize | \n",
+ " A | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 10907 | \n",
+ " TL39050 | \n",
+ " Arizona | \n",
+ " 23893.304100 | \n",
+ " No | \n",
+ " Extended | \n",
+ " Bachelor | \n",
+ " 2011-02-06 | \n",
+ " Employed | \n",
+ " F | \n",
+ " 39837 | \n",
+ " ... | \n",
+ " 2 | \n",
+ " Corporate Auto | \n",
+ " Corporate L3 | \n",
+ " Offer1 | \n",
+ " Web | \n",
+ " 381.306996 | \n",
+ " Luxury | \n",
+ " Medsize | \n",
+ " A | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " 10908 | \n",
+ " WA60547 | \n",
+ " California | \n",
+ " 11971.977650 | \n",
+ " No | \n",
+ " Premium | \n",
+ " College | \n",
+ " 2011-02-13 | \n",
+ " Employed | \n",
+ " F | \n",
+ " 64195 | \n",
+ " ... | \n",
+ " 6 | \n",
+ " Personal Auto | \n",
+ " Personal L1 | \n",
+ " Offer1 | \n",
+ " Branch | \n",
+ " 618.288849 | \n",
+ " SUV | \n",
+ " Medsize | \n",
+ " A | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " 10909 | \n",
+ " IV32877 | \n",
+ " California | \n",
+ " 6857.519928 | \n",
+ " No | \n",
+ " Basic | \n",
+ " Bachelor | \n",
+ " 2011-01-08 | \n",
+ " Unemployed | \n",
+ " M | \n",
+ " 0 | \n",
+ " ... | \n",
+ " 3 | \n",
+ " Personal Auto | \n",
+ " Personal L1 | \n",
+ " Offer4 | \n",
+ " Web | \n",
+ " 1021.719397 | \n",
+ " SUV | \n",
+ " Medsize | \n",
+ " A | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
10910 rows × 26 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " CUSTOMER STATE CUSTOMER_LIFETIME_VALUE RESPONSE COVERAGE \\\n",
+ "0 DK49336 Arizona 4809.216960 No Basic \n",
+ "1 KX64629 California 2228.525238 No Basic \n",
+ "2 LZ68649 Washington 14947.917300 No Basic \n",
+ "3 XL78013 Oregon 22332.439460 Yes Extended \n",
+ "4 QA50777 Oregon 9025.067525 No Premium \n",
+ "... ... ... ... ... ... \n",
+ "10905 FE99816 Nevada 15563.369440 No Premium \n",
+ "10906 KX53892 Oregon 5259.444853 No Basic \n",
+ "10907 TL39050 Arizona 23893.304100 No Extended \n",
+ "10908 WA60547 California 11971.977650 No Premium \n",
+ "10909 IV32877 California 6857.519928 No Basic \n",
+ "\n",
+ " EDUCATION EFFECTIVE_TO_DATE EMPLOYMENT_STATUS GENDER INCOME ... \\\n",
+ "0 College 2011-02-18 Employed M 48029 ... \n",
+ "1 College 2011-01-18 Unemployed F 0 ... \n",
+ "2 Bachelor 2011-02-10 Employed M 22139 ... \n",
+ "3 College 2011-01-11 Employed M 49078 ... \n",
+ "4 Bachelor 2011-01-17 Medical Leave F 23675 ... \n",
+ "... ... ... ... ... ... ... \n",
+ "10905 Bachelor 2011-01-19 Unemployed F 0 ... \n",
+ "10906 College 2011-01-06 Employed F 61146 ... \n",
+ "10907 Bachelor 2011-02-06 Employed F 39837 ... \n",
+ "10908 College 2011-02-13 Employed F 64195 ... \n",
+ "10909 Bachelor 2011-01-08 Unemployed M 0 ... \n",
+ "\n",
+ " NUMBER_OF_POLICIES POLICY_TYPE POLICY RENEW_OFFER_TYPE \\\n",
+ "0 9 Corporate Auto Corporate L3 Offer3 \n",
+ "1 1 Personal Auto Personal L3 Offer4 \n",
+ "2 2 Personal Auto Personal L3 Offer3 \n",
+ "3 2 Corporate Auto Corporate L3 Offer2 \n",
+ "4 7 Personal Auto Personal L2 Offer1 \n",
+ "... ... ... ... ... \n",
+ "10905 7 Personal Auto Personal L1 Offer3 \n",
+ "10906 6 Personal Auto Personal L3 Offer2 \n",
+ "10907 2 Corporate Auto Corporate L3 Offer1 \n",
+ "10908 6 Personal Auto Personal L1 Offer1 \n",
+ "10909 3 Personal Auto Personal L1 Offer4 \n",
+ "\n",
+ " SALES_CHANNEL TOTAL_CLAIM_AMOUNT VEHICLE_CLASS VEHICLE_SIZE \\\n",
+ "0 Agent 292.800000 Four-Door Car Medsize \n",
+ "1 Call Center 744.924331 Four-Door Car Medsize \n",
+ "2 Call Center 480.000000 SUV Medsize \n",
+ "3 Branch 484.013411 Four-Door Car Medsize \n",
+ "4 Branch 707.925645 Four-Door Car Medsize \n",
+ "... ... ... ... ... \n",
+ "10905 Web 1214.400000 Luxury Medsize \n",
+ "10906 Branch 273.018929 Four-Door Car Medsize \n",
+ "10907 Web 381.306996 Luxury Medsize \n",
+ "10908 Branch 618.288849 SUV Medsize \n",
+ "10909 Web 1021.719397 SUV Medsize \n",
+ "\n",
+ " VEHICLE_TYPE MONTH \n",
+ "0 A 2 \n",
+ "1 A 1 \n",
+ "2 A 2 \n",
+ "3 A 1 \n",
+ "4 A 1 \n",
+ "... ... ... \n",
+ "10905 A 1 \n",
+ "10906 A 1 \n",
+ "10907 A 2 \n",
+ "10908 A 2 \n",
+ "10909 A 1 \n",
+ "\n",
+ "[10910 rows x 26 columns]"
+ ]
+ },
+ "execution_count": 57,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Display the frame after cleaning.\n",
+ "# NOTE(review): the saved output of this cell shows VEHICLE_CLASS values\n",
+ "# collapsed to 'Luxury', yet no saved cell applies the cleaning helpers and\n",
+ "# the execution counts jump from 41 to 55 - presumably they were run in cells\n",
+ "# that were later deleted. Add an explicit call so Restart & Run All\n",
+ "# reproduces this output.\n",
+ "marketing_customer_analysis"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 58,
+ "id": "3c5da8e5-9d80-4c1e-9eea-3a41dcac8c8c",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "['No' 'Yes']\n"
+ ]
+ }
+ ],
+ "source": [
+ "# List the distinct values present in the RESPONSE column.\n",
+ "print(marketing_customer_analysis['RESPONSE'].unique())"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 59,
+ "id": "10480ed9-1d34-4c74-94af-8048cf85bad9",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "['Basic' 'Extended' 'Premium']\n"
+ ]
+ }
+ ],
+ "source": [
+ "# List the distinct values present in the COVERAGE column.\n",
+ "print(marketing_customer_analysis['COVERAGE'].unique())"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 60,
+ "id": "6bfedc9c-2b80-413b-b288-7dea6c4a323d",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "['2011-02-18' '2011-01-18' '2011-02-10' '2011-01-11' '2011-01-17'\n",
+ " '2011-02-14' '2011-02-24' '2011-01-19' '2011-01-04' '2011-01-02'\n",
+ " '2011-02-07' '2011-01-31' '2011-01-26' '2011-02-28' '2011-01-16'\n",
+ " '2011-02-26' '2011-02-23' '2011-01-15' '2011-02-02' '2011-02-15'\n",
+ " '2011-01-24' '2011-02-21' '2011-02-22' '2011-01-07' '2011-01-28'\n",
+ " '2011-02-08' '2011-02-12' '2011-02-20' '2011-01-05' '2011-02-19'\n",
+ " '2011-01-03' '2011-02-03' '2011-01-22' '2011-01-23' '2011-02-05'\n",
+ " '2011-02-13' '2011-01-25' '2011-02-16' '2011-02-01' '2011-01-27'\n",
+ " '2011-01-12' '2011-01-20' '2011-02-06' '2011-02-11' '2011-01-21'\n",
+ " '2011-01-29' '2011-01-09' '2011-02-09' '2011-02-27' '2011-01-01'\n",
+ " '2011-02-17' '2011-02-25' '2011-01-13' '2011-01-06' '2011-02-04'\n",
+ " '2011-01-14' '2011-01-10' '2011-01-08' '2011-01-30']\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Distinct values present in EFFECTIVE_TO_DATE\n",
+ "print(pd.unique(marketing_customer_analysis['EFFECTIVE_TO_DATE']))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 61,
+ "id": "dd6e76b0-3764-4baa-aafd-39cf2deddd4b",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "['Employed' 'Unemployed' 'Medical Leave' 'Disabled' 'Retired']\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Distinct values present in EMPLOYMENT_STATUS\n",
+ "print(pd.unique(marketing_customer_analysis['EMPLOYMENT_STATUS']))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 62,
+ "id": "ca38eb29-4265-46b7-8e30-caece75e461c",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "['Suburban' 'Urban' 'Rural']\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Distinct values present in LOCATION_CODE\n",
+ "print(pd.unique(marketing_customer_analysis['LOCATION_CODE']))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 63,
+ "id": "7e46c178-f99c-4282-a7d0-d7c83d1031c3",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "['Married' 'Single' 'Divorced']\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Distinct values present in MARITAL_STATUS\n",
+ "print(pd.unique(marketing_customer_analysis['MARITAL_STATUS']))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 64,
+ "id": "5bb5d03c-ddc3-4318-a2c6-01bbecaf7854",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "[ 7. 3. 34. 10. 15.14907074 2.\n",
+ " 8. 35. 33. 19. 13. 5.\n",
+ " 24. 25. 6. 20. 26. 14.\n",
+ " 9. 29. 11. 1. 18. 16.\n",
+ " 30. 12. 4. 22. 31. 21.\n",
+ " 28. 17. 15. 32. 23. 27.\n",
+ " 0. ]\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Distinct values in MONTHS_SINCE_LAST_CLAIM before the integer cast\n",
+ "print(pd.unique(marketing_customer_analysis['MONTHS_SINCE_LAST_CLAIM']))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 65,
+ "id": "9c220fb3-ae78-434e-8878-bef9fe305b9e",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "float64\n"
+ ]
+ }
+ ],
+ "source": [
+ "print(marketing_customer_analysis.dtypes['MONTHS_SINCE_LAST_CLAIM'])  # dtype before the integer cast"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 66,
+ "id": "7c9233d1-1f54-4b82-9e65-1478ebfb5894",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Cast 'MONTHS_SINCE_LAST_CLAIM' to int; round first so fractional (mean-imputed) values are rounded, not truncated\n",
+ "marketing_customer_analysis['MONTHS_SINCE_LAST_CLAIM'] = marketing_customer_analysis['MONTHS_SINCE_LAST_CLAIM'].round().astype(int)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 67,
+ "id": "6cc0b878-35a5-4bfc-a4a0-f9e8497340c7",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "int32\n"
+ ]
+ }
+ ],
+ "source": [
+ "print(marketing_customer_analysis.dtypes['MONTHS_SINCE_LAST_CLAIM'])  # dtype after the integer cast"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 68,
+ "id": "3c39f96e-8afa-4ff1-8fd4-96aa07ea5f60",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "[ 7 3 34 10 15 2 8 35 33 19 13 5 24 25 6 20 26 14 9 29 11 1 18 16\n",
+ " 30 12 4 22 31 21 28 17 32 23 27 0]\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Re-check the distinct values of MONTHS_SINCE_LAST_CLAIM after the cast to int\n",
+ "print(pd.unique(marketing_customer_analysis['MONTHS_SINCE_LAST_CLAIM']))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 69,
+ "id": "04f07c50-a2b6-49eb-b0a9-0097f3efa78f",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "[52 26 31 3 73 99 45 24 8 29 32 25 28 87 10 74 1 38 58 37 7 80 95 78\n",
+ " 63 27 97 39 11 59 46 62 13 54 51 22 82 91 44 43 76 48 84 6 92 12 61 4\n",
+ " 18 66 70 16 75 34 35 64 9 89 0 60 71 23 55 93 2 67 81 40 57 86 19 72\n",
+ " 69 33 47 42 17 49 21 83 94 30 15 50 53 77 41 90 5 79 56 98 20 88 65 14\n",
+ " 85 96 36 68]\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Distinct values present in MONTHS_SINCE_POLICY_INCEPTION\n",
+ "print(pd.unique(marketing_customer_analysis['MONTHS_SINCE_POLICY_INCEPTION']))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 70,
+ "id": "d7525908-455a-4248-bd87-952998f8c661",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "[9 1 2 7 4 3 6 8 5]\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Distinct values present in NUMBER_OF_POLICIES\n",
+ "print(pd.unique(marketing_customer_analysis['NUMBER_OF_POLICIES']))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 71,
+ "id": "09ff1c28-658a-435a-9c79-a9cc1ef7331f",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "['Corporate L3' 'Personal L3' 'Personal L2' 'Corporate L2' 'Personal L1'\n",
+ " 'Special L1' 'Corporate L1' 'Special L3' 'Special L2']\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Distinct values present in POLICY\n",
+ "print(pd.unique(marketing_customer_analysis['POLICY']))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 72,
+ "id": "f43942b6-3250-49f0-bced-b41b2f925941",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "['Offer3' 'Offer4' 'Offer2' 'Offer1']\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Distinct values present in RENEW_OFFER_TYPE\n",
+ "print(pd.unique(marketing_customer_analysis['RENEW_OFFER_TYPE']))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 73,
+ "id": "749d6f3e-3b21-49a6-a0ad-79079f6586ed",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "['Agent' 'Call Center' 'Branch' 'Web']\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Distinct values present in SALES_CHANNEL\n",
+ "print(pd.unique(marketing_customer_analysis['SALES_CHANNEL']))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 74,
+ "id": "f24f5304-385d-4542-a5d7-06a1af3d5f7f",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "['Medsize' 'Small' 'Large']\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Distinct values present in VEHICLE_SIZE\n",
+ "print(pd.unique(marketing_customer_analysis['VEHICLE_SIZE']))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 75,
+ "id": "461455b6-e220-409b-a261-534232256532",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "['A']\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Distinct values present in VEHICLE_TYPE\n",
+ "print(pd.unique(marketing_customer_analysis['VEHICLE_TYPE']))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 76,
+ "id": "0ecc63f2-323d-470c-ae8c-2326b88187f4",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "[2 1]\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Distinct values present in MONTH\n",
+ "print(pd.unique(marketing_customer_analysis['MONTH']))"
]
},
{
@@ -93,6 +4444,37 @@
"Round the total revenue to 2 decimal points. Analyze the resulting table to draw insights."
]
},
+ {
+ "cell_type": "code",
+ "execution_count": 78,
+ "id": "9895fd97-4a20-4121-b910-d7b8868da7aa",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " TOTAL_CLAIM_AMOUNT\n",
+ "SALES_CHANNEL \n",
+ "Agent 1810226.82\n",
+ "Branch 1301204.00\n",
+ "Call Center 926600.82\n",
+ "Web 706600.04\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Total revenue (sum of TOTAL_CLAIM_AMOUNT) per sales channel, rounded to 2 decimals\n",
+ "pivot = marketing_customer_analysis.pivot_table(\n",
+ "    values=\"TOTAL_CLAIM_AMOUNT\",\n",
+ "    index=\"SALES_CHANNEL\",\n",
+ "    aggfunc=\"sum\",\n",
+ ")\n",
+ "pivot = pivot.round(2)\n",
+ "\n",
+ "print(pivot)"
+ ]
+ },
{
"cell_type": "markdown",
"id": "640993b2-a291-436c-a34d-a551144f8196",
@@ -103,6 +4485,36 @@
"2. Create a pivot table that shows the average customer lifetime value per gender and education level. Analyze the resulting table to draw insights."
]
},
+ {
+ "cell_type": "code",
+ "execution_count": 79,
+ "id": "75d4a1f7-2eb5-4ab7-9d7d-92f38be3b218",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "EDUCATION Bachelor College Doctor High School or Below Master\n",
+ "GENDER \n",
+ "F 7874.27 7748.82 7328.51 8675.22 8157.05\n",
+ "M 7703.60 8052.46 7415.33 8149.69 8168.83\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Mean customer lifetime value: one row per gender, one column per education level\n",
+ "pivot_clv = marketing_customer_analysis.pivot_table(\n",
+ "    values='CUSTOMER_LIFETIME_VALUE',\n",
+ "    index='GENDER',\n",
+ "    columns='EDUCATION',\n",
+ "    aggfunc='mean',\n",
+ ")\n",
+ "pivot_clv = pivot_clv.round(2)\n",
+ "\n",
+ "print(pivot_clv)"
+ ]
+ },
{
"cell_type": "markdown",
"id": "32c7f2e5-3d90-43e5-be33-9781b6069198",
@@ -130,15 +4542,127 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 80,
"id": "3a069e0b-b400-470e-904d-d17582191be4",
"metadata": {
"id": "3a069e0b-b400-470e-904d-d17582191be4"
},
"outputs": [],
"source": [
- "# Your code goes here"
+ "# Sum the open complaints for every (policy type, month) pair, keeping the result in long format\n",
+ "summary = marketing_customer_analysis.groupby(['POLICY_TYPE', 'MONTH'], as_index=False)['NUMBER_OF_OPEN_COMPLAINTS'].sum()\n"
]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 81,
+ "id": "6f1f4d7a-2139-4335-8b98-3ca18d27715e",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Give the aggregated column a name that reflects it is a per-group total\n",
+ "summary = summary.rename({'NUMBER_OF_OPEN_COMPLAINTS': 'TOTAL_COMPLAINTS'}, axis='columns')\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 82,
+ "id": "29818885-526f-450e-8efc-48a2a7085f42",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " POLICY_TYPE | \n",
+ " MONTH | \n",
+ " TOTAL_COMPLAINTS | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " Corporate Auto | \n",
+ " 1 | \n",
+ " 443.434952 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " Corporate Auto | \n",
+ " 2 | \n",
+ " 385.208135 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " Personal Auto | \n",
+ " 1 | \n",
+ " 1727.605722 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " Personal Auto | \n",
+ " 2 | \n",
+ " 1453.684441 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " Special Auto | \n",
+ " 1 | \n",
+ " 87.074049 | \n",
+ "
\n",
+ " \n",
+ " 5 | \n",
+ " Special Auto | \n",
+ " 2 | \n",
+ " 95.226817 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " POLICY_TYPE MONTH TOTAL_COMPLAINTS\n",
+ "0 Corporate Auto 1 443.434952\n",
+ "1 Corporate Auto 2 385.208135\n",
+ "2 Personal Auto 1 1727.605722\n",
+ "3 Personal Auto 2 1453.684441\n",
+ "4 Special Auto 1 87.074049\n",
+ "5 Special Auto 2 95.226817"
+ ]
+ },
+ "execution_count": 82,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "summary"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "073e7590-4e38-4e99-946c-dfb1785ff3f6",
+ "metadata": {},
+ "outputs": [],
+ "source": []
}
],
"metadata": {
@@ -146,9 +4670,9 @@
"provenance": []
},
"kernelspec": {
- "display_name": "Python 3 (ipykernel)",
+ "display_name": "Python [conda env:base] *",
"language": "python",
- "name": "python3"
+ "name": "conda-base-py"
},
"language_info": {
"codemirror_mode": {
@@ -160,7 +4684,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.9.13"
+ "version": "3.11.5"
}
},
"nbformat": 4,