From 246aa73e04eff808a60f12ccdf052c0241c7f8d7 Mon Sep 17 00:00:00 2001 From: Lewis Clark Date: Sun, 3 Aug 2025 17:09:02 +0200 Subject: [PATCH] Lab completed --- LabsDataStructuringAndCombing.ipynb | 2387 +++++++++++++++++++++++++++ 1 file changed, 2387 insertions(+) create mode 100644 LabsDataStructuringAndCombing.ipynb diff --git a/LabsDataStructuringAndCombing.ipynb b/LabsDataStructuringAndCombing.ipynb new file mode 100644 index 0000000..b8ed6f3 --- /dev/null +++ b/LabsDataStructuringAndCombing.ipynb @@ -0,0 +1,2387 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "149fd7de-5571-4a43-831b-bf404fde8451", + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import numpy as np" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "id": "1cebe2cb-7eb3-47fb-b91e-03d7e43e6035", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
customerstategendereducationcustomer_lifetime_valueincomemonthly_premium_autonumber_of_open_complaintspolicy_typevehicle_classtotal_claim_amount
0RB50392WashingtonNaNMaster588174010000Personal AutoFour-Door Car2
1QZ44356ArizonaFBachelor6979530940Personal AutoFour-Door Car1131
2AI49188NevadaFBachelor1288743487671080Personal AutoTwo-Door Car566
3WW63253CaliforniaMBachelor76458601060Corporate AutoSUV529
4GA49547WashingtonMHigh School or Below53630736357680Personal AutoFour-Door Car17
....................................
1067VJ51327CaliforniaFHigh School or Below2031499632091022Personal AutoSUV207
1068GS98873ArizonaFBachelor32391216061880Personal AutoFour-Door Car633
1069CW49887CaliforniaFMaster462680794871140Special AutoSUV547
1070MY31220CaliforniaFCollege899704542301120Personal AutoTwo-Door Car537
1071AA71604CaliforniaNaNBachelor58817436234830Personal AutoFour-Door Car354
\n", + "

1072 rows × 11 columns

\n", + "
" + ], + "text/plain": [ + " customer state gender education \\\n", + "0 RB50392 Washington NaN Master \n", + "1 QZ44356 Arizona F Bachelor \n", + "2 AI49188 Nevada F Bachelor \n", + "3 WW63253 California M Bachelor \n", + "4 GA49547 Washington M High School or Below \n", + "... ... ... ... ... \n", + "1067 VJ51327 California F High School or Below \n", + "1068 GS98873 Arizona F Bachelor \n", + "1069 CW49887 California F Master \n", + "1070 MY31220 California F College \n", + "1071 AA71604 California NaN Bachelor \n", + "\n", + " customer_lifetime_value income monthly_premium_auto \\\n", + "0 588174 0 1000 \n", + "1 697953 0 94 \n", + "2 1288743 48767 108 \n", + "3 764586 0 106 \n", + "4 536307 36357 68 \n", + "... ... ... ... \n", + "1067 2031499 63209 102 \n", + "1068 323912 16061 88 \n", + "1069 462680 79487 114 \n", + "1070 899704 54230 112 \n", + "1071 588174 36234 83 \n", + "\n", + " number_of_open_complaints policy_type vehicle_class \\\n", + "0 0 Personal Auto Four-Door Car \n", + "1 0 Personal Auto Four-Door Car \n", + "2 0 Personal Auto Two-Door Car \n", + "3 0 Corporate Auto SUV \n", + "4 0 Personal Auto Four-Door Car \n", + "... ... ... ... \n", + "1067 2 Personal Auto SUV \n", + "1068 0 Personal Auto Four-Door Car \n", + "1069 0 Special Auto SUV \n", + "1070 0 Personal Auto Two-Door Car \n", + "1071 0 Personal Auto Four-Door Car \n", + "\n", + " total_claim_amount \n", + "0 2 \n", + "1 1131 \n", + "2 566 \n", + "3 529 \n", + "4 17 \n", + "... ... 
\n", + "1067 207 \n", + "1068 633 \n", + "1069 547 \n", + "1070 537 \n", + "1071 354 \n", + "\n", + "[1072 rows x 11 columns]" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "cleaned_file1 = pd.read_csv('cleaned_data.csv') ### cleaned file from previous LABS\n", + "cleaned_file1" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "id": "b6333d2f-77c9-477b-a375-78cb469459fb", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "customer object\n", + "state object\n", + "gender object\n", + "education object\n", + "customer_lifetime_value int64\n", + "income int64\n", + "monthly_premium_auto int64\n", + "number_of_open_complaints int64\n", + "policy_type object\n", + "vehicle_class object\n", + "total_claim_amount int64\n", + "dtype: object\n" + ] + } + ], + "source": [ + "print(cleaned_file1.dtypes)" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "98615fba-e2a8-4a98-aae6-3166b0fb4be4", + "metadata": {}, + "outputs": [], + "source": [ + "url_2 = \"https://raw.githubusercontent.com/data-bootcamp-v4/data/main/file2.csv\"\n", + "df_2 = pd.read_csv(url_2) ### reading second file" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "id": "1f0e2dba-c82a-4f43-8024-d82ce7868528", + "metadata": {}, + "outputs": [ + { + "ename": "KeyError", + "evalue": "'Customer Lifetime Value'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)", + "File \u001b[0;32m/opt/anaconda3/lib/python3.13/site-packages/pandas/core/indexes/base.py:3805\u001b[0m, in \u001b[0;36mIndex.get_loc\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m 3804\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m-> 3805\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m 
\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_engine\u001b[38;5;241m.\u001b[39mget_loc(casted_key)\n\u001b[1;32m 3806\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m err:\n", + "File \u001b[0;32mindex.pyx:167\u001b[0m, in \u001b[0;36mpandas._libs.index.IndexEngine.get_loc\u001b[0;34m()\u001b[0m\n", + "File \u001b[0;32mindex.pyx:196\u001b[0m, in \u001b[0;36mpandas._libs.index.IndexEngine.get_loc\u001b[0;34m()\u001b[0m\n", + "File \u001b[0;32mpandas/_libs/hashtable_class_helper.pxi:7081\u001b[0m, in \u001b[0;36mpandas._libs.hashtable.PyObjectHashTable.get_item\u001b[0;34m()\u001b[0m\n", + "File \u001b[0;32mpandas/_libs/hashtable_class_helper.pxi:7089\u001b[0m, in \u001b[0;36mpandas._libs.hashtable.PyObjectHashTable.get_item\u001b[0;34m()\u001b[0m\n", + "\u001b[0;31mKeyError\u001b[0m: 'Customer Lifetime Value'", + "\nThe above exception was the direct cause of the following exception:\n", + "\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[25], line 8\u001b[0m\n\u001b[1;32m 1\u001b[0m df_2\u001b[38;5;241m.\u001b[39mrename(columns\u001b[38;5;241m=\u001b[39m{\n\u001b[1;32m 2\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mState\u001b[39m\u001b[38;5;124m'\u001b[39m: \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mST\u001b[39m\u001b[38;5;124m'\u001b[39m,\n\u001b[1;32m 3\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mGender\u001b[39m\u001b[38;5;124m'\u001b[39m: \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mGENDER\u001b[39m\u001b[38;5;124m'\u001b[39m,\n\u001b[1;32m 4\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mTotal Claim Amount\u001b[39m\u001b[38;5;124m'\u001b[39m: \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mTotal Claim Amount\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;66;03m### ensuring naming of columns is the same\u001b[39;00m\n\u001b[1;32m 5\u001b[0m }, 
inplace\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n\u001b[0;32m----> 8\u001b[0m df_2[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mCustomer Lifetime Value\u001b[39m\u001b[38;5;124m'\u001b[39m] \u001b[38;5;241m=\u001b[39m df_2[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mCustomer Lifetime Value\u001b[39m\u001b[38;5;124m'\u001b[39m]\u001b[38;5;241m.\u001b[39mstr\u001b[38;5;241m.\u001b[39mreplace(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m%\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124m'\u001b[39m)\u001b[38;5;241m.\u001b[39mastype(\u001b[38;5;28mfloat\u001b[39m) \u001b[38;5;66;03m### removing %\u001b[39;00m\n\u001b[1;32m 11\u001b[0m df_2[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mNumber of Open Complaints\u001b[39m\u001b[38;5;124m'\u001b[39m] \u001b[38;5;241m=\u001b[39m pd\u001b[38;5;241m.\u001b[39mto_numeric(df_2[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mNumber of Open Complaints\u001b[39m\u001b[38;5;124m'\u001b[39m], errors\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mcoerce\u001b[39m\u001b[38;5;124m'\u001b[39m)\u001b[38;5;241m.\u001b[39mfillna(\u001b[38;5;241m0\u001b[39m)\u001b[38;5;241m.\u001b[39mastype(\u001b[38;5;28mint\u001b[39m)\n", + "File \u001b[0;32m/opt/anaconda3/lib/python3.13/site-packages/pandas/core/frame.py:4102\u001b[0m, in \u001b[0;36mDataFrame.__getitem__\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m 4100\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcolumns\u001b[38;5;241m.\u001b[39mnlevels \u001b[38;5;241m>\u001b[39m \u001b[38;5;241m1\u001b[39m:\n\u001b[1;32m 4101\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_getitem_multilevel(key)\n\u001b[0;32m-> 4102\u001b[0m indexer \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcolumns\u001b[38;5;241m.\u001b[39mget_loc(key)\n\u001b[1;32m 4103\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m 
is_integer(indexer):\n\u001b[1;32m 4104\u001b[0m indexer \u001b[38;5;241m=\u001b[39m [indexer]\n", + "File \u001b[0;32m/opt/anaconda3/lib/python3.13/site-packages/pandas/core/indexes/base.py:3812\u001b[0m, in \u001b[0;36mIndex.get_loc\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m 3807\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(casted_key, \u001b[38;5;28mslice\u001b[39m) \u001b[38;5;129;01mor\u001b[39;00m (\n\u001b[1;32m 3808\u001b[0m \u001b[38;5;28misinstance\u001b[39m(casted_key, abc\u001b[38;5;241m.\u001b[39mIterable)\n\u001b[1;32m 3809\u001b[0m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28many\u001b[39m(\u001b[38;5;28misinstance\u001b[39m(x, \u001b[38;5;28mslice\u001b[39m) \u001b[38;5;28;01mfor\u001b[39;00m x \u001b[38;5;129;01min\u001b[39;00m casted_key)\n\u001b[1;32m 3810\u001b[0m ):\n\u001b[1;32m 3811\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m InvalidIndexError(key)\n\u001b[0;32m-> 3812\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m(key) \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01merr\u001b[39;00m\n\u001b[1;32m 3813\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m:\n\u001b[1;32m 3814\u001b[0m \u001b[38;5;66;03m# If we have a listlike key, _check_indexing_error will raise\u001b[39;00m\n\u001b[1;32m 3815\u001b[0m \u001b[38;5;66;03m# InvalidIndexError. 
Otherwise we fall through and re-raise\u001b[39;00m\n\u001b[1;32m 3816\u001b[0m \u001b[38;5;66;03m# the TypeError.\u001b[39;00m\n\u001b[1;32m 3817\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_check_indexing_error(key)\n", + "\u001b[0;31mKeyError\u001b[0m: 'Customer Lifetime Value'" + ] + } + ], + "source": [ + "df_2.rename(columns={\n", + " 'State': 'ST',\n", + " 'Gender': 'GENDER',\n", + " 'Total Claim Amount': 'Total Claim Amount', ### ensuring naming of columns is the same\n", + "}, inplace=True)\n", + "\n", + "\n", + "df_2['Customer Lifetime Value'] = df_2['Customer Lifetime Value'].str.replace('%', '').astype(float) ### removing %\n", + "\n", + "\n", + "df_2['Number of Open Complaints'] = pd.to_numeric(df_2['Number of Open Complaints'], errors='coerce').fillna(0).astype(int)\n", + "### converts Num of Open Complaints to an integer. Replacing missing or invalid with the integer 0" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "id": "9ddf0082-6deb-49ea-a6fe-a97e7cecedd9", + "metadata": {}, + "outputs": [], + "source": [ + "df_2.rename(columns={\n", + " 'Customer': 'customer',\n", + " 'ST': 'state',\n", + " 'GENDER': 'gender',\n", + " 'Education': 'education',\n", + " 'Customer Lifetime Value': 'customer_lifetime_value', ### ensuring all column names are the same\n", + " 'Income': 'income',\n", + " 'Monthly Premium Auto': 'monthly_premium_auto',\n", + " 'Number of Open Complaints': 'number_of_open_complaints',\n", + " 'Policy Type': 'policy_type',\n", + " 'Vehicle Class': 'vehicle_class',\n", + " 'Total Claim Amount': 'total_claim_amount'\n", + "}, inplace=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "c6bc08af-bf69-43ad-8840-f92c7996ed2c", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
customerstategendereducationcustomer_lifetime_valueincomemonthly_premium_autonumber_of_open_complaintspolicy_typevehicle_classtotal_claim_amount
0RB50392WashingtonNaNMaster588174010000Personal AutoFour-Door Car2
1QZ44356ArizonaFBachelor6979530940Personal AutoFour-Door Car1131
2AI49188NevadaFBachelor1288743487671080Personal AutoTwo-Door Car566
3WW63253CaliforniaMBachelor76458601060Corporate AutoSUV529
4GA49547WashingtonMHigh School or Below53630736357680Personal AutoFour-Door Car17
\n", + "
" + ], + "text/plain": [ + " customer state gender education customer_lifetime_value \\\n", + "0 RB50392 Washington NaN Master 588174 \n", + "1 QZ44356 Arizona F Bachelor 697953 \n", + "2 AI49188 Nevada F Bachelor 1288743 \n", + "3 WW63253 California M Bachelor 764586 \n", + "4 GA49547 Washington M High School or Below 536307 \n", + "\n", + " income monthly_premium_auto number_of_open_complaints policy_type \\\n", + "0 0 1000 0 Personal Auto \n", + "1 0 94 0 Personal Auto \n", + "2 48767 108 0 Personal Auto \n", + "3 0 106 0 Corporate Auto \n", + "4 36357 68 0 Personal Auto \n", + "\n", + " vehicle_class total_claim_amount \n", + "0 Four-Door Car 2 \n", + "1 Four-Door Car 1131 \n", + "2 Two-Door Car 566 \n", + "3 SUV 529 \n", + "4 Four-Door Car 17 " + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "cleaned_file1.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "id": "0dc342ac-4d6a-4ab1-a80d-6bfd0fedd15b", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
customerstategendereducationcustomer_lifetime_valueincomemonthly_premium_autonumber_of_open_complaintspolicy_typevehicle_classtotal_claim_amount
0GS98873ArizonaFBachelor323912.4716061880Personal AutoFour-Door Car633.6
1CW49887CaliforniaFMaster462680.11794871140Special AutoSUV547.2
2MY31220CaliforniaFCollege899704.02542301120Personal AutoTwo-Door Car537.6
3UH35128OregonFCollege2580706.30712102140Personal AutoLuxury Car1027.2
4WH52799ArizonaFCollege380812.2194903940Corporate AutoTwo-Door Car451.2
\n", + "
" + ], + "text/plain": [ + " customer state gender education customer_lifetime_value income \\\n", + "0 GS98873 Arizona F Bachelor 323912.47 16061 \n", + "1 CW49887 California F Master 462680.11 79487 \n", + "2 MY31220 California F College 899704.02 54230 \n", + "3 UH35128 Oregon F College 2580706.30 71210 \n", + "4 WH52799 Arizona F College 380812.21 94903 \n", + "\n", + " monthly_premium_auto number_of_open_complaints policy_type \\\n", + "0 88 0 Personal Auto \n", + "1 114 0 Special Auto \n", + "2 112 0 Personal Auto \n", + "3 214 0 Personal Auto \n", + "4 94 0 Corporate Auto \n", + "\n", + " vehicle_class total_claim_amount \n", + "0 Four-Door Car 633.6 \n", + "1 SUV 547.2 \n", + "2 Two-Door Car 537.6 \n", + "3 Luxury Car 1027.2 \n", + "4 Two-Door Car 451.2 " + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_2.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "id": "764af229-0101-4a49-8410-e0d28f31b89d", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "customer object\n", + "state object\n", + "gender object\n", + "education object\n", + "customer_lifetime_value float64\n", + "income int64\n", + "monthly_premium_auto int64\n", + "number_of_open_complaints int64\n", + "policy_type object\n", + "vehicle_class object\n", + "total_claim_amount float64\n", + "dtype: object\n" + ] + } + ], + "source": [ + "print(df_2.dtypes)" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "id": "f11a00ce-4278-445d-aa46-d1bba15b5bb5", + "metadata": {}, + "outputs": [], + "source": [ + "df_2 = df_2[cleaned_file1.columns] ### matching the names to cleaned_file1" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "id": "c51c7301-092e-43ac-a5fb-ffd0c1ce97e2", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
customerstategendereducationcustomer_lifetime_valueincomemonthly_premium_autonumber_of_open_complaintspolicy_typevehicle_classtotal_claim_amount
0GS98873ArizonaFBachelor323912.4716061880Personal AutoFour-Door Car633.6
1CW49887CaliforniaFMaster462680.11794871140Special AutoSUV547.2
2MY31220CaliforniaFCollege899704.02542301120Personal AutoTwo-Door Car537.6
3UH35128OregonFCollege2580706.30712102140Personal AutoLuxury Car1027.2
4WH52799ArizonaFCollege380812.2194903940Corporate AutoTwo-Door Car451.2
\n", + "
" + ], + "text/plain": [ + " customer state gender education customer_lifetime_value income \\\n", + "0 GS98873 Arizona F Bachelor 323912.47 16061 \n", + "1 CW49887 California F Master 462680.11 79487 \n", + "2 MY31220 California F College 899704.02 54230 \n", + "3 UH35128 Oregon F College 2580706.30 71210 \n", + "4 WH52799 Arizona F College 380812.21 94903 \n", + "\n", + " monthly_premium_auto number_of_open_complaints policy_type \\\n", + "0 88 0 Personal Auto \n", + "1 114 0 Special Auto \n", + "2 112 0 Personal Auto \n", + "3 214 0 Personal Auto \n", + "4 94 0 Corporate Auto \n", + "\n", + " vehicle_class total_claim_amount \n", + "0 Four-Door Car 633.6 \n", + "1 SUV 547.2 \n", + "2 Two-Door Car 537.6 \n", + "3 Luxury Car 1027.2 \n", + "4 Two-Door Car 451.2 " + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_2.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "id": "7ff2eeff-0d53-4090-92bc-dd146adc5993", + "metadata": {}, + "outputs": [], + "source": [ + "url_3 = \"https://raw.githubusercontent.com/data-bootcamp-v4/data/main/file3.csv\"\n", + "df_3 = pd.read_csv(url_3)" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "id": "8ac860ef-7307-4263-bc38-bc7a971fbc88", + "metadata": {}, + "outputs": [], + "source": [ + "df_3.rename(columns={\n", + " 'Customer': 'customer',\n", + " 'State': 'state',\n", + " 'Gender': 'gender',\n", + " 'Education': 'education',\n", + " 'Customer Lifetime Value': 'customer_lifetime_value', ### changing the column names again\n", + " 'Income': 'income',\n", + " 'Monthly Premium Auto': 'monthly_premium_auto',\n", + " 'Number of Open Complaints': 'number_of_open_complaints',\n", + " 'Policy Type': 'policy_type',\n", + " 'Vehicle Class': 'vehicle_class',\n", + " 'Total Claim Amount': 'total_claim_amount'\n", + "}, inplace=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "id": "5fb4c370-dfdc-49e6-8d12-290543964d52", + 
"metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
customerstatecustomer_lifetime_valueeducationgenderincomemonthly_premium_autonumber_of_open_complaintspolicy_typetotal_claim_amountvehicle_class
0SA25987Washington3479.137523High School or BelowM01040Personal Auto499.200000Two-Door Car
1TB86706Arizona2502.637401MasterM0660Personal Auto3.468912Two-Door Car
2ZL73902Nevada3265.156348BachelorF25820820Personal Auto393.600000Four-Door Car
3KX23516California4455.843406High School or BelowF01210Personal Auto699.615192SUV
4FN77294California7704.958480High School or BelowM303661012Personal Auto484.800000SUV
\n", + "
" + ], + "text/plain": [ + " customer state customer_lifetime_value education gender \\\n", + "0 SA25987 Washington 3479.137523 High School or Below M \n", + "1 TB86706 Arizona 2502.637401 Master M \n", + "2 ZL73902 Nevada 3265.156348 Bachelor F \n", + "3 KX23516 California 4455.843406 High School or Below F \n", + "4 FN77294 California 7704.958480 High School or Below M \n", + "\n", + " income monthly_premium_auto number_of_open_complaints policy_type \\\n", + "0 0 104 0 Personal Auto \n", + "1 0 66 0 Personal Auto \n", + "2 25820 82 0 Personal Auto \n", + "3 0 121 0 Personal Auto \n", + "4 30366 101 2 Personal Auto \n", + "\n", + " total_claim_amount vehicle_class \n", + "0 499.200000 Two-Door Car \n", + "1 3.468912 Two-Door Car \n", + "2 393.600000 Four-Door Car \n", + "3 699.615192 SUV \n", + "4 484.800000 SUV " + ] + }, + "execution_count": 37, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_3.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "id": "29bdd186-b520-47bc-8fb9-31f6ff74ead5", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
customerstategendereducationcustomer_lifetime_valueincomemonthly_premium_autonumber_of_open_complaintspolicy_typevehicle_classtotal_claim_amount
0RB50392WashingtonNaNMaster588174010000Personal AutoFour-Door Car2
1QZ44356ArizonaFBachelor6979530940Personal AutoFour-Door Car1131
2AI49188NevadaFBachelor1288743487671080Personal AutoTwo-Door Car566
3WW63253CaliforniaMBachelor76458601060Corporate AutoSUV529
4GA49547WashingtonMHigh School or Below53630736357680Personal AutoFour-Door Car17
\n", + "
" + ], + "text/plain": [ + " customer state gender education customer_lifetime_value \\\n", + "0 RB50392 Washington NaN Master 588174 \n", + "1 QZ44356 Arizona F Bachelor 697953 \n", + "2 AI49188 Nevada F Bachelor 1288743 \n", + "3 WW63253 California M Bachelor 764586 \n", + "4 GA49547 Washington M High School or Below 536307 \n", + "\n", + " income monthly_premium_auto number_of_open_complaints policy_type \\\n", + "0 0 1000 0 Personal Auto \n", + "1 0 94 0 Personal Auto \n", + "2 48767 108 0 Personal Auto \n", + "3 0 106 0 Corporate Auto \n", + "4 36357 68 0 Personal Auto \n", + "\n", + " vehicle_class total_claim_amount \n", + "0 Four-Door Car 2 \n", + "1 Four-Door Car 1131 \n", + "2 Two-Door Car 566 \n", + "3 SUV 529 \n", + "4 Four-Door Car 17 " + ] + }, + "execution_count": 38, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "cleaned_file1.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "id": "75dc2923-0757-42a0-aa0b-5f7508ad5853", + "metadata": {}, + "outputs": [], + "source": [ + "df_3 = df_3[cleaned_file1.columns] ### putting df_3 in the same order" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "id": "2ec69f90-2598-458f-8692-f692dbb0df65", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
customerstategendereducationcustomer_lifetime_valueincomemonthly_premium_autonumber_of_open_complaintspolicy_typevehicle_classtotal_claim_amount
0SA25987WashingtonMHigh School or Below3479.13752301040Personal AutoTwo-Door Car499.2
\n", + "
" + ], + "text/plain": [ + " customer state gender education customer_lifetime_value \\\n", + "0 SA25987 Washington M High School or Below 3479.137523 \n", + "\n", + " income monthly_premium_auto number_of_open_complaints policy_type \\\n", + "0 0 104 0 Personal Auto \n", + "\n", + " vehicle_class total_claim_amount \n", + "0 Two-Door Car 499.2 " + ] + }, + "execution_count": 40, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_3.head(1)" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "id": "b1fa2ffc-e6cc-42ad-aba4-d839b9299dbe", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
customerstategendereducationcustomer_lifetime_valueincomemonthly_premium_autonumber_of_open_complaintspolicy_typevehicle_classtotal_claim_amount
0RB50392WashingtonNaNMaster588174010000Personal AutoFour-Door Car2
\n", + "
" + ], + "text/plain": [ + " customer state gender education customer_lifetime_value income \\\n", + "0 RB50392 Washington NaN Master 588174 0 \n", + "\n", + " monthly_premium_auto number_of_open_complaints policy_type \\\n", + "0 1000 0 Personal Auto \n", + "\n", + " vehicle_class total_claim_amount \n", + "0 Four-Door Car 2 " + ] + }, + "execution_count": 42, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "cleaned_file1.head(1)" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "id": "ab3ea6f7-3bc2-4c91-b754-a7c05f9e748a", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
customerstategendereducationcustomer_lifetime_valueincomemonthly_premium_autonumber_of_open_complaintspolicy_typevehicle_classtotal_claim_amount
0GS98873ArizonaFBachelor323912.4716061880Personal AutoFour-Door Car633.6
\n", + "
" + ], + "text/plain": [ + " customer state gender education customer_lifetime_value income \\\n", + "0 GS98873 Arizona F Bachelor 323912.47 16061 \n", + "\n", + " monthly_premium_auto number_of_open_complaints policy_type \\\n", + "0 88 0 Personal Auto \n", + "\n", + " vehicle_class total_claim_amount \n", + "0 Four-Door Car 633.6 " + ] + }, + "execution_count": 43, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_2.head(1)" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "id": "22d9cdf1-90fb-4b75-b610-f7589d82621d", + "metadata": {}, + "outputs": [], + "source": [ + "def strip_whitespace(df):\n", + " str_cols = df.select_dtypes(include='object').columns\n", + " for col in str_cols:\n", + " df[col] = df[col].str.strip() ### cleaning the whitespace in the columns in all datasets\n", + " return df\n", + "\n", + "cleaned_file1 = strip_whitespace(cleaned_file1)\n", + "df_2 = strip_whitespace(df_2)\n", + "df_3 = strip_whitespace(df_3)" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "id": "0fe732b1-99a6-49b0-a264-b74b0b53d40b", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
customerstategendereducationcustomer_lifetime_valueincomemonthly_premium_autonumber_of_open_complaintspolicy_typevehicle_classtotal_claim_amount
0RB50392WashingtonNaNMaster588174010000Personal AutoFour-Door Car2
\n", + "
" + ], + "text/plain": [ + " customer state gender education customer_lifetime_value income \\\n", + "0 RB50392 Washington NaN Master 588174 0 \n", + "\n", + " monthly_premium_auto number_of_open_complaints policy_type \\\n", + "0 1000 0 Personal Auto \n", + "\n", + " vehicle_class total_claim_amount \n", + "0 Four-Door Car 2 " + ] + }, + "execution_count": 45, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "cleaned_file1.head(1)" + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "id": "7e36043e-63be-4c62-9acb-ce2daab72251", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
customerstategendereducationcustomer_lifetime_valueincomemonthly_premium_autonumber_of_open_complaintspolicy_typevehicle_classtotal_claim_amount
0GS98873ArizonaFBachelor323912.4716061880Personal AutoFour-Door Car633.6
\n", + "
" + ], + "text/plain": [ + " customer state gender education customer_lifetime_value income \\\n", + "0 GS98873 Arizona F Bachelor 323912.47 16061 \n", + "\n", + " monthly_premium_auto number_of_open_complaints policy_type \\\n", + "0 88 0 Personal Auto \n", + "\n", + " vehicle_class total_claim_amount \n", + "0 Four-Door Car 633.6 " + ] + }, + "execution_count": 46, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_2.head(1)" + ] + }, + { + "cell_type": "code", + "execution_count": 47, + "id": "ddd6619a-4369-4833-8520-378e8f237ede", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
customerstategendereducationcustomer_lifetime_valueincomemonthly_premium_autonumber_of_open_complaintspolicy_typevehicle_classtotal_claim_amount
0SA25987WashingtonMHigh School or Below3479.13752301040Personal AutoTwo-Door Car499.2
\n", + "
" + ], + "text/plain": [ + " customer state gender education customer_lifetime_value \\\n", + "0 SA25987 Washington M High School or Below 3479.137523 \n", + "\n", + " income monthly_premium_auto number_of_open_complaints policy_type \\\n", + "0 0 104 0 Personal Auto \n", + "\n", + " vehicle_class total_claim_amount \n", + "0 Two-Door Car 499.2 " + ] + }, + "execution_count": 47, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_3.head(1)" + ] + }, + { + "cell_type": "code", + "execution_count": 50, + "id": "5932abd2-fb75-42dd-a44b-128f07f66076", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "customer object\n", + "state object\n", + "gender object\n", + "education object\n", + "customer_lifetime_value int64\n", + "income int64\n", + "monthly_premium_auto int64\n", + "number_of_open_complaints int64\n", + "policy_type object\n", + "vehicle_class object\n", + "total_claim_amount int64\n", + "dtype: object\n", + "customer object\n", + "state object\n", + "gender object\n", + "education object\n", + "customer_lifetime_value float64\n", + "income int64\n", + "monthly_premium_auto int64\n", + "number_of_open_complaints int64\n", + "policy_type object\n", + "vehicle_class object\n", + "total_claim_amount float64\n", + "dtype: object\n", + "customer object\n", + "state object\n", + "gender object\n", + "education object\n", + "customer_lifetime_value float64\n", + "income int64\n", + "monthly_premium_auto int64\n", + "number_of_open_complaints int64\n", + "policy_type object\n", + "vehicle_class object\n", + "total_claim_amount float64\n", + "dtype: object\n" + ] + } + ], + "source": [ + "print(cleaned_file1.dtypes)\n", + "print(df_2.dtypes)\n", + "print(df_3.dtypes)" + ] + }, + { + "cell_type": "code", + "execution_count": 54, + "id": "c798e3f2-e36d-4ce4-b103-3fe9dc6c859e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(1072, 11)" + ] + }, + "execution_count": 54, + 
"metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "cleaned_file1.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 55, + "id": "692a322b-d178-413c-b188-bb252e2dd7dc", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(996, 11)" + ] + }, + "execution_count": 55, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_2.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 56, + "id": "a58c29f5-abaa-4915-8845-22b64221b1f0", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(7070, 11)" + ] + }, + "execution_count": 56, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_3.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 51, + "id": "cfc5f58c-9859-4ced-8c54-3a7f28089f9f", + "metadata": {}, + "outputs": [], + "source": [ + "for df in [df_2, df_3]:\n", + " df['customer_lifetime_value'] = df['customer_lifetime_value'].fillna(0).astype(int) ### converting to all the same data type\n", + " df['total_claim_amount'] = df['total_claim_amount'].fillna(0).astype(int)" + ] + }, + { + "cell_type": "code", + "execution_count": 52, + "id": "897a2b9c-1b0a-4dc8-aa0f-4b44993076a5", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "customer object\n", + "state object\n", + "gender object\n", + "education object\n", + "customer_lifetime_value int64\n", + "income int64\n", + "monthly_premium_auto int64\n", + "number_of_open_complaints int64\n", + "policy_type object\n", + "vehicle_class object\n", + "total_claim_amount int64\n", + "dtype: object\n", + "customer object\n", + "state object\n", + "gender object\n", + "education object\n", + "customer_lifetime_value int64\n", + "income int64\n", + "monthly_premium_auto int64\n", + "number_of_open_complaints int64\n", + "policy_type object\n", + "vehicle_class object\n", + "total_claim_amount int64\n", + "dtype: object\n", + "customer 
object\n", + "state object\n", + "gender object\n", + "education object\n", + "customer_lifetime_value int64\n", + "income int64\n", + "monthly_premium_auto int64\n", + "number_of_open_complaints int64\n", + "policy_type object\n", + "vehicle_class object\n", + "total_claim_amount int64\n", + "dtype: object\n" + ] + } + ], + "source": [ + "print(cleaned_file1.dtypes)\n", + "print(df_2.dtypes)\n", + "print(df_3.dtypes)" + ] + }, + { + "cell_type": "code", + "execution_count": 53, + "id": "e3eb3e74-7a7a-4a75-8511-a54288499885", + "metadata": {}, + "outputs": [], + "source": [ + "final_df = pd.concat([cleaned_file1, df_2, df_3], ignore_index=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 57, + "id": "95cd7cb1-cc14-4b01-ad8f-f38b59d465bb", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(9138, 11)" + ] + }, + "execution_count": 57, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "final_df.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 58, + "id": "26ebcac5-cd9d-44e4-97ee-05fafbc5029f", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
customerstategendereducationcustomer_lifetime_valueincomemonthly_premium_autonumber_of_open_complaintspolicy_typevehicle_classtotal_claim_amount
0RB50392WashingtonNaNMaster588174010000Personal AutoFour-Door Car2
1QZ44356ArizonaFBachelor6979530940Personal AutoFour-Door Car1131
2AI49188NevadaFBachelor1288743487671080Personal AutoTwo-Door Car566
\n", + "
" + ], + "text/plain": [ + " customer state gender education customer_lifetime_value income \\\n", + "0 RB50392 Washington NaN Master 588174 0 \n", + "1 QZ44356 Arizona F Bachelor 697953 0 \n", + "2 AI49188 Nevada F Bachelor 1288743 48767 \n", + "\n", + " monthly_premium_auto number_of_open_complaints policy_type \\\n", + "0 1000 0 Personal Auto \n", + "1 94 0 Personal Auto \n", + "2 108 0 Personal Auto \n", + "\n", + " vehicle_class total_claim_amount \n", + "0 Four-Door Car 2 \n", + "1 Four-Door Car 1131 \n", + "2 Two-Door Car 566 " + ] + }, + "execution_count": 58, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "final_df.head(3)" + ] + }, + { + "cell_type": "code", + "execution_count": 59, + "id": "cd507cc8-1198-467e-944d-6c249b93da9a", + "metadata": {}, + "outputs": [], + "source": [ + "final_df.to_csv('final_cleaned_data.csv', index=False) ### saving the damn thing" + ] + }, + { + "cell_type": "code", + "execution_count": 60, + "id": "407f8813-eeb1-4205-9311-cdd567eb551a", + "metadata": {}, + "outputs": [], + "source": [ + "### next challenge ###" + ] + }, + { + "cell_type": "code", + "execution_count": 63, + "id": "888cc347-2789-4646-a443-1bebf6ed6125", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
unnamed:_0customerstatecustomer_lifetime_valueresponsecoverageeducationeffective_to_dateemploymentstatusgender...number_of_policiespolicy_typepolicyrenew_offer_typesales_channeltotal_claim_amountvehicle_classvehicle_sizevehicle_typemonth
00DK49336Arizona4809.216960NoBasicCollege2011-02-18EmployedM...9Corporate AutoCorporate L3Offer3Agent292.800000Four-Door CarMedsizeA2
11KX64629California2228.525238NoBasicCollege2011-01-18UnemployedF...1Personal AutoPersonal L3Offer4Call Center744.924331Four-Door CarMedsizeA1
22LZ68649Washington14947.917300NoBasicBachelor2011-02-10EmployedM...2Personal AutoPersonal L3Offer3Call Center480.000000SUVMedsizeA2
33XL78013Oregon22332.439460YesExtendedCollege2011-01-11EmployedM...2Corporate AutoCorporate L3Offer2Branch484.013411Four-Door CarMedsizeA1
44QA50777Oregon9025.067525NoPremiumBachelor2011-01-17Medical LeaveF...7Personal AutoPersonal L2Offer1Branch707.925645Four-Door CarMedsizeA1
\n", + "

5 rows × 27 columns

\n", + "
" + ], + "text/plain": [ + " unnamed:_0 customer state customer_lifetime_value response \\\n", + "0 0 DK49336 Arizona 4809.216960 No \n", + "1 1 KX64629 California 2228.525238 No \n", + "2 2 LZ68649 Washington 14947.917300 No \n", + "3 3 XL78013 Oregon 22332.439460 Yes \n", + "4 4 QA50777 Oregon 9025.067525 No \n", + "\n", + " coverage education effective_to_date employmentstatus gender ... \\\n", + "0 Basic College 2011-02-18 Employed M ... \n", + "1 Basic College 2011-01-18 Unemployed F ... \n", + "2 Basic Bachelor 2011-02-10 Employed M ... \n", + "3 Extended College 2011-01-11 Employed M ... \n", + "4 Premium Bachelor 2011-01-17 Medical Leave F ... \n", + "\n", + " number_of_policies policy_type policy renew_offer_type \\\n", + "0 9 Corporate Auto Corporate L3 Offer3 \n", + "1 1 Personal Auto Personal L3 Offer4 \n", + "2 2 Personal Auto Personal L3 Offer3 \n", + "3 2 Corporate Auto Corporate L3 Offer2 \n", + "4 7 Personal Auto Personal L2 Offer1 \n", + "\n", + " sales_channel total_claim_amount vehicle_class vehicle_size \\\n", + "0 Agent 292.800000 Four-Door Car Medsize \n", + "1 Call Center 744.924331 Four-Door Car Medsize \n", + "2 Call Center 480.000000 SUV Medsize \n", + "3 Branch 484.013411 Four-Door Car Medsize \n", + "4 Branch 707.925645 Four-Door Car Medsize \n", + "\n", + " vehicle_type month \n", + "0 A 2 \n", + "1 A 1 \n", + "2 A 2 \n", + "3 A 1 \n", + "4 A 1 \n", + "\n", + "[5 rows x 27 columns]" + ] + }, + "execution_count": 63, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "marketing_data = \"https://raw.githubusercontent.com/data-bootcamp-v4/data/main/marketing_customer_analysis_clean.csv\"\n", + "marketing_data = pd.read_csv(marketing_data) ### reading data\n", + "marketing_data.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 65, + "id": "2e3dd394-bee5-459e-9daa-1d58c61ca3fe", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " total_claim_amount\n", 
+ "sales_channel \n", + "Agent 1810226.82\n", + "Branch 1301204.00\n", + "Call Center 926600.82\n", + "Web 706600.04\n" + ] + } + ], + "source": [ + "pivot_table = marketing_data.pivot_table(\n", + " index='sales_channel',\n", + " values='total_claim_amount', \n", + " aggfunc='sum'\n", + ").round(2)\n", + "\n", + "print(pivot_table)\n", + "\n", + "### Create a pivot table of total revenue by sales channel, \n", + "### rounded to 2 decimal places. Then, briefly analyze the results." + ] + }, + { + "cell_type": "code", + "execution_count": 67, + "id": "b1074acf-3d14-42a2-9be0-dc30a269f0a1", + "metadata": {}, + "outputs": [], + "source": [ + "### Agent and Branch channels generated the highest revenue\n", + "### shows that people still like using personal interactions\n", + "\n", + "### web underperforming. Could this be lack of interaction? Performance of website?" + ] + }, + { + "cell_type": "code", + "execution_count": 68, + "id": "873ba78f-0393-49e3-a94d-312712975e60", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "education Bachelor College Doctor High School or Below Master\n", + "gender \n", + "F 8032.68 7671.33 7070.40 8746.25 8535.33\n", + "M 7734.13 8057.15 7848.96 7908.65 8112.83\n" + ] + } + ], + "source": [ + "### Create a pivot table that shows the average customer lifetime value per gender and education level. 
\n", + "### Analyze the resulting table to draw insights.\n", + "\n", + "pivot_table = df.pivot_table(\n", + " values='customer_lifetime_value',\n", + " index='gender',\n", + " columns='education',\n", + " aggfunc='mean'\n", + ").round(2)\n", + "\n", + "print(pivot_table)" + ] + }, + { + "cell_type": "code", + "execution_count": 69, + "id": "e8d8f2aa-5ece-40db-8838-09586476f1aa", + "metadata": {}, + "outputs": [], + "source": [ + "### Females with High School education have the highest average lifetime value.\n", + "### Females tend to have higher lifetime values (Bachelor, High School, Master, 3 out of 5)\n", + "### Doesn't seem to be any particular trend?" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "860d2c14-a997-4995-9bf3-0fea1fc82efe", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python [conda env:base] *", + "language": "python", + "name": "conda-base-py" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.13.5" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}