From eb9410d97bab1f86fa9c43e85e11a034f1f7bc64 Mon Sep 17 00:00:00 2001 From: Ricardo Castanheira Date: Thu, 11 Sep 2025 15:44:42 +0100 Subject: [PATCH] Solved lab --- lab-dw-data-structuring-and-combining.ipynb | 2687 ++++++++++++++++++- 1 file changed, 2670 insertions(+), 17 deletions(-) diff --git a/lab-dw-data-structuring-and-combining.ipynb b/lab-dw-data-structuring-and-combining.ipynb index ec4e3f9..f596e68 100644 --- a/lab-dw-data-structuring-and-combining.ipynb +++ b/lab-dw-data-structuring-and-combining.ipynb @@ -36,14 +36,2083 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 75, "id": "492d06e3-92c7-4105-ac72-536db98d3244", "metadata": { "id": "492d06e3-92c7-4105-ac72-536db98d3244" }, "outputs": [], "source": [ - "# Your code goes here" + "import pandas as pd\n", + "\n", + "url_1 = \"https://raw.githubusercontent.com/data-bootcamp-v4/data/main/file1.csv\"\n", + "\n", + "data_1 = pd.read_csv(url_1)" + ] + }, + { + "cell_type": "code", + "execution_count": 76, + "id": "336e494e-4db0-4039-9113-d4480dda988f", + "metadata": {}, + "outputs": [], + "source": [ + "url_2 = \"https://raw.githubusercontent.com/data-bootcamp-v4/data/main/file2.csv\"\n", + "url_3 = \"https://raw.githubusercontent.com/data-bootcamp-v4/data/main/file3.csv\"\n", + "\n", + "data_2 = pd.read_csv(url_2)\n", + "data_3 = pd.read_csv(url_3)" + ] + }, + { + "cell_type": "code", + "execution_count": 77, + "id": "e329cb53-d2f0-4db0-ba10-9ee028e638de", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
CustomerSTGENDEREducationCustomer Lifetime ValueIncomeMonthly Premium AutoNumber of Open ComplaintsPolicy TypeVehicle ClassTotal Claim Amount
0RB50392WashingtonNaNMasterNaN0.01000.01/0/00Personal AutoFour-Door Car2.704934
1QZ44356ArizonaFBachelor697953.59%0.094.01/0/00Personal AutoFour-Door Car1131.464935
2AI49188NevadaFBachelor1288743.17%48767.0108.01/0/00Personal AutoTwo-Door Car566.472247
3WW63253CaliforniaMBachelor764586.18%0.0106.01/0/00Corporate AutoSUV529.881344
4GA49547WashingtonMHigh School or Below536307.65%36357.068.01/0/00Personal AutoFour-Door Car17.269323
\n", + "
" + ], + "text/plain": [ + " Customer ST GENDER Education Customer Lifetime Value \\\n", + "0 RB50392 Washington NaN Master NaN \n", + "1 QZ44356 Arizona F Bachelor 697953.59% \n", + "2 AI49188 Nevada F Bachelor 1288743.17% \n", + "3 WW63253 California M Bachelor 764586.18% \n", + "4 GA49547 Washington M High School or Below 536307.65% \n", + "\n", + " Income Monthly Premium Auto Number of Open Complaints Policy Type \\\n", + "0 0.0 1000.0 1/0/00 Personal Auto \n", + "1 0.0 94.0 1/0/00 Personal Auto \n", + "2 48767.0 108.0 1/0/00 Personal Auto \n", + "3 0.0 106.0 1/0/00 Corporate Auto \n", + "4 36357.0 68.0 1/0/00 Personal Auto \n", + "\n", + " Vehicle Class Total Claim Amount \n", + "0 Four-Door Car 2.704934 \n", + "1 Four-Door Car 1131.464935 \n", + "2 Two-Door Car 566.472247 \n", + "3 SUV 529.881344 \n", + "4 Four-Door Car 17.269323 " + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
CustomerSTGENDEREducationCustomer Lifetime ValueIncomeMonthly Premium AutoNumber of Open ComplaintsTotal Claim AmountPolicy TypeVehicle Class
0GS98873ArizonaFBachelor323912.47%16061881/0/00633.6Personal AutoFour-Door Car
1CW49887CaliforniaFMaster462680.11%794871141/0/00547.2Special AutoSUV
2MY31220CaliforniaFCollege899704.02%542301121/0/00537.6Personal AutoTwo-Door Car
3UH35128OregonFCollege2580706.30%712102141/1/001027.2Personal AutoLuxury Car
4WH52799ArizonaFCollege380812.21%94903941/0/00451.2Corporate AutoTwo-Door Car
\n", + "
" + ], + "text/plain": [ + " Customer ST GENDER Education Customer Lifetime Value Income \\\n", + "0 GS98873 Arizona F Bachelor 323912.47% 16061 \n", + "1 CW49887 California F Master 462680.11% 79487 \n", + "2 MY31220 California F College 899704.02% 54230 \n", + "3 UH35128 Oregon F College 2580706.30% 71210 \n", + "4 WH52799 Arizona F College 380812.21% 94903 \n", + "\n", + " Monthly Premium Auto Number of Open Complaints Total Claim Amount \\\n", + "0 88 1/0/00 633.6 \n", + "1 114 1/0/00 547.2 \n", + "2 112 1/0/00 537.6 \n", + "3 214 1/1/00 1027.2 \n", + "4 94 1/0/00 451.2 \n", + "\n", + " Policy Type Vehicle Class \n", + "0 Personal Auto Four-Door Car \n", + "1 Special Auto SUV \n", + "2 Personal Auto Two-Door Car \n", + "3 Personal Auto Luxury Car \n", + "4 Corporate Auto Two-Door Car " + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
CustomerStateCustomer Lifetime ValueEducationGenderIncomeMonthly Premium AutoNumber of Open ComplaintsPolicy TypeTotal Claim AmountVehicle Class
0SA25987Washington3479.137523High School or BelowM01040Personal Auto499.200000Two-Door Car
1TB86706Arizona2502.637401MasterM0660Personal Auto3.468912Two-Door Car
2ZL73902Nevada3265.156348BachelorF25820820Personal Auto393.600000Four-Door Car
3KX23516California4455.843406High School or BelowF01210Personal Auto699.615192SUV
4FN77294California7704.958480High School or BelowM303661012Personal Auto484.800000SUV
\n", + "
" + ], + "text/plain": [ + " Customer State Customer Lifetime Value Education Gender \\\n", + "0 SA25987 Washington 3479.137523 High School or Below M \n", + "1 TB86706 Arizona 2502.637401 Master M \n", + "2 ZL73902 Nevada 3265.156348 Bachelor F \n", + "3 KX23516 California 4455.843406 High School or Below F \n", + "4 FN77294 California 7704.958480 High School or Below M \n", + "\n", + " Income Monthly Premium Auto Number of Open Complaints Policy Type \\\n", + "0 0 104 0 Personal Auto \n", + "1 0 66 0 Personal Auto \n", + "2 25820 82 0 Personal Auto \n", + "3 0 121 0 Personal Auto \n", + "4 30366 101 2 Personal Auto \n", + "\n", + " Total Claim Amount Vehicle Class \n", + "0 499.200000 Two-Door Car \n", + "1 3.468912 Two-Door Car \n", + "2 393.600000 Four-Door Car \n", + "3 699.615192 SUV \n", + "4 484.800000 SUV " + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "display(data_1.head())\n", + "print()\n", + "display(data_2.head())\n", + "print()\n", + "display(data_3.head())" + ] + }, + { + "cell_type": "code", + "execution_count": 78, + "id": "ef2b7abd-8e7f-4942-8c16-005996cc012f", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Index(['Customer', 'ST', 'GENDER', 'Education', 'Customer Lifetime Value',\n", + " 'Income', 'Monthly Premium Auto', 'Number of Open Complaints',\n", + " 'Total Claim Amount', 'Policy Type', 'Vehicle Class'],\n", + " dtype='object')" + ] + }, + "execution_count": 78, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data_2.columns" + ] + }, + { + "cell_type": "code", + "execution_count": 79, + "id": "1741ca6e-1fbb-4f94-8a11-45b6f9cf0506", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
CustomerSTGENDEREducationCustomer Lifetime ValueIncomeMonthly Premium AutoNumber of Open ComplaintsPolicy TypeVehicle ClassTotal Claim Amount
0RB50392WashingtonNaNMasterNaN0.01000.01/0/00Personal AutoFour-Door Car2.704934
1QZ44356ArizonaFBachelor697953.59%0.094.01/0/00Personal AutoFour-Door Car1131.464935
2AI49188NevadaFBachelor1288743.17%48767.0108.01/0/00Personal AutoTwo-Door Car566.472247
3WW63253CaliforniaMBachelor764586.18%0.0106.01/0/00Corporate AutoSUV529.881344
4GA49547WashingtonMHigh School or Below536307.65%36357.068.01/0/00Personal AutoFour-Door Car17.269323
....................................
4003NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
4004NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
4005NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
4006NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
4007NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
\n", + "

4008 rows × 11 columns

\n", + "
" + ], + "text/plain": [ + " Customer ST GENDER Education \\\n", + "0 RB50392 Washington NaN Master \n", + "1 QZ44356 Arizona F Bachelor \n", + "2 AI49188 Nevada F Bachelor \n", + "3 WW63253 California M Bachelor \n", + "4 GA49547 Washington M High School or Below \n", + "... ... ... ... ... \n", + "4003 NaN NaN NaN NaN \n", + "4004 NaN NaN NaN NaN \n", + "4005 NaN NaN NaN NaN \n", + "4006 NaN NaN NaN NaN \n", + "4007 NaN NaN NaN NaN \n", + "\n", + " Customer Lifetime Value Income Monthly Premium Auto \\\n", + "0 NaN 0.0 1000.0 \n", + "1 697953.59% 0.0 94.0 \n", + "2 1288743.17% 48767.0 108.0 \n", + "3 764586.18% 0.0 106.0 \n", + "4 536307.65% 36357.0 68.0 \n", + "... ... ... ... \n", + "4003 NaN NaN NaN \n", + "4004 NaN NaN NaN \n", + "4005 NaN NaN NaN \n", + "4006 NaN NaN NaN \n", + "4007 NaN NaN NaN \n", + "\n", + " Number of Open Complaints Policy Type Vehicle Class \\\n", + "0 1/0/00 Personal Auto Four-Door Car \n", + "1 1/0/00 Personal Auto Four-Door Car \n", + "2 1/0/00 Personal Auto Two-Door Car \n", + "3 1/0/00 Corporate Auto SUV \n", + "4 1/0/00 Personal Auto Four-Door Car \n", + "... ... ... ... \n", + "4003 NaN NaN NaN \n", + "4004 NaN NaN NaN \n", + "4005 NaN NaN NaN \n", + "4006 NaN NaN NaN \n", + "4007 NaN NaN NaN \n", + "\n", + " Total Claim Amount \n", + "0 2.704934 \n", + "1 1131.464935 \n", + "2 566.472247 \n", + "3 529.881344 \n", + "4 17.269323 \n", + "... ... \n", + "4003 NaN \n", + "4004 NaN \n", + "4005 NaN \n", + "4006 NaN \n", + "4007 NaN \n", + "\n", + "[4008 rows x 11 columns]" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
CustomerSTGENDEREducationCustomer Lifetime ValueIncomeMonthly Premium AutoNumber of Open ComplaintsPolicy TypeVehicle ClassTotal Claim Amount
0GS98873ArizonaFBachelor323912.47%16061881/0/00Personal AutoFour-Door Car633.600000
1CW49887CaliforniaFMaster462680.11%794871141/0/00Special AutoSUV547.200000
2MY31220CaliforniaFCollege899704.02%542301121/0/00Personal AutoTwo-Door Car537.600000
3UH35128OregonFCollege2580706.30%712102141/1/00Personal AutoLuxury Car1027.200000
4WH52799ArizonaFCollege380812.21%94903941/0/00Corporate AutoTwo-Door Car451.200000
....................................
991HV85198ArizonaMMaster847141.75%63513701/0/00Personal AutoFour-Door Car185.667213
992BS91566ArizonaFCollege543121.91%58161681/0/00Corporate AutoFour-Door Car140.747286
993IL40123NevadaFCollege568964.41%83640701/0/00Corporate AutoTwo-Door Car471.050488
994MY32149CaliforniaFMaster368672.38%0961/0/00Personal AutoTwo-Door Car28.460568
995SA91515CaliforniaMBachelor399258.39%01111/0/00Personal AutoSUV700.349052
\n", + "

996 rows × 11 columns

\n", + "
" + ], + "text/plain": [ + " Customer ST GENDER Education Customer Lifetime Value Income \\\n", + "0 GS98873 Arizona F Bachelor 323912.47% 16061 \n", + "1 CW49887 California F Master 462680.11% 79487 \n", + "2 MY31220 California F College 899704.02% 54230 \n", + "3 UH35128 Oregon F College 2580706.30% 71210 \n", + "4 WH52799 Arizona F College 380812.21% 94903 \n", + ".. ... ... ... ... ... ... \n", + "991 HV85198 Arizona M Master 847141.75% 63513 \n", + "992 BS91566 Arizona F College 543121.91% 58161 \n", + "993 IL40123 Nevada F College 568964.41% 83640 \n", + "994 MY32149 California F Master 368672.38% 0 \n", + "995 SA91515 California M Bachelor 399258.39% 0 \n", + "\n", + " Monthly Premium Auto Number of Open Complaints Policy Type \\\n", + "0 88 1/0/00 Personal Auto \n", + "1 114 1/0/00 Special Auto \n", + "2 112 1/0/00 Personal Auto \n", + "3 214 1/1/00 Personal Auto \n", + "4 94 1/0/00 Corporate Auto \n", + ".. ... ... ... \n", + "991 70 1/0/00 Personal Auto \n", + "992 68 1/0/00 Corporate Auto \n", + "993 70 1/0/00 Corporate Auto \n", + "994 96 1/0/00 Personal Auto \n", + "995 111 1/0/00 Personal Auto \n", + "\n", + " Vehicle Class Total Claim Amount \n", + "0 Four-Door Car 633.600000 \n", + "1 SUV 547.200000 \n", + "2 Two-Door Car 537.600000 \n", + "3 Luxury Car 1027.200000 \n", + "4 Two-Door Car 451.200000 \n", + ".. ... ... 
\n", + "991 Four-Door Car 185.667213 \n", + "992 Four-Door Car 140.747286 \n", + "993 Two-Door Car 471.050488 \n", + "994 Two-Door Car 28.460568 \n", + "995 SUV 700.349052 \n", + "\n", + "[996 rows x 11 columns]" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# The first two tables have the same columns; the only difference is that Total Claim Amount is in a different position.\n", + "# Re-arranging the columns of data_2 to match data_1 for an easier concat.\n", + "\n", + "new_data_2 = data_2[['Customer', 'ST', 'GENDER', 'Education', 'Customer Lifetime Value','Income', 'Monthly Premium Auto', 'Number of Open Complaints', 'Policy Type', 'Vehicle Class', 'Total Claim Amount']]\n", + "\n", + "# Checking that the new column order is correct\n", + "display(data_1)\n", + "display(new_data_2)" + ] + }, + { + "cell_type": "code", + "execution_count": 80, + "id": "e8b382d0-a957-48d2-bab9-4425612e06d4", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
CustomerSTGENDEREducationCustomer Lifetime ValueIncomeMonthly Premium AutoNumber of Open ComplaintsPolicy TypeVehicle ClassTotal Claim Amount
0RB50392WashingtonNaNMasterNaN0.01000.01/0/00Personal AutoFour-Door Car2.704934
1QZ44356ArizonaFBachelor697953.59%0.094.01/0/00Personal AutoFour-Door Car1131.464935
2AI49188NevadaFBachelor1288743.17%48767.0108.01/0/00Personal AutoTwo-Door Car566.472247
3WW63253CaliforniaMBachelor764586.18%0.0106.01/0/00Corporate AutoSUV529.881344
4GA49547WashingtonMHigh School or Below536307.65%36357.068.01/0/00Personal AutoFour-Door Car17.269323
....................................
991HV85198ArizonaMMaster847141.75%63513.070.01/0/00Personal AutoFour-Door Car185.667213
992BS91566ArizonaFCollege543121.91%58161.068.01/0/00Corporate AutoFour-Door Car140.747286
993IL40123NevadaFCollege568964.41%83640.070.01/0/00Corporate AutoTwo-Door Car471.050488
994MY32149CaliforniaFMaster368672.38%0.096.01/0/00Personal AutoTwo-Door Car28.460568
995SA91515CaliforniaMBachelor399258.39%0.0111.01/0/00Personal AutoSUV700.349052
\n", + "

5004 rows × 11 columns

\n", + "
" + ], + "text/plain": [ + " Customer ST GENDER Education Customer Lifetime Value \\\n", + "0 RB50392 Washington NaN Master NaN \n", + "1 QZ44356 Arizona F Bachelor 697953.59% \n", + "2 AI49188 Nevada F Bachelor 1288743.17% \n", + "3 WW63253 California M Bachelor 764586.18% \n", + "4 GA49547 Washington M High School or Below 536307.65% \n", + ".. ... ... ... ... ... \n", + "991 HV85198 Arizona M Master 847141.75% \n", + "992 BS91566 Arizona F College 543121.91% \n", + "993 IL40123 Nevada F College 568964.41% \n", + "994 MY32149 California F Master 368672.38% \n", + "995 SA91515 California M Bachelor 399258.39% \n", + "\n", + " Income Monthly Premium Auto Number of Open Complaints Policy Type \\\n", + "0 0.0 1000.0 1/0/00 Personal Auto \n", + "1 0.0 94.0 1/0/00 Personal Auto \n", + "2 48767.0 108.0 1/0/00 Personal Auto \n", + "3 0.0 106.0 1/0/00 Corporate Auto \n", + "4 36357.0 68.0 1/0/00 Personal Auto \n", + ".. ... ... ... ... \n", + "991 63513.0 70.0 1/0/00 Personal Auto \n", + "992 58161.0 68.0 1/0/00 Corporate Auto \n", + "993 83640.0 70.0 1/0/00 Corporate Auto \n", + "994 0.0 96.0 1/0/00 Personal Auto \n", + "995 0.0 111.0 1/0/00 Personal Auto \n", + "\n", + " Vehicle Class Total Claim Amount \n", + "0 Four-Door Car 2.704934 \n", + "1 Four-Door Car 1131.464935 \n", + "2 Two-Door Car 566.472247 \n", + "3 SUV 529.881344 \n", + "4 Four-Door Car 17.269323 \n", + ".. ... ... \n", + "991 Four-Door Car 185.667213 \n", + "992 Four-Door Car 140.747286 \n", + "993 Two-Door Car 471.050488 \n", + "994 Two-Door Car 28.460568 \n", + "995 SUV 700.349052 \n", + "\n", + "[5004 rows x 11 columns]" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "first_concat = pd.concat([data_1, new_data_2], axis=0)\n", + "display(first_concat) #we can see this worked as total amount of rows in the new DataFrame is the sum of rows of concatenated dataframes." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 81, + "id": "cd795cd8-b9a5-4149-859e-a2cb811b7c23", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Index(['Customer', 'State', 'Customer Lifetime Value', 'Education', 'Gender',\n", + " 'Income', 'Monthly Premium Auto', 'Number of Open Complaints',\n", + " 'Policy Type', 'Total Claim Amount', 'Vehicle Class'],\n", + " dtype='object')" + ] + }, + "execution_count": 81, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data_3.columns" + ] + }, + { + "cell_type": "code", + "execution_count": 82, + "id": "86c7c700-2411-4349-9c10-808837ba903d", + "metadata": {}, + "outputs": [], + "source": [ + "# The DataFrame of data_3 needs further column formatting in order to match first_concat.\n", + "# Columns Education and Customer Lifetime Value need to swap order.\n", + "# Columns Total Claim Amount and Vehicle class need to swap order.\n", + "# Column Gender needs to be placed after column State.\n", + "new_data_3 = data_3[['Customer', 'State', 'Gender', 'Education', 'Customer Lifetime Value', 'Income', 'Monthly Premium Auto', 'Number of Open Complaints', 'Policy Type', 'Vehicle Class', 'Total Claim Amount']]\n", + "\n", + "# Columns State and Gender must be renamed to match columns in first_concat in order to concat without creating new columns.\n", + "new_data_3 = new_data_3.rename(columns={\"State\": \"ST\", \"Gender\": \"GENDER\"})" + ] + }, + { + "cell_type": "code", + "execution_count": 83, + "id": "dc49e329-4635-4ff6-b631-a50e1b822cf9", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
CustomerSTGENDEREducationCustomer Lifetime ValueIncomeMonthly Premium AutoNumber of Open ComplaintsPolicy TypeVehicle ClassTotal Claim Amount
0RB50392WashingtonNaNMasterNaN0.01000.01/0/00Personal AutoFour-Door Car2.704934
1QZ44356ArizonaFBachelor697953.59%0.094.01/0/00Personal AutoFour-Door Car1131.464935
2AI49188NevadaFBachelor1288743.17%48767.0108.01/0/00Personal AutoTwo-Door Car566.472247
3WW63253CaliforniaMBachelor764586.18%0.0106.01/0/00Corporate AutoSUV529.881344
4GA49547WashingtonMHigh School or Below536307.65%36357.068.01/0/00Personal AutoFour-Door Car17.269323
....................................
7065LA72316CaliforniaMBachelor23405.9879871941.073.00Personal AutoFour-Door Car198.234764
7066PK87824CaliforniaFCollege3096.51121721604.079.00Corporate AutoFour-Door Car379.200000
7067TD14365CaliforniaMBachelor8163.8904280.085.03Corporate AutoFour-Door Car790.784983
7068UP19263CaliforniaMCollege7524.44243621941.096.00Personal AutoFour-Door Car691.200000
7069Y167826CaliforniaMCollege2611.8368660.077.00Corporate AutoTwo-Door Car369.600000
\n", + "

12074 rows × 11 columns

\n", + "
" + ], + "text/plain": [ + " Customer ST GENDER Education \\\n", + "0 RB50392 Washington NaN Master \n", + "1 QZ44356 Arizona F Bachelor \n", + "2 AI49188 Nevada F Bachelor \n", + "3 WW63253 California M Bachelor \n", + "4 GA49547 Washington M High School or Below \n", + "... ... ... ... ... \n", + "7065 LA72316 California M Bachelor \n", + "7066 PK87824 California F College \n", + "7067 TD14365 California M Bachelor \n", + "7068 UP19263 California M College \n", + "7069 Y167826 California M College \n", + "\n", + " Customer Lifetime Value Income Monthly Premium Auto \\\n", + "0 NaN 0.0 1000.0 \n", + "1 697953.59% 0.0 94.0 \n", + "2 1288743.17% 48767.0 108.0 \n", + "3 764586.18% 0.0 106.0 \n", + "4 536307.65% 36357.0 68.0 \n", + "... ... ... ... \n", + "7065 23405.98798 71941.0 73.0 \n", + "7066 3096.511217 21604.0 79.0 \n", + "7067 8163.890428 0.0 85.0 \n", + "7068 7524.442436 21941.0 96.0 \n", + "7069 2611.836866 0.0 77.0 \n", + "\n", + " Number of Open Complaints Policy Type Vehicle Class \\\n", + "0 1/0/00 Personal Auto Four-Door Car \n", + "1 1/0/00 Personal Auto Four-Door Car \n", + "2 1/0/00 Personal Auto Two-Door Car \n", + "3 1/0/00 Corporate Auto SUV \n", + "4 1/0/00 Personal Auto Four-Door Car \n", + "... ... ... ... \n", + "7065 0 Personal Auto Four-Door Car \n", + "7066 0 Corporate Auto Four-Door Car \n", + "7067 3 Corporate Auto Four-Door Car \n", + "7068 0 Personal Auto Four-Door Car \n", + "7069 0 Corporate Auto Two-Door Car \n", + "\n", + " Total Claim Amount \n", + "0 2.704934 \n", + "1 1131.464935 \n", + "2 566.472247 \n", + "3 529.881344 \n", + "4 17.269323 \n", + "... ... 
\n", + "7065 198.234764 \n", + "7066 379.200000 \n", + "7067 790.784983 \n", + "7068 691.200000 \n", + "7069 369.600000 \n", + "\n", + "[12074 rows x 11 columns]" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "clean_data = pd.concat([first_concat, new_data_3], axis=0)\n", + "display(clean_data)" + ] + }, + { + "cell_type": "code", + "execution_count": 84, + "id": "e28cfe7f-3d3f-4af7-a07f-f6275dbd1e17", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([nan, 'F', 'M', 'Femal', 'Male', 'female'], dtype=object)" + ] + }, + "execution_count": 84, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Cleaning relevant column values\n", + "\n", + "clean_data[\"GENDER\"].unique()" + ] + }, + { + "cell_type": "code", + "execution_count": 85, + "id": "438f0acb-d225-4578-985e-f732acb25853", + "metadata": {}, + "outputs": [], + "source": [ + "clean_data[\"GENDER\"] = clean_data[\"GENDER\"].replace({\"Femal\": \"F\", \"Male\": \"M\", \"female\": \"F\"})" + ] + }, + { + "cell_type": "code", + "execution_count": 86, + "id": "60e99167-54df-45c3-91c1-8a341f192944", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array(['Washington', 'Arizona', 'Nevada', 'California', 'Oregon', 'Cali',\n", + " 'AZ', 'WA', nan], dtype=object)" + ] + }, + "execution_count": 86, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "clean_data[\"ST\"].unique()" + ] + }, + { + "cell_type": "code", + "execution_count": 87, + "id": "a407d6e8-53d3-426f-9a66-38a2b8d3a52b", + "metadata": {}, + "outputs": [], + "source": [ + "clean_data[\"ST\"] = clean_data[\"ST\"].replace({\"AZ\": \"Arizona\", \"WA\": \"Washington\", \"Cali\": \"California\"})" + ] + }, + { + "cell_type": "code", + "execution_count": 88, + "id": "4c4cf10c-d4a3-4d1b-b37e-981deb2a7ac0", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array(['Master', 'Bachelor', 'High School or Below', 
'College',\n", + " 'Bachelors', 'Doctor', nan], dtype=object)" + ] + }, + "execution_count": 88, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "clean_data[\"Education\"].unique()" + ] + }, + { + "cell_type": "code", + "execution_count": 89, + "id": "35c12e55-e5be-4de4-a00e-658d5640b8a9", + "metadata": {}, + "outputs": [], + "source": [ + "clean_data[\"Education\"] = clean_data[\"Education\"].replace({\"Bachelors\": \"Bachelor\"})" + ] + }, + { + "cell_type": "code", + "execution_count": 90, + "id": "a9f3e22b-ae0f-4ba9-a01d-feaadee756a4", + "metadata": {}, + "outputs": [], + "source": [ + "clean_data[\"Customer Lifetime Value\"] = clean_data[\"Customer Lifetime Value\"].str.replace(\"%\",\" \").astype(float)" + ] + }, + { + "cell_type": "code", + "execution_count": 91, + "id": "62e77689-f00a-4c49-a2a9-52e04ded8bde", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Index(['customer', 'st', 'gender', 'education', 'customer_lifetime_value',\n", + " 'income', 'monthly_premium_auto', 'number_of_open_complaints',\n", + " 'policy_type', 'vehicle_class', 'total_claim_amount'],\n", + " dtype='object')\n" + ] + } + ], + "source": [ + "# Formatting column names for easier access\n", + "\n", + "clean_data = clean_data.rename(columns={ col: col.replace(\" \",\"_\").lower() for col in clean_data.columns })\n", + "print(clean_data.columns)" + ] + }, + { + "cell_type": "code", + "execution_count": 92, + "id": "33f4ee38-1c91-421d-a2be-e9b3f9e342ae", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "customer True\n", + "st True\n", + "gender True\n", + "education True\n", + "customer_lifetime_value True\n", + "income True\n", + "monthly_premium_auto True\n", + "number_of_open_complaints True\n", + "policy_type True\n", + "vehicle_class True\n", + "total_claim_amount True\n", + "dtype: bool" + ] + }, + "execution_count": 92, + "metadata": {}, + "output_type": "execute_result" + } + ], + 
"source": [ + "# Checking for nulls\n", + "\n", + "clean_data.isna().any()" + ] + }, + { + "cell_type": "code", + "execution_count": 100, + "id": "4bd86462-0e97-4c23-a608-b0bb6d1845e2", + "metadata": {}, + "outputs": [], + "source": [ + "# Dealing with nulls according to Data category\n", + "\n", + "clean_data[\"customer\"] = clean_data[\"customer\"].fillna(\"Unknown\")\n", + "clean_data[\"st\"] = clean_data[\"st\"].fillna(\"Unknown\")\n", + "clean_data[\"gender\"] = clean_data[\"gender\"].fillna(\"Unknown\")\n", + "clean_data[\"education\"] = clean_data[\"education\"].fillna(\"Unknown\")\n", + "clean_data[\"policy_type\"] = clean_data[\"policy_type\"].fillna(\"Unknown\")\n", + "clean_data[\"vehicle_class\"] = clean_data[\"vehicle_class\"].fillna(\"Unknown\")\n", + "clean_data[\"number_of_open_complaints\"] = clean_data[\"number_of_open_complaints\"].fillna(\"1/0/00\")" + ] + }, + { + "cell_type": "code", + "execution_count": 101, + "id": "82a48d2b-901e-4744-9581-5a64ff850645", + "metadata": {}, + "outputs": [], + "source": [ + "# Dealing with nulls in numerical columns\n", + "\n", + "clean_data[\"income\"] = clean_data.income.fillna(round(clean_data.income.mean(),1))\n", + "clean_data[\"monthly_premium_auto\"] = clean_data.monthly_premium_auto.fillna(round(clean_data.monthly_premium_auto.mean(),1))\n", + "clean_data[\"total_claim_amount\"] = clean_data.total_claim_amount.fillna(round(clean_data.total_claim_amount.mean(),1))\n", + "clean_data[\"customer_lifetime_value\"] = clean_data.customer_lifetime_value.fillna(round(clean_data.customer_lifetime_value.mean(),1))" + ] + }, + { + "cell_type": "code", + "execution_count": 105, + "id": "0373f79b-9cf0-46a6-bdff-b0d401238251", + "metadata": {}, + "outputs": [], + "source": [ + "# Some values of this column are in the wrong format, fixing to show just a single integer.\n", + "# Unlike the last lab, the clean_data dataframe has unique values of different format so only the ones that are as a date type need to be 
corrected.\n", + "\n", + "clean_data[\"number_of_open_complaints\"].unique()\n", + "\n", + "def middle(value): \n", + " x = value.split(\"/\")\n", + " if len(x) >= 2:\n", + " return x[1]\n", + " else:\n", + " return value\n", + " \n", + "clean_data[\"number_of_open_complaints\"] = clean_data[\"number_of_open_complaints\"].apply(middle)" + ] + }, + { + "cell_type": "code", + "execution_count": 106, + "id": "37f1c2bc-832e-4b6e-9470-a0de53ef3d6a", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "np.True_" + ] + }, + "execution_count": 106, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Checking for duplicates\n", + "\n", + "clean_data.duplicated().any()" + ] + }, + { + "cell_type": "code", + "execution_count": 107, + "id": "356b6f1b-dacb-4704-83d3-88b33932b505", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
indexcustomerstgendereducationcustomer_lifetime_valueincomemonthly_premium_autonumber_of_open_complaintspolicy_typevehicle_classtotal_claim_amount
00RB50392WashingtonUnknownMaster779946.300.01000.00Personal AutoFour-Door Car2.704934
11QZ44356ArizonaFBachelor697953.590.094.00Personal AutoFour-Door Car1131.464935
22AI49188NevadaFBachelor1288743.1748767.0108.00Personal AutoTwo-Door Car566.472247
33WW63253CaliforniaMBachelor764586.180.0106.00Corporate AutoSUV529.881344
44GA49547WashingtonMHigh School or Below536307.6536357.068.00Personal AutoFour-Door Car17.269323
.......................................
91267065LA72316CaliforniaMBachelor779946.3071941.073.00Personal AutoFour-Door Car198.234764
91277066PK87824CaliforniaFCollege779946.3021604.079.00Corporate AutoFour-Door Car379.200000
91287067TD14365CaliforniaMBachelor779946.300.085.03Corporate AutoFour-Door Car790.784983
91297068UP19263CaliforniaMCollege779946.3021941.096.00Personal AutoFour-Door Car691.200000
91307069Y167826CaliforniaMCollege779946.300.077.00Corporate AutoTwo-Door Car369.600000
\n", + "

9131 rows × 12 columns

\n", + "
" + ], + "text/plain": [ + " index customer st gender education \\\n", + "0 0 RB50392 Washington Unknown Master \n", + "1 1 QZ44356 Arizona F Bachelor \n", + "2 2 AI49188 Nevada F Bachelor \n", + "3 3 WW63253 California M Bachelor \n", + "4 4 GA49547 Washington M High School or Below \n", + "... ... ... ... ... ... \n", + "9126 7065 LA72316 California M Bachelor \n", + "9127 7066 PK87824 California F College \n", + "9128 7067 TD14365 California M Bachelor \n", + "9129 7068 UP19263 California M College \n", + "9130 7069 Y167826 California M College \n", + "\n", + " customer_lifetime_value income monthly_premium_auto \\\n", + "0 779946.30 0.0 1000.0 \n", + "1 697953.59 0.0 94.0 \n", + "2 1288743.17 48767.0 108.0 \n", + "3 764586.18 0.0 106.0 \n", + "4 536307.65 36357.0 68.0 \n", + "... ... ... ... \n", + "9126 779946.30 71941.0 73.0 \n", + "9127 779946.30 21604.0 79.0 \n", + "9128 779946.30 0.0 85.0 \n", + "9129 779946.30 21941.0 96.0 \n", + "9130 779946.30 0.0 77.0 \n", + "\n", + " number_of_open_complaints policy_type vehicle_class \\\n", + "0 0 Personal Auto Four-Door Car \n", + "1 0 Personal Auto Four-Door Car \n", + "2 0 Personal Auto Two-Door Car \n", + "3 0 Corporate Auto SUV \n", + "4 0 Personal Auto Four-Door Car \n", + "... ... ... ... \n", + "9126 0 Personal Auto Four-Door Car \n", + "9127 0 Corporate Auto Four-Door Car \n", + "9128 3 Corporate Auto Four-Door Car \n", + "9129 0 Personal Auto Four-Door Car \n", + "9130 0 Corporate Auto Two-Door Car \n", + "\n", + " total_claim_amount \n", + "0 2.704934 \n", + "1 1131.464935 \n", + "2 566.472247 \n", + "3 529.881344 \n", + "4 17.269323 \n", + "... ... 
\n", + "9126 198.234764 \n", + "9127 379.200000 \n", + "9128 790.784983 \n", + "9129 691.200000 \n", + "9130 369.600000 \n", + "\n", + "[9131 rows x 12 columns]" + ] + }, + "execution_count": 107, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Cleaning duplicates and resetting index\n", + "\n", + "clean_data.drop_duplicates(inplace=True)\n", + "\n", + "clean_data.reset_index()" ] }, { @@ -70,18 +2139,6 @@ "This dataset contains information such as customer demographics, policy details, vehicle information, and the customer's response to the last marketing campaign. Our goal is to explore and analyze this data by performing data cleaning, formatting, and structuring." ] }, - { - "cell_type": "code", - "execution_count": null, - "id": "aa10d9b0-1c27-4d3f-a8e4-db6ab73bfd26", - "metadata": { - "id": "aa10d9b0-1c27-4d3f-a8e4-db6ab73bfd26" - }, - "outputs": [], - "source": [ - "# Your code goes here" - ] - }, { "cell_type": "markdown", "id": "df35fd0d-513e-4e77-867e-429da10a9cc7", @@ -103,6 +2160,602 @@ "2. Create a pivot table that shows the average customer lifetime value per gender and education level. Analyze the resulting table to draw insights." ] }, + { + "cell_type": "code", + "execution_count": 108, + "id": "aa10d9b0-1c27-4d3f-a8e4-db6ab73bfd26", + "metadata": { + "id": "aa10d9b0-1c27-4d3f-a8e4-db6ab73bfd26" + }, + "outputs": [], + "source": [ + "import pandas as pd\n", + "\n", + "url = \"https://raw.githubusercontent.com/data-bootcamp-v4/data/main/marketing_customer_analysis_clean.csv\"\n", + "\n", + "data = pd.read_csv(url)" + ] + }, + { + "cell_type": "code", + "execution_count": 116, + "id": "bfff9b6b-fd9e-4466-a3c8-804a822918df", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
unnamed:_0customerstatecustomer_lifetime_valueresponsecoverageeducationeffective_to_dateemploymentstatusgender...number_of_policiespolicy_typepolicyrenew_offer_typesales_channeltotal_claim_amountvehicle_classvehicle_sizevehicle_typemonth
00DK49336Arizona4809.216960NoBasicCollege2011-02-18EmployedM...9Corporate AutoCorporate L3Offer3Agent292.800000Four-Door CarMedsizeA2
11KX64629California2228.525238NoBasicCollege2011-01-18UnemployedF...1Personal AutoPersonal L3Offer4Call Center744.924331Four-Door CarMedsizeA1
22LZ68649Washington14947.917300NoBasicBachelor2011-02-10EmployedM...2Personal AutoPersonal L3Offer3Call Center480.000000SUVMedsizeA2
33XL78013Oregon22332.439460YesExtendedCollege2011-01-11EmployedM...2Corporate AutoCorporate L3Offer2Branch484.013411Four-Door CarMedsizeA1
44QA50777Oregon9025.067525NoPremiumBachelor2011-01-17Medical LeaveF...7Personal AutoPersonal L2Offer1Branch707.925645Four-Door CarMedsizeA1
..................................................................
1090510905FE99816Nevada15563.369440NoPremiumBachelor2011-01-19UnemployedF...7Personal AutoPersonal L1Offer3Web1214.400000Luxury CarMedsizeA1
1090610906KX53892Oregon5259.444853NoBasicCollege2011-01-06EmployedF...6Personal AutoPersonal L3Offer2Branch273.018929Four-Door CarMedsizeA1
1090710907TL39050Arizona23893.304100NoExtendedBachelor2011-02-06EmployedF...2Corporate AutoCorporate L3Offer1Web381.306996Luxury SUVMedsizeA2
1090810908WA60547California11971.977650NoPremiumCollege2011-02-13EmployedF...6Personal AutoPersonal L1Offer1Branch618.288849SUVMedsizeA2
1090910909IV32877California6857.519928NoBasicBachelor2011-01-08UnemployedM...3Personal AutoPersonal L1Offer4Web1021.719397SUVMedsizeA1
\n", + "

10910 rows × 27 columns

\n", + "
" + ], + "text/plain": [ + " unnamed:_0 customer state customer_lifetime_value response \\\n", + "0 0 DK49336 Arizona 4809.216960 No \n", + "1 1 KX64629 California 2228.525238 No \n", + "2 2 LZ68649 Washington 14947.917300 No \n", + "3 3 XL78013 Oregon 22332.439460 Yes \n", + "4 4 QA50777 Oregon 9025.067525 No \n", + "... ... ... ... ... ... \n", + "10905 10905 FE99816 Nevada 15563.369440 No \n", + "10906 10906 KX53892 Oregon 5259.444853 No \n", + "10907 10907 TL39050 Arizona 23893.304100 No \n", + "10908 10908 WA60547 California 11971.977650 No \n", + "10909 10909 IV32877 California 6857.519928 No \n", + "\n", + " coverage education effective_to_date employmentstatus gender ... \\\n", + "0 Basic College 2011-02-18 Employed M ... \n", + "1 Basic College 2011-01-18 Unemployed F ... \n", + "2 Basic Bachelor 2011-02-10 Employed M ... \n", + "3 Extended College 2011-01-11 Employed M ... \n", + "4 Premium Bachelor 2011-01-17 Medical Leave F ... \n", + "... ... ... ... ... ... ... \n", + "10905 Premium Bachelor 2011-01-19 Unemployed F ... \n", + "10906 Basic College 2011-01-06 Employed F ... \n", + "10907 Extended Bachelor 2011-02-06 Employed F ... \n", + "10908 Premium College 2011-02-13 Employed F ... \n", + "10909 Basic Bachelor 2011-01-08 Unemployed M ... \n", + "\n", + " number_of_policies policy_type policy renew_offer_type \\\n", + "0 9 Corporate Auto Corporate L3 Offer3 \n", + "1 1 Personal Auto Personal L3 Offer4 \n", + "2 2 Personal Auto Personal L3 Offer3 \n", + "3 2 Corporate Auto Corporate L3 Offer2 \n", + "4 7 Personal Auto Personal L2 Offer1 \n", + "... ... ... ... ... 
\n", + "10905 7 Personal Auto Personal L1 Offer3 \n", + "10906 6 Personal Auto Personal L3 Offer2 \n", + "10907 2 Corporate Auto Corporate L3 Offer1 \n", + "10908 6 Personal Auto Personal L1 Offer1 \n", + "10909 3 Personal Auto Personal L1 Offer4 \n", + "\n", + " sales_channel total_claim_amount vehicle_class vehicle_size \\\n", + "0 Agent 292.800000 Four-Door Car Medsize \n", + "1 Call Center 744.924331 Four-Door Car Medsize \n", + "2 Call Center 480.000000 SUV Medsize \n", + "3 Branch 484.013411 Four-Door Car Medsize \n", + "4 Branch 707.925645 Four-Door Car Medsize \n", + "... ... ... ... ... \n", + "10905 Web 1214.400000 Luxury Car Medsize \n", + "10906 Branch 273.018929 Four-Door Car Medsize \n", + "10907 Web 381.306996 Luxury SUV Medsize \n", + "10908 Branch 618.288849 SUV Medsize \n", + "10909 Web 1021.719397 SUV Medsize \n", + "\n", + " vehicle_type month \n", + "0 A 2 \n", + "1 A 1 \n", + "2 A 2 \n", + "3 A 1 \n", + "4 A 1 \n", + "... ... ... \n", + "10905 A 1 \n", + "10906 A 1 \n", + "10907 A 2 \n", + "10908 A 2 \n", + "10909 A 1 \n", + "\n", + "[10910 rows x 27 columns]" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "display(data)" + ] + }, + { + "cell_type": "code", + "execution_count": 111, + "id": "77de8ecb-03a8-4acd-916d-adaab91969e2", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "unnamed:_0 False\n", + "customer False\n", + "state False\n", + "customer_lifetime_value False\n", + "response False\n", + "coverage False\n", + "education False\n", + "effective_to_date False\n", + "employmentstatus False\n", + "gender False\n", + "income False\n", + "location_code False\n", + "marital_status False\n", + "monthly_premium_auto False\n", + "months_since_last_claim False\n", + "months_since_policy_inception False\n", + "number_of_open_complaints False\n", + "number_of_policies False\n", + "policy_type False\n", + "policy False\n", + "renew_offer_type False\n", + "sales_channel False\n", + 
"total_claim_amount False\n", + "vehicle_class False\n", + "vehicle_size False\n", + "vehicle_type False\n", + "month False\n", + "dtype: bool" + ] + }, + "execution_count": 111, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Checking for nulls\n", + "data.isna().any()" + ] + }, + { + "cell_type": "code", + "execution_count": 112, + "id": "69259494-2ece-4900-a704-f5673af5bfd4", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "np.False_" + ] + }, + "execution_count": 112, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Checking for duplicates\n", + "data.duplicated().any()" + ] + }, + { + "cell_type": "code", + "execution_count": 129, + "id": "d5e69f3d-a5d6-429b-955b-32ad979c169c", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
total_claim_amount
policy_typeCorporate AutoPersonal AutoSpecial Auto
sales_channel
Agent382764.151346813.7880648.88
Branch276957.08971006.5353240.38
Call Center202683.63696214.8527702.34
Web143446.57530375.7432777.73
\n", + "
" + ], + "text/plain": [ + " total_claim_amount \n", + "policy_type Corporate Auto Personal Auto Special Auto\n", + "sales_channel \n", + "Agent 382764.15 1346813.78 80648.88\n", + "Branch 276957.08 971006.53 53240.38\n", + "Call Center 202683.63 696214.85 27702.34\n", + "Web 143446.57 530375.74 32777.73" + ] + }, + "execution_count": 129, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Using pivot, create a summary table showing the total revenue for each sales channel (branch, call center, web, and mail). \n", + "# Round the total revenue to 2 decimal points. \n", + "# Analyze the resulting table to draw insights.\n", + "\n", + "pivot_data = data.pivot_table(index='sales_channel', columns='policy_type', values=['total_claim_amount'], aggfunc=\"sum\").round(2)\n", + "pivot_data" + ] + }, + { + "cell_type": "code", + "execution_count": 131, + "id": "e1f8ccaa-5286-49d7-b5d1-8a61ac8ec013", + "metadata": {}, + "outputs": [], + "source": [ + "# Final analysis\n", + "\n", + "# The pivot table shows that the largest total claim amount came through the Agent channel, followed by Branch and Call Center, with the smallest coming through Web.\n", + "# The same ordering holds for each policy type, with the exception of Special Auto policies, where Call Center claims are lower than Web claims." + ] + }, { "cell_type": "markdown", "id": "32c7f2e5-3d90-43e5-be33-9781b6069198", @@ -146,9 +2799,9 @@ "provenance": [] }, "kernelspec": { - "display_name": "Python 3 (ipykernel)", + "display_name": "Python [conda env:base] *", "language": "python", - "name": "python3" + "name": "conda-base-py" }, "language_info": { "codemirror_mode": { @@ -160,7 +2813,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.13" + "version": "3.13.5" } }, "nbformat": 4,