From 69e40356df160783e740f46997ba279b572767a3 Mon Sep 17 00:00:00 2001 From: jannajulianfeiten Date: Wed, 10 Sep 2025 21:28:47 +0100 Subject: [PATCH 1/2] solved lab --- lab-dw-data-structuring-and-combining.ipynb | 2875 ++++++++++++++++++- 1 file changed, 2861 insertions(+), 14 deletions(-) diff --git a/lab-dw-data-structuring-and-combining.ipynb b/lab-dw-data-structuring-and-combining.ipynb index ec4e3f9..8751ce5 100644 --- a/lab-dw-data-structuring-and-combining.ipynb +++ b/lab-dw-data-structuring-and-combining.ipynb @@ -36,14 +36,2322 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, + "id": "fa2260cc-b023-47c2-9d1d-869c038dbba8", + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n" + ] + }, + { + "cell_type": "code", + "execution_count": 8, "id": "492d06e3-92c7-4105-ac72-536db98d3244", "metadata": { "id": "492d06e3-92c7-4105-ac72-536db98d3244" }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
CustomerSTGENDEREducationCustomer Lifetime ValueIncomeMonthly Premium AutoNumber of Open ComplaintsPolicy TypeVehicle ClassTotal Claim Amount
0RB50392WashingtonNaNMasterNaN0.01000.01/0/00Personal AutoFour-Door Car2.704934
1QZ44356ArizonaFBachelor697953.59%0.094.01/0/00Personal AutoFour-Door Car1131.464935
2AI49188NevadaFBachelor1288743.17%48767.0108.01/0/00Personal AutoTwo-Door Car566.472247
3WW63253CaliforniaMBachelor764586.18%0.0106.01/0/00Corporate AutoSUV529.881344
4GA49547WashingtonMHigh School or Below536307.65%36357.068.01/0/00Personal AutoFour-Door Car17.269323
....................................
4003NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
4004NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
4005NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
4006NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
4007NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
\n", + "

4008 rows × 11 columns

\n", + "
" + ], + "text/plain": [ + " Customer ST GENDER Education \\\n", + "0 RB50392 Washington NaN Master \n", + "1 QZ44356 Arizona F Bachelor \n", + "2 AI49188 Nevada F Bachelor \n", + "3 WW63253 California M Bachelor \n", + "4 GA49547 Washington M High School or Below \n", + "... ... ... ... ... \n", + "4003 NaN NaN NaN NaN \n", + "4004 NaN NaN NaN NaN \n", + "4005 NaN NaN NaN NaN \n", + "4006 NaN NaN NaN NaN \n", + "4007 NaN NaN NaN NaN \n", + "\n", + " Customer Lifetime Value Income Monthly Premium Auto \\\n", + "0 NaN 0.0 1000.0 \n", + "1 697953.59% 0.0 94.0 \n", + "2 1288743.17% 48767.0 108.0 \n", + "3 764586.18% 0.0 106.0 \n", + "4 536307.65% 36357.0 68.0 \n", + "... ... ... ... \n", + "4003 NaN NaN NaN \n", + "4004 NaN NaN NaN \n", + "4005 NaN NaN NaN \n", + "4006 NaN NaN NaN \n", + "4007 NaN NaN NaN \n", + "\n", + " Number of Open Complaints Policy Type Vehicle Class \\\n", + "0 1/0/00 Personal Auto Four-Door Car \n", + "1 1/0/00 Personal Auto Four-Door Car \n", + "2 1/0/00 Personal Auto Two-Door Car \n", + "3 1/0/00 Corporate Auto SUV \n", + "4 1/0/00 Personal Auto Four-Door Car \n", + "... ... ... ... \n", + "4003 NaN NaN NaN \n", + "4004 NaN NaN NaN \n", + "4005 NaN NaN NaN \n", + "4006 NaN NaN NaN \n", + "4007 NaN NaN NaN \n", + "\n", + " Total Claim Amount \n", + "0 2.704934 \n", + "1 1131.464935 \n", + "2 566.472247 \n", + "3 529.881344 \n", + "4 17.269323 \n", + "... ... \n", + "4003 NaN \n", + "4004 NaN \n", + "4005 NaN \n", + "4006 NaN \n", + "4007 NaN \n", + "\n", + "[4008 rows x 11 columns]" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df1 = pd.read_csvdf = pd.read_csv (\"https://raw.githubusercontent.com/data-bootcamp-v4/data/main/file1.csv\")\n", + "df1" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "73e4dbf2-e2f6-48a5-9547-91866405aa32", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
CustomerSTGENDEREducationCustomer Lifetime ValueIncomeMonthly Premium AutoNumber of Open ComplaintsTotal Claim AmountPolicy TypeVehicle Class
0GS98873ArizonaFBachelor323912.47%16061881/0/00633.600000Personal AutoFour-Door Car
1CW49887CaliforniaFMaster462680.11%794871141/0/00547.200000Special AutoSUV
2MY31220CaliforniaFCollege899704.02%542301121/0/00537.600000Personal AutoTwo-Door Car
3UH35128OregonFCollege2580706.30%712102141/1/001027.200000Personal AutoLuxury Car
4WH52799ArizonaFCollege380812.21%94903941/0/00451.200000Corporate AutoTwo-Door Car
....................................
991HV85198ArizonaMMaster847141.75%63513701/0/00185.667213Personal AutoFour-Door Car
992BS91566ArizonaFCollege543121.91%58161681/0/00140.747286Corporate AutoFour-Door Car
993IL40123NevadaFCollege568964.41%83640701/0/00471.050488Corporate AutoTwo-Door Car
994MY32149CaliforniaFMaster368672.38%0961/0/0028.460568Personal AutoTwo-Door Car
995SA91515CaliforniaMBachelor399258.39%01111/0/00700.349052Personal AutoSUV
\n", + "

996 rows × 11 columns

\n", + "
" + ], + "text/plain": [ + " Customer ST GENDER Education Customer Lifetime Value Income \\\n", + "0 GS98873 Arizona F Bachelor 323912.47% 16061 \n", + "1 CW49887 California F Master 462680.11% 79487 \n", + "2 MY31220 California F College 899704.02% 54230 \n", + "3 UH35128 Oregon F College 2580706.30% 71210 \n", + "4 WH52799 Arizona F College 380812.21% 94903 \n", + ".. ... ... ... ... ... ... \n", + "991 HV85198 Arizona M Master 847141.75% 63513 \n", + "992 BS91566 Arizona F College 543121.91% 58161 \n", + "993 IL40123 Nevada F College 568964.41% 83640 \n", + "994 MY32149 California F Master 368672.38% 0 \n", + "995 SA91515 California M Bachelor 399258.39% 0 \n", + "\n", + " Monthly Premium Auto Number of Open Complaints Total Claim Amount \\\n", + "0 88 1/0/00 633.600000 \n", + "1 114 1/0/00 547.200000 \n", + "2 112 1/0/00 537.600000 \n", + "3 214 1/1/00 1027.200000 \n", + "4 94 1/0/00 451.200000 \n", + ".. ... ... ... \n", + "991 70 1/0/00 185.667213 \n", + "992 68 1/0/00 140.747286 \n", + "993 70 1/0/00 471.050488 \n", + "994 96 1/0/00 28.460568 \n", + "995 111 1/0/00 700.349052 \n", + "\n", + " Policy Type Vehicle Class \n", + "0 Personal Auto Four-Door Car \n", + "1 Special Auto SUV \n", + "2 Personal Auto Two-Door Car \n", + "3 Personal Auto Luxury Car \n", + "4 Corporate Auto Two-Door Car \n", + ".. ... ... \n", + "991 Personal Auto Four-Door Car \n", + "992 Corporate Auto Four-Door Car \n", + "993 Corporate Auto Two-Door Car \n", + "994 Personal Auto Two-Door Car \n", + "995 Personal Auto SUV \n", + "\n", + "[996 rows x 11 columns]" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df2 = pd.read_csvdf = pd.read_csv (\"https://raw.githubusercontent.com/data-bootcamp-v4/data/main/file2.csv\")\n", + "df2" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "f228e79e-03b1-4992-8a88-4c84ca653af1", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
CustomerStateCustomer Lifetime ValueEducationGenderIncomeMonthly Premium AutoNumber of Open ComplaintsPolicy TypeTotal Claim AmountVehicle Class
0SA25987Washington3479.137523High School or BelowM01040Personal Auto499.200000Two-Door Car
1TB86706Arizona2502.637401MasterM0660Personal Auto3.468912Two-Door Car
2ZL73902Nevada3265.156348BachelorF25820820Personal Auto393.600000Four-Door Car
3KX23516California4455.843406High School or BelowF01210Personal Auto699.615192SUV
4FN77294California7704.958480High School or BelowM303661012Personal Auto484.800000SUV
\n", + "
" + ], + "text/plain": [ + " Customer State Customer Lifetime Value Education Gender \\\n", + "0 SA25987 Washington 3479.137523 High School or Below M \n", + "1 TB86706 Arizona 2502.637401 Master M \n", + "2 ZL73902 Nevada 3265.156348 Bachelor F \n", + "3 KX23516 California 4455.843406 High School or Below F \n", + "4 FN77294 California 7704.958480 High School or Below M \n", + "\n", + " Income Monthly Premium Auto Number of Open Complaints Policy Type \\\n", + "0 0 104 0 Personal Auto \n", + "1 0 66 0 Personal Auto \n", + "2 25820 82 0 Personal Auto \n", + "3 0 121 0 Personal Auto \n", + "4 30366 101 2 Personal Auto \n", + "\n", + " Total Claim Amount Vehicle Class \n", + "0 499.200000 Two-Door Car \n", + "1 3.468912 Two-Door Car \n", + "2 393.600000 Four-Door Car \n", + "3 699.615192 SUV \n", + "4 484.800000 SUV " + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df3 = pd.read_csvdf = pd.read_csv (\"https://raw.githubusercontent.com/data-bootcamp-v4/data/main/file3.csv\")\n", + "df3.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "6a21ea05-7b32-48c5-9b7a-668eb0bf358e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
CustomerSTGENDEREducationCustomer Lifetime ValueIncomeMonthly Premium AutoNumber of Open ComplaintsPolicy TypeVehicle ClassTotal Claim AmountStateGender
0RB50392WashingtonNaNMasterNaN0.01000.01/0/00Personal AutoFour-Door Car2.704934NaNNaN
1QZ44356ArizonaFBachelor697953.59%0.094.01/0/00Personal AutoFour-Door Car1131.464935NaNNaN
2AI49188NevadaFBachelor1288743.17%48767.0108.01/0/00Personal AutoTwo-Door Car566.472247NaNNaN
3WW63253CaliforniaMBachelor764586.18%0.0106.01/0/00Corporate AutoSUV529.881344NaNNaN
4GA49547WashingtonMHigh School or Below536307.65%36357.068.01/0/00Personal AutoFour-Door Car17.269323NaNNaN
..........................................
7065LA72316NaNNaNBachelor23405.9879871941.073.00Personal AutoFour-Door Car198.234764CaliforniaM
7066PK87824NaNNaNCollege3096.51121721604.079.00Corporate AutoFour-Door Car379.200000CaliforniaF
7067TD14365NaNNaNBachelor8163.8904280.085.03Corporate AutoFour-Door Car790.784983CaliforniaM
7068UP19263NaNNaNCollege7524.44243621941.096.00Personal AutoFour-Door Car691.200000CaliforniaM
7069Y167826NaNNaNCollege2611.8368660.077.00Corporate AutoTwo-Door Car369.600000CaliforniaM
\n", + "

12074 rows × 13 columns

\n", + "
" + ], + "text/plain": [ + " Customer ST GENDER Education \\\n", + "0 RB50392 Washington NaN Master \n", + "1 QZ44356 Arizona F Bachelor \n", + "2 AI49188 Nevada F Bachelor \n", + "3 WW63253 California M Bachelor \n", + "4 GA49547 Washington M High School or Below \n", + "... ... ... ... ... \n", + "7065 LA72316 NaN NaN Bachelor \n", + "7066 PK87824 NaN NaN College \n", + "7067 TD14365 NaN NaN Bachelor \n", + "7068 UP19263 NaN NaN College \n", + "7069 Y167826 NaN NaN College \n", + "\n", + " Customer Lifetime Value Income Monthly Premium Auto \\\n", + "0 NaN 0.0 1000.0 \n", + "1 697953.59% 0.0 94.0 \n", + "2 1288743.17% 48767.0 108.0 \n", + "3 764586.18% 0.0 106.0 \n", + "4 536307.65% 36357.0 68.0 \n", + "... ... ... ... \n", + "7065 23405.98798 71941.0 73.0 \n", + "7066 3096.511217 21604.0 79.0 \n", + "7067 8163.890428 0.0 85.0 \n", + "7068 7524.442436 21941.0 96.0 \n", + "7069 2611.836866 0.0 77.0 \n", + "\n", + " Number of Open Complaints Policy Type Vehicle Class \\\n", + "0 1/0/00 Personal Auto Four-Door Car \n", + "1 1/0/00 Personal Auto Four-Door Car \n", + "2 1/0/00 Personal Auto Two-Door Car \n", + "3 1/0/00 Corporate Auto SUV \n", + "4 1/0/00 Personal Auto Four-Door Car \n", + "... ... ... ... \n", + "7065 0 Personal Auto Four-Door Car \n", + "7066 0 Corporate Auto Four-Door Car \n", + "7067 3 Corporate Auto Four-Door Car \n", + "7068 0 Personal Auto Four-Door Car \n", + "7069 0 Corporate Auto Two-Door Car \n", + "\n", + " Total Claim Amount State Gender \n", + "0 2.704934 NaN NaN \n", + "1 1131.464935 NaN NaN \n", + "2 566.472247 NaN NaN \n", + "3 529.881344 NaN NaN \n", + "4 17.269323 NaN NaN \n", + "... ... ... ... \n", + "7065 198.234764 California M \n", + "7066 379.200000 California F \n", + "7067 790.784983 California M \n", + "7068 691.200000 California M \n", + "7069 369.600000 California M \n", + "\n", + "[12074 rows x 13 columns]" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.concat([df1, df2, df3], axis=0)" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "b3d034c6-25b9-4d57-9c11-273e4c4d2200", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
CustomerSTGENDEREducationCustomer Lifetime ValueIncomeMonthly Premium AutoNumber of Open ComplaintsPolicy TypeVehicle ClassTotal Claim Amount
0FalseFalseTrueFalseTrueFalseFalseFalseFalseFalseFalse
1FalseFalseFalseFalseFalseFalseFalseFalseFalseFalseFalse
2FalseFalseFalseFalseFalseFalseFalseFalseFalseFalseFalse
3FalseFalseFalseFalseFalseFalseFalseFalseFalseFalseFalse
4FalseFalseFalseFalseFalseFalseFalseFalseFalseFalseFalse
....................................
4003TrueTrueTrueTrueTrueTrueTrueTrueTrueTrueTrue
4004TrueTrueTrueTrueTrueTrueTrueTrueTrueTrueTrue
4005TrueTrueTrueTrueTrueTrueTrueTrueTrueTrueTrue
4006TrueTrueTrueTrueTrueTrueTrueTrueTrueTrueTrue
4007TrueTrueTrueTrueTrueTrueTrueTrueTrueTrueTrue
\n", + "

4008 rows × 11 columns

\n", + "
" + ], + "text/plain": [ + " Customer ST GENDER Education Customer Lifetime Value Income \\\n", + "0 False False True False True False \n", + "1 False False False False False False \n", + "2 False False False False False False \n", + "3 False False False False False False \n", + "4 False False False False False False \n", + "... ... ... ... ... ... ... \n", + "4003 True True True True True True \n", + "4004 True True True True True True \n", + "4005 True True True True True True \n", + "4006 True True True True True True \n", + "4007 True True True True True True \n", + "\n", + " Monthly Premium Auto Number of Open Complaints Policy Type \\\n", + "0 False False False \n", + "1 False False False \n", + "2 False False False \n", + "3 False False False \n", + "4 False False False \n", + "... ... ... ... \n", + "4003 True True True \n", + "4004 True True True \n", + "4005 True True True \n", + "4006 True True True \n", + "4007 True True True \n", + "\n", + " Vehicle Class Total Claim Amount \n", + "0 False False \n", + "1 False False \n", + "2 False False \n", + "3 False False \n", + "4 False False \n", + "... ... ... \n", + "4003 True True \n", + "4004 True True \n", + "4005 True True \n", + "4006 True True \n", + "4007 True True \n", + "\n", + "[4008 rows x 11 columns]" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + " df.isnull()" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "3dcbea52-68d6-424e-a482-0b71f1b2f6bb", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Customer True\n", + "ST True\n", + "GENDER True\n", + "Education True\n", + "Customer Lifetime Value True\n", + "Income True\n", + "Monthly Premium Auto True\n", + "Number of Open Complaints True\n", + "Policy Type True\n", + "Vehicle Class True\n", + "Total Claim Amount True\n", + "dtype: bool" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.isna().any()" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "5fb9208d-6e20-4206-9129-0e8c087cd44e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Customer 2937\n", + "ST 2937\n", + "GENDER 3054\n", + "Education 2937\n", + "Customer Lifetime Value 2940\n", + "Income 2937\n", + "Monthly Premium Auto 2937\n", + "Number of Open Complaints 2937\n", + "Policy Type 2937\n", + "Vehicle Class 2937\n", + "Total Claim Amount 2937\n", + "dtype: int64" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.isna().sum()" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "a378d21d-5ef8-49d7-aa73-3658f763369b", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 2\n", + "1 0\n", + "2 0\n", + "3 0\n", + "4 0\n", + " ..\n", + "4003 11\n", + "4004 11\n", + "4005 11\n", + "4006 11\n", + "4007 11\n", + "Length: 4008, dtype: int64" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.isna().sum(axis=1)" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "4ed75f4a-37dd-483f-a89b-7fdd18de6342", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(4008, 11)" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "id": "33836ac6-d833-406d-84ac-f2a3f635e0e1", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Customer object\n", + "ST object\n", + "GENDER object\n", + "Education object\n", + "Customer Lifetime Value object\n", + "Income float64\n", + "Monthly Premium Auto float64\n", + "Number of Open Complaints object\n", + "Policy Type object\n", + "Vehicle Class object\n", + "Total Claim Amount float64\n", + "dtype: object" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.dtypes" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "id": "d0862e10-6069-42a5-acf4-cabe309b1dba", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 36, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.Income.fillna(df.Income.mean()).head" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "id": "7ce0bba0-98f4-49ec-9fc0-f05fb819e0e9", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 1000.00000\n", + "1 94.00000\n", + "2 108.00000\n", + "3 106.00000\n", + "4 68.00000\n", + " ... \n", + "4003 193.23436\n", + "4004 193.23436\n", + "4005 193.23436\n", + "4006 193.23436\n", + "4007 193.23436\n", + "Name: Monthly Premium Auto, Length: 4008, dtype: float64" + ] + }, + "execution_count": 41, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df[\"Monthly Premium Auto\"].fillna(df[\"Monthly Premium Auto\"].mean())\n" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "id": "784117be-a0f0-47d1-855e-5b3d9d77ad8c", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 2.704934\n", + "1 1131.464935\n", + "2 566.472247\n", + "3 529.881344\n", + "4 17.269323\n", + " ... \n", + "4003 404.986909\n", + "4004 404.986909\n", + "4005 404.986909\n", + "4006 404.986909\n", + "4007 404.986909\n", + "Name: Total Claim Amount, Length: 4008, dtype: float64" + ] + }, + "execution_count": 45, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df[\"Total Claim Amount\"].fillna(df[\"Total Claim Amount\"].mean())" + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "id": "64dd0640-9a2a-4624-8d29-e36c7bca28a4", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The history saving thread hit an unexpected error (OperationalError('attempt to write a readonly database')).History will not be written to the database.\n" + ] + }, + { + "data": { + "text/plain": [ + "np.int64(2936)" + ] + }, + "execution_count": 46, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.duplicated().sum()" + ] + }, + { + "cell_type": "code", + "execution_count": 47, + "id": "a25c5d24-6015-4446-b1fe-a8f9d5b7703f", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "np.True_" + ] + }, + "execution_count": 47, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.duplicated().any()" + ] + }, + { + "cell_type": "code", + "execution_count": 48, + "id": "1643bf84-1976-4340-a611-83681a7be8c5", + "metadata": {}, "outputs": [], "source": [ - "# Your code goes here" + "df.drop_duplicates(inplace=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 49, + "id": "c0ba21f0-8d25-4798-92b5-de620aa4129a", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 49, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.head" + ] + }, + { + "cell_type": "code", + "execution_count": 55, + "id": "6f935055-dc6e-423e-98cb-4a4111324f54", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
CustomerSTGENDEREducationCustomer Lifetime ValueIncomeMonthly Premium AutoNumber of Open ComplaintsPolicy TypeVehicle ClassTotal Claim Amount
1067VJ51327CaliFHigh School or Below2031499.76%63209.0102.000001/2/00Personal AutoSUV207.320041
1068GS98873ArizonaFBachelor323912.47%16061.088.000001/0/00Personal AutoFour-Door Car633.600000
1069CW49887CaliforniaFMaster462680.11%79487.0114.000001/0/00Special AutoSUV547.200000
1070MY31220CaliforniaFCollege899704.02%54230.0112.000001/0/00Personal AutoTwo-Door Car537.600000
1071NaNNaNNaNNaNNaNNaN193.23436NaNNaNNaNNaN
\n", + "
" + ], + "text/plain": [ + " Customer ST GENDER Education \\\n", + "1067 VJ51327 Cali F High School or Below \n", + "1068 GS98873 Arizona F Bachelor \n", + "1069 CW49887 California F Master \n", + "1070 MY31220 California F College \n", + "1071 NaN NaN NaN NaN \n", + "\n", + " Customer Lifetime Value Income Monthly Premium Auto \\\n", + "1067 2031499.76% 63209.0 102.00000 \n", + "1068 323912.47% 16061.0 88.00000 \n", + "1069 462680.11% 79487.0 114.00000 \n", + "1070 899704.02% 54230.0 112.00000 \n", + "1071 NaN NaN 193.23436 \n", + "\n", + " Number of Open Complaints Policy Type Vehicle Class \\\n", + "1067 1/2/00 Personal Auto SUV \n", + "1068 1/0/00 Personal Auto Four-Door Car \n", + "1069 1/0/00 Special Auto SUV \n", + "1070 1/0/00 Personal Auto Two-Door Car \n", + "1071 NaN NaN NaN \n", + "\n", + " Total Claim Amount \n", + "1067 207.320041 \n", + "1068 633.600000 \n", + "1069 547.200000 \n", + "1070 537.600000 \n", + "1071 NaN " + ] + }, + "execution_count": 55, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_without_duplicates = df.copy()\n", + "df_without_duplicates = df.drop_duplicates()\n", + "df_without_duplicates.tail()\n" + ] + }, + { + "cell_type": "code", + "execution_count": 57, + "id": "6faf81e9-0413-4910-8f61-6f122ca78880", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
indexCustomerSTGENDEREducationCustomer Lifetime ValueIncomeMonthly Premium AutoNumber of Open ComplaintsPolicy TypeVehicle ClassTotal Claim Amount
00RB50392WashingtonNaNMasterNaN0.01000.000001/0/00Personal AutoFour-Door Car2.704934
11QZ44356ArizonaFBachelor697953.59%0.094.000001/0/00Personal AutoFour-Door Car1131.464935
22AI49188NevadaFBachelor1288743.17%48767.0108.000001/0/00Personal AutoTwo-Door Car566.472247
33WW63253CaliforniaMBachelor764586.18%0.0106.000001/0/00Corporate AutoSUV529.881344
44GA49547WashingtonMHigh School or Below536307.65%36357.068.000001/0/00Personal AutoFour-Door Car17.269323
.......................................
10671067VJ51327CaliFHigh School or Below2031499.76%63209.0102.000001/2/00Personal AutoSUV207.320041
10681068GS98873ArizonaFBachelor323912.47%16061.088.000001/0/00Personal AutoFour-Door Car633.600000
10691069CW49887CaliforniaFMaster462680.11%79487.0114.000001/0/00Special AutoSUV547.200000
10701070MY31220CaliforniaFCollege899704.02%54230.0112.000001/0/00Personal AutoTwo-Door Car537.600000
10711071NaNNaNNaNNaNNaNNaN193.23436NaNNaNNaNNaN
\n", + "

1072 rows × 12 columns

\n", + "
" + ], + "text/plain": [ + " index Customer ST GENDER Education \\\n", + "0 0 RB50392 Washington NaN Master \n", + "1 1 QZ44356 Arizona F Bachelor \n", + "2 2 AI49188 Nevada F Bachelor \n", + "3 3 WW63253 California M Bachelor \n", + "4 4 GA49547 Washington M High School or Below \n", + "... ... ... ... ... ... \n", + "1067 1067 VJ51327 Cali F High School or Below \n", + "1068 1068 GS98873 Arizona F Bachelor \n", + "1069 1069 CW49887 California F Master \n", + "1070 1070 MY31220 California F College \n", + "1071 1071 NaN NaN NaN NaN \n", + "\n", + " Customer Lifetime Value Income Monthly Premium Auto \\\n", + "0 NaN 0.0 1000.00000 \n", + "1 697953.59% 0.0 94.00000 \n", + "2 1288743.17% 48767.0 108.00000 \n", + "3 764586.18% 0.0 106.00000 \n", + "4 536307.65% 36357.0 68.00000 \n", + "... ... ... ... \n", + "1067 2031499.76% 63209.0 102.00000 \n", + "1068 323912.47% 16061.0 88.00000 \n", + "1069 462680.11% 79487.0 114.00000 \n", + "1070 899704.02% 54230.0 112.00000 \n", + "1071 NaN NaN 193.23436 \n", + "\n", + " Number of Open Complaints Policy Type Vehicle Class \\\n", + "0 1/0/00 Personal Auto Four-Door Car \n", + "1 1/0/00 Personal Auto Four-Door Car \n", + "2 1/0/00 Personal Auto Two-Door Car \n", + "3 1/0/00 Corporate Auto SUV \n", + "4 1/0/00 Personal Auto Four-Door Car \n", + "... ... ... ... \n", + "1067 1/2/00 Personal Auto SUV \n", + "1068 1/0/00 Personal Auto Four-Door Car \n", + "1069 1/0/00 Special Auto SUV \n", + "1070 1/0/00 Personal Auto Two-Door Car \n", + "1071 NaN NaN NaN \n", + "\n", + " Total Claim Amount \n", + "0 2.704934 \n", + "1 1131.464935 \n", + "2 566.472247 \n", + "3 529.881344 \n", + "4 17.269323 \n", + "... ... \n", + "1067 207.320041 \n", + "1068 633.600000 \n", + "1069 547.200000 \n", + "1070 537.600000 \n", + "1071 NaN \n", + "\n", + "[1072 rows x 12 columns]" + ] + }, + "execution_count": 57, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_without_duplicates.reset_index()" + ] + }, + { + "cell_type": "code", + "execution_count": 58, + "id": "ca275cff-9981-4a7c-85da-d611cf1fdaf0", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Index(['Customer', 'ST', 'GENDER', 'Education', 'Customer Lifetime Value',\n", + " 'Income', 'Monthly Premium Auto', 'Number of Open Complaints',\n", + " 'Policy Type', 'Vehicle Class', 'Total Claim Amount'],\n", + " dtype='object')" + ] + }, + "execution_count": 58, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.columns" + ] + }, + { + "cell_type": "code", + "execution_count": 61, + "id": "28a9788a-f36f-4da4-9820-280031242b05", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
CustomerStateGENDEREducationCustomer Lifetime ValueIncomeMonthly Premium AutoNumber of Open ComplaintsPolicy TypeVehicle ClassTotal Claim Amount
0RB50392WashingtonNaNMasterNaN0.01000.000001/0/00Personal AutoFour-Door Car2.704934
1QZ44356ArizonaFBachelor697953.59%0.094.000001/0/00Personal AutoFour-Door Car1131.464935
2AI49188NevadaFBachelor1288743.17%48767.0108.000001/0/00Personal AutoTwo-Door Car566.472247
3WW63253CaliforniaMBachelor764586.18%0.0106.000001/0/00Corporate AutoSUV529.881344
4GA49547WashingtonMHigh School or Below536307.65%36357.068.000001/0/00Personal AutoFour-Door Car17.269323
....................................
1067VJ51327CaliFHigh School or Below2031499.76%63209.0102.000001/2/00Personal AutoSUV207.320041
1068GS98873ArizonaFBachelor323912.47%16061.088.000001/0/00Personal AutoFour-Door Car633.600000
1069CW49887CaliforniaFMaster462680.11%79487.0114.000001/0/00Special AutoSUV547.200000
1070MY31220CaliforniaFCollege899704.02%54230.0112.000001/0/00Personal AutoTwo-Door Car537.600000
1071NaNNaNNaNNaNNaNNaN193.23436NaNNaNNaNNaN
\n", + "

1072 rows × 11 columns

\n", + "
" + ], + "text/plain": [ + " Customer State GENDER Education \\\n", + "0 RB50392 Washington NaN Master \n", + "1 QZ44356 Arizona F Bachelor \n", + "2 AI49188 Nevada F Bachelor \n", + "3 WW63253 California M Bachelor \n", + "4 GA49547 Washington M High School or Below \n", + "... ... ... ... ... \n", + "1067 VJ51327 Cali F High School or Below \n", + "1068 GS98873 Arizona F Bachelor \n", + "1069 CW49887 California F Master \n", + "1070 MY31220 California F College \n", + "1071 NaN NaN NaN NaN \n", + "\n", + " Customer Lifetime Value Income Monthly Premium Auto \\\n", + "0 NaN 0.0 1000.00000 \n", + "1 697953.59% 0.0 94.00000 \n", + "2 1288743.17% 48767.0 108.00000 \n", + "3 764586.18% 0.0 106.00000 \n", + "4 536307.65% 36357.0 68.00000 \n", + "... ... ... ... \n", + "1067 2031499.76% 63209.0 102.00000 \n", + "1068 323912.47% 16061.0 88.00000 \n", + "1069 462680.11% 79487.0 114.00000 \n", + "1070 899704.02% 54230.0 112.00000 \n", + "1071 NaN NaN 193.23436 \n", + "\n", + " Number of Open Complaints Policy Type Vehicle Class \\\n", + "0 1/0/00 Personal Auto Four-Door Car \n", + "1 1/0/00 Personal Auto Four-Door Car \n", + "2 1/0/00 Personal Auto Two-Door Car \n", + "3 1/0/00 Corporate Auto SUV \n", + "4 1/0/00 Personal Auto Four-Door Car \n", + "... ... ... ... \n", + "1067 1/2/00 Personal Auto SUV \n", + "1068 1/0/00 Personal Auto Four-Door Car \n", + "1069 1/0/00 Special Auto SUV \n", + "1070 1/0/00 Personal Auto Two-Door Car \n", + "1071 NaN NaN NaN \n", + "\n", + " Total Claim Amount \n", + "0 2.704934 \n", + "1 1131.464935 \n", + "2 566.472247 \n", + "3 529.881344 \n", + "4 17.269323 \n", + "... ... \n", + "1067 207.320041 \n", + "1068 633.600000 \n", + "1069 547.200000 \n", + "1070 537.600000 \n", + "1071 NaN \n", + "\n", + "[1072 rows x 11 columns]" + ] + }, + "execution_count": 61, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.rename(columns={'ST': 'State'}, inplace=True)\n", + "df" ] }, { @@ -72,14 +2380,309 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 78, "id": "aa10d9b0-1c27-4d3f-a8e4-db6ab73bfd26", "metadata": { "id": "aa10d9b0-1c27-4d3f-a8e4-db6ab73bfd26" }, "outputs": [], "source": [ - "# Your code goes here" + "import pandas as pd\n", + "\n", + "df = pd.read_csv(\"https://raw.githubusercontent.com/data-bootcamp-v4/data/main/marketing_customer_analysis_clean.csv\")" + ] + }, + { + "cell_type": "code", + "execution_count": 79, + "id": "0bca1aba-6e82-4b83-950c-fd7e1c0b809b", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
unnamed:_0customerstatecustomer_lifetime_valueresponsecoverageeducationeffective_to_dateemploymentstatusgender...number_of_policiespolicy_typepolicyrenew_offer_typesales_channeltotal_claim_amountvehicle_classvehicle_sizevehicle_typemonth
00DK49336Arizona4809.216960NoBasicCollege2011-02-18EmployedM...9Corporate AutoCorporate L3Offer3Agent292.800000Four-Door CarMedsizeA2
11KX64629California2228.525238NoBasicCollege2011-01-18UnemployedF...1Personal AutoPersonal L3Offer4Call Center744.924331Four-Door CarMedsizeA1
22LZ68649Washington14947.917300NoBasicBachelor2011-02-10EmployedM...2Personal AutoPersonal L3Offer3Call Center480.000000SUVMedsizeA2
33XL78013Oregon22332.439460YesExtendedCollege2011-01-11EmployedM...2Corporate AutoCorporate L3Offer2Branch484.013411Four-Door CarMedsizeA1
44QA50777Oregon9025.067525NoPremiumBachelor2011-01-17Medical LeaveF...7Personal AutoPersonal L2Offer1Branch707.925645Four-Door CarMedsizeA1
\n", + "

5 rows × 27 columns

\n", + "
" + ], + "text/plain": [ + " unnamed:_0 customer state customer_lifetime_value response \\\n", + "0 0 DK49336 Arizona 4809.216960 No \n", + "1 1 KX64629 California 2228.525238 No \n", + "2 2 LZ68649 Washington 14947.917300 No \n", + "3 3 XL78013 Oregon 22332.439460 Yes \n", + "4 4 QA50777 Oregon 9025.067525 No \n", + "\n", + " coverage education effective_to_date employmentstatus gender ... \\\n", + "0 Basic College 2011-02-18 Employed M ... \n", + "1 Basic College 2011-01-18 Unemployed F ... \n", + "2 Basic Bachelor 2011-02-10 Employed M ... \n", + "3 Extended College 2011-01-11 Employed M ... \n", + "4 Premium Bachelor 2011-01-17 Medical Leave F ... \n", + "\n", + " number_of_policies policy_type policy renew_offer_type \\\n", + "0 9 Corporate Auto Corporate L3 Offer3 \n", + "1 1 Personal Auto Personal L3 Offer4 \n", + "2 2 Personal Auto Personal L3 Offer3 \n", + "3 2 Corporate Auto Corporate L3 Offer2 \n", + "4 7 Personal Auto Personal L2 Offer1 \n", + "\n", + " sales_channel total_claim_amount vehicle_class vehicle_size \\\n", + "0 Agent 292.800000 Four-Door Car Medsize \n", + "1 Call Center 744.924331 Four-Door Car Medsize \n", + "2 Call Center 480.000000 SUV Medsize \n", + "3 Branch 484.013411 Four-Door Car Medsize \n", + "4 Branch 707.925645 Four-Door Car Medsize \n", + "\n", + " vehicle_type month \n", + "0 A 2 \n", + "1 A 1 \n", + "2 A 2 \n", + "3 A 1 \n", + "4 A 1 \n", + "\n", + "[5 rows x 27 columns]" + ] + }, + "execution_count": 79, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 80, + "id": "49ba083a-9d4f-4025-aae0-d3493c5a09c6", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "unnamed:_0 False\n", + "customer False\n", + "state False\n", + "customer_lifetime_value False\n", + "response False\n", + "coverage False\n", + "education False\n", + "effective_to_date False\n", + "employmentstatus False\n", + "gender False\n", + "income False\n", + "location_code False\n", + "marital_status False\n", + "monthly_premium_auto False\n", + "months_since_last_claim False\n", + "months_since_policy_inception False\n", + "number_of_open_complaints False\n", + "number_of_policies False\n", + "policy_type False\n", + "policy False\n", + "renew_offer_type False\n", + "sales_channel False\n", + "total_claim_amount False\n", + "vehicle_class False\n", + "vehicle_size False\n", + "vehicle_type False\n", + "month False\n", + "dtype: bool" + ] + }, + "execution_count": 80, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.isna().any()" + ] + }, + { + "cell_type": "code", + "execution_count": 81, + "id": "42b0bcfc-497c-45d6-aca1-29b062e6e81c", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "np.int64(0)" + ] + }, + "execution_count": 81, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.duplicated().sum()" ] }, { @@ -93,6 +2696,116 @@ "Round the total revenue to 2 decimal points. Analyze the resulting table to draw insights." ] }, + { + "cell_type": "code", + "execution_count": 90, + "id": "baeaf18a-7a90-4a19-b178-2356071397e7", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " total_claim_amount\n", + "sales_channel \n", + "Agent 1.810227e+06\n", + "Branch 1.301204e+06\n", + "Call Center 9.266008e+05\n", + "Web 7.066000e+05\n" + ] + } + ], + "source": [ + "summary = df.pivot_table(index=\"sales_channel\",values=\"total_claim_amount\",aggfunc=\"sum\")\n", + "print(summary)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 95, + "id": "517e43fb-79e1-434f-8595-399527737827", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
total_claim_amount
sales_channel
Agent1810226.82
Branch1301204.00
Call Center926600.82
Web706600.04
\n", + "
" + ], + "text/plain": [ + " total_claim_amount\n", + "sales_channel \n", + "Agent 1810226.82\n", + "Branch 1301204.00\n", + "Call Center 926600.82\n", + "Web 706600.04" + ] + }, + "execution_count": 95, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "summary = summary.round(2)\n", + "summary" + ] + }, + { + "cell_type": "code", + "execution_count": 96, + "id": "1735e50d-a4ae-4254-b7d3-891efee90957", + "metadata": {}, + "outputs": [], + "source": [ + "# The Agent channel generated the highest revenue (1.81M), almost 40% more than Branch.\n", + "#Web brought the lowest total revenue, only 706K, less than half of Agent.\n", + "#Strongest channels are Agent and Branch." + ] + }, { "cell_type": "markdown", "id": "640993b2-a291-436c-a34d-a551144f8196", @@ -103,6 +2816,80 @@ "2. Create a pivot table that shows the average customer lifetime value per gender and education level. Analyze the resulting table to draw insights." ] }, + { + "cell_type": "code", + "execution_count": 103, + "id": "a9a2eb9a-7cbf-46fc-a364-655d6c65e416", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
customer_lifetime_value
gender
F8071.105001
M7963.039566
\n", + "
" + ], + "text/plain": [ + " customer_lifetime_value\n", + "gender \n", + "F 8071.105001\n", + "M 7963.039566" + ] + }, + "execution_count": 103, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "summary2 = df.pivot_table(index=\"gender\",values=\"customer_lifetime_value\",aggfunc=\"mean\")\n", + "summary2" + ] + }, + { + "cell_type": "code", + "execution_count": 104, + "id": "c2e404cb-aeeb-491f-888c-98b19d09d32b", + "metadata": {}, + "outputs": [], + "source": [ + "#On average, female customers bring $8071, compared to $7963 for male customers." + ] + }, { "cell_type": "markdown", "id": "32c7f2e5-3d90-43e5-be33-9781b6069198", @@ -130,14 +2917,74 @@ }, { "cell_type": "code", - "execution_count": null, - "id": "3a069e0b-b400-470e-904d-d17582191be4", - "metadata": { - "id": "3a069e0b-b400-470e-904d-d17582191be4" - }, - "outputs": [], + "execution_count": 110, + "id": "be4a576f-e317-4c16-ba0c-0f4ff5132677", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
policy_typeCorporate AutoPersonal AutoSpecial Auto
month
1443.4349521727.60572287.074049
2385.2081351453.68444195.226817
\n", + "
" + ], + "text/plain": [ + "policy_type Corporate Auto Personal Auto Special Auto\n", + "month \n", + "1 443.434952 1727.605722 87.074049\n", + "2 385.208135 1453.684441 95.226817" + ] + }, + "execution_count": 110, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# Your code goes here" + "summary3 = df.pivot_table(index=\"month\", columns=\"policy_type\", values = \"number_of_open_complaints\", aggfunc = \"sum\")\n", + "summary3" ] } ], @@ -146,9 +2993,9 @@ "provenance": [] }, "kernelspec": { - "display_name": "Python 3 (ipykernel)", + "display_name": "Python [conda env:base] *", "language": "python", - "name": "python3" + "name": "conda-base-py" }, "language_info": { "codemirror_mode": { @@ -160,7 +3007,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.13" + "version": "3.13.5" } }, "nbformat": 4, From 5db72e11305063ca1e66d48dc3f749eee9978ede Mon Sep 17 00:00:00 2001 From: jannajulianfeiten Date: Wed, 10 Sep 2025 21:38:08 +0100 Subject: [PATCH 2/2] lab solved2 --- lab-dw-data-structuring-and-combining.ipynb | 145 ++++++++++---------- 1 file changed, 72 insertions(+), 73 deletions(-) diff --git a/lab-dw-data-structuring-and-combining.ipynb b/lab-dw-data-structuring-and-combining.ipynb index 8751ce5..097bbb2 100644 --- a/lab-dw-data-structuring-and-combining.ipynb +++ b/lab-dw-data-structuring-and-combining.ipynb @@ -2818,66 +2818,33 @@ }, { "cell_type": "code", - "execution_count": 103, + "execution_count": 111, "id": "a9a2eb9a-7cbf-46fc-a364-655d6c65e416", "metadata": {}, "outputs": [ { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
customer_lifetime_value
gender
F8071.105001
M7963.039566
\n", - "
" - ], - "text/plain": [ - " customer_lifetime_value\n", - "gender \n", - "F 8071.105001\n", - "M 7963.039566" - ] - }, - "execution_count": 103, - "metadata": {}, - "output_type": "execute_result" + "name": "stdout", + "output_type": "stream", + "text": [ + "gender F M\n", + "education \n", + "Bachelor 7874.27 7703.60\n", + "College 7748.82 8052.46\n", + "Doctor 7328.51 7415.33\n", + "High School or Below 8675.22 8149.69\n", + "Master 8157.05 8168.83\n" + ] } ], "source": [ - "summary2 = df.pivot_table(index=\"gender\",values=\"customer_lifetime_value\",aggfunc=\"mean\")\n", - "summary2" + "summary = df.pivot_table(\n", + " index=\"education\",\n", + " columns=\"gender\",\n", + " values=\"customer_lifetime_value\",\n", + " aggfunc=\"mean\"\n", + ").round(2)\n", + "\n", + "print(summary)\n" ] }, { @@ -2887,7 +2854,9 @@ "metadata": {}, "outputs": [], "source": [ - "#On average, female customers bring $8071, compared to $7963 for male customers." + "# Customers with Doctorate and Master’s degrees show the highest lifetime values, suggesting that higher education correlates with higher long-term revenue potential.\n", + "#Across all education levels, the gap between Female and Male customers is small.\n", + "#This means education level matters more than gender when predicting customer value." ] }, { @@ -2917,7 +2886,7 @@ }, { "cell_type": "code", - "execution_count": 110, + "execution_count": 113, "id": "be4a576f-e317-4c16-ba0c-0f4ff5132677", "metadata": {}, "outputs": [ @@ -2941,29 +2910,47 @@ "\n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", " \n", - " \n", + " \n", + " \n", + " \n", " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", - " \n", " \n", " \n", " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", @@ -2971,21 +2958,33 @@ "" ], "text/plain": [ - "policy_type Corporate Auto Personal Auto Special Auto\n", - "month \n", - "1 443.434952 1727.605722 87.074049\n", - "2 385.208135 1453.684441 95.226817" + " month policy_type number_of_complaints\n", + "0 1 Corporate Auto 443.434952\n", + "1 1 Personal Auto 1727.605722\n", + "2 1 Special Auto 87.074049\n", + "3 2 Corporate Auto 385.208135\n", + "4 2 Personal Auto 1453.684441\n", + "5 2 Special Auto 95.226817" ] }, - "execution_count": 110, + "execution_count": 113, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "summary3 = df.pivot_table(index=\"month\", columns=\"policy_type\", values = \"number_of_open_complaints\", aggfunc = \"sum\")\n", - "summary3" + "long_table = summary3.stack().reset_index()\n", + "long_table.columns = [\"month\", \"policy_type\", \"number_of_complaints\"]\n", + "long_table" ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f42d06af-1a7d-4ba2-9f92-faf12d5218dc", + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": {
policy_typeCorporate AutoPersonal AutoSpecial Auto
monthmonthpolicy_typenumber_of_complaints
101Corporate Auto443.434952
11Personal Auto1727.60572287.074049
21Special Auto87.074049
32Corporate Auto385.208135
42Personal Auto1453.684441
52Special Auto95.226817