diff --git a/lab-dw-data-structuring-and-combining.ipynb b/lab-dw-data-structuring-and-combining.ipynb index ec4e3f9..df06866 100644 --- a/lab-dw-data-structuring-and-combining.ipynb +++ b/lab-dw-data-structuring-and-combining.ipynb @@ -36,14 +36,1758 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "id": "492d06e3-92c7-4105-ac72-536db98d3244", "metadata": { "id": "492d06e3-92c7-4105-ac72-536db98d3244" }, + "outputs": [ + { + "data": { + "text/plain": [ + "(4008, 11)" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Your code goes here\n", + "import pandas as pd\n", + "\n", + "url1 = \"https://raw.githubusercontent.com/data-bootcamp-v4/data/main/file1.csv\"\n", + "url2 = \"https://raw.githubusercontent.com/data-bootcamp-v4/data/main/file2.csv\"\n", + "url3 = \"https://raw.githubusercontent.com/data-bootcamp-v4/data/main/file3.csv\"\n", + "\n", + "df1 = pd.read_csv(url1)\n", + "df2 = pd.read_csv(url2)\n", + "df3 = pd.read_csv(url3)\n", + "\n", + "df1.head()\n", + "df1.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "e7000919", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
CustomerSTGENDEREducationCustomer Lifetime ValueIncomeMonthly Premium AutoNumber of Open ComplaintsTotal Claim AmountPolicy TypeVehicle Class
0GS98873ArizonaFBachelor323912.47%16061881/0/00633.6Personal AutoFour-Door Car
1CW49887CaliforniaFMaster462680.11%794871141/0/00547.2Special AutoSUV
2MY31220CaliforniaFCollege899704.02%542301121/0/00537.6Personal AutoTwo-Door Car
3UH35128OregonFCollege2580706.30%712102141/1/001027.2Personal AutoLuxury Car
4WH52799ArizonaFCollege380812.21%94903941/0/00451.2Corporate AutoTwo-Door Car
\n", + "
" + ], + "text/plain": [ + " Customer ST GENDER Education Customer Lifetime Value Income \\\n", + "0 GS98873 Arizona F Bachelor 323912.47% 16061 \n", + "1 CW49887 California F Master 462680.11% 79487 \n", + "2 MY31220 California F College 899704.02% 54230 \n", + "3 UH35128 Oregon F College 2580706.30% 71210 \n", + "4 WH52799 Arizona F College 380812.21% 94903 \n", + "\n", + " Monthly Premium Auto Number of Open Complaints Total Claim Amount \\\n", + "0 88 1/0/00 633.6 \n", + "1 114 1/0/00 547.2 \n", + "2 112 1/0/00 537.6 \n", + "3 214 1/1/00 1027.2 \n", + "4 94 1/0/00 451.2 \n", + "\n", + " Policy Type Vehicle Class \n", + "0 Personal Auto Four-Door Car \n", + "1 Special Auto SUV \n", + "2 Personal Auto Two-Door Car \n", + "3 Personal Auto Luxury Car \n", + "4 Corporate Auto Two-Door Car " + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df2.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "0f99b1b3", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
CustomerStateCustomer Lifetime ValueEducationGenderIncomeMonthly Premium AutoNumber of Open ComplaintsPolicy TypeTotal Claim AmountVehicle Class
0SA25987Washington3479.137523High School or BelowM01040Personal Auto499.200000Two-Door Car
1TB86706Arizona2502.637401MasterM0660Personal Auto3.468912Two-Door Car
2ZL73902Nevada3265.156348BachelorF25820820Personal Auto393.600000Four-Door Car
3KX23516California4455.843406High School or BelowF01210Personal Auto699.615192SUV
4FN77294California7704.958480High School or BelowM303661012Personal Auto484.800000SUV
\n", + "
" + ], + "text/plain": [ + " Customer State Customer Lifetime Value Education Gender \\\n", + "0 SA25987 Washington 3479.137523 High School or Below M \n", + "1 TB86706 Arizona 2502.637401 Master M \n", + "2 ZL73902 Nevada 3265.156348 Bachelor F \n", + "3 KX23516 California 4455.843406 High School or Below F \n", + "4 FN77294 California 7704.958480 High School or Below M \n", + "\n", + " Income Monthly Premium Auto Number of Open Complaints Policy Type \\\n", + "0 0 104 0 Personal Auto \n", + "1 0 66 0 Personal Auto \n", + "2 25820 82 0 Personal Auto \n", + "3 0 121 0 Personal Auto \n", + "4 30366 101 2 Personal Auto \n", + "\n", + " Total Claim Amount Vehicle Class \n", + "0 499.200000 Two-Door Car \n", + "1 3.468912 Two-Door Car \n", + "2 393.600000 Four-Door Car \n", + "3 699.615192 SUV \n", + "4 484.800000 SUV " + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df3.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "757d54d2", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Index(['customer', 'state', 'customer_lifetime_value', 'education', 'gender',\n", + " 'income', 'monthly_premium_auto', 'number_of_open_complaints',\n", + " 'policy_type', 'total_claim_amount', 'vehicle_class'],\n", + " dtype='object')" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df1.columns = df1.columns.str.replace(\"ST\", \"state\")\n", + "\n", + "df1.columns = (\n", + " df1.columns\n", + " .str.lower()\n", + " .str.replace(\" \", \"_\")\n", + ")\n", + "\n", + "df1.columns\n", + "\n", + "df2.columns = df2.columns.str.replace(\"ST\", \"state\")\n", + "\n", + "df2.columns = (\n", + " df2.columns\n", + " .str.lower()\n", + " .str.replace(\" \", \"_\")\n", + ")\n", + "\n", + "df2.columns\n", + "\n", + "df3.columns = df3.columns.str.replace(\"ST\", \"state\")\n", + "\n", + "df3.columns = (\n", + " df3.columns\n", + " 
.str.lower()\n", + " .str.replace(\" \", \"_\")\n", + ")\n", + "\n", + "df3.columns" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "12c6ebe3", + "metadata": {}, "outputs": [], "source": [ - "# Your code goes here" + "df_all = pd.concat([df1, df2, df3], axis=0)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "7453630e", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(12074, 11)\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
customerstategendereducationcustomer_lifetime_valueincomemonthly_premium_autonumber_of_open_complaintspolicy_typevehicle_classtotal_claim_amount
0RB50392WashingtonNaNMasterNaN0.01000.01/0/00Personal AutoFour-Door Car2.704934
1QZ44356ArizonaFBachelor697953.59%0.094.01/0/00Personal AutoFour-Door Car1131.464935
2AI49188NevadaFBachelor1288743.17%48767.0108.01/0/00Personal AutoTwo-Door Car566.472247
3WW63253CaliforniaMBachelor764586.18%0.0106.01/0/00Corporate AutoSUV529.881344
4GA49547WashingtonMHigh School or Below536307.65%36357.068.01/0/00Personal AutoFour-Door Car17.269323
5OC83172OregonFBachelor825629.78%62902.069.01/0/00Personal AutoTwo-Door Car159.383042
6XZ87318OregonFCollege538089.86%55350.067.01/0/00Corporate AutoFour-Door Car321.600000
7CF85061ArizonaMMaster721610.03%0.0101.01/0/00Corporate AutoFour-Door Car363.029680
8DY87989OregonMBachelor2412750.40%14072.071.01/0/00Corporate AutoFour-Door Car511.200000
9BQ94931OregonFCollege738817.81%28812.093.01/0/00Special AutoFour-Door Car425.527834
10SX51350CaliforniaMCollege473899.20%0.067.01/0/00Personal AutoFour-Door Car482.400000
11VQ65197CaliforniaNaNCollege819719.71%0.0110.01/0/00Personal AutoSUV528.000000
12DP39365CaliforniaNaNMaster879879.70%77026.0110.01/2/00Corporate AutoFour-Door Car472.029737
13SJ95423ArizonaNaNHigh School or Below881901.89%99845.0110.01/1/00Corporate AutoSUV528.000000
14IL66569CaliforniaNaNCollege538443.17%83689.070.01/2/00Corporate AutoFour-Door Car307.139132
15BW63560OregonNaNBachelor746313.94%24599.064.01/1/00Corporate AutoFour-Door Car42.920271
16FV94802NevadaNaNHigh School or Below256686.78%25049.067.01/0/00Personal AutoTwo-Door Car454.245098
17OE15005CaliNaNCollege394524.16%28855.0101.01/0/00Personal AutoSUV647.442031
18WC83389OregonNaNCollege571033.31%51148.072.01/0/00Personal AutoFour-Door Car308.981664
19FL50705CaliforniaNaNHigh School or Below816261.71%66140.0101.01/0/00Corporate AutoFour-Door Car484.800000
\n", + "
" + ], + "text/plain": [ + " customer state gender education customer_lifetime_value \\\n", + "0 RB50392 Washington NaN Master NaN \n", + "1 QZ44356 Arizona F Bachelor 697953.59% \n", + "2 AI49188 Nevada F Bachelor 1288743.17% \n", + "3 WW63253 California M Bachelor 764586.18% \n", + "4 GA49547 Washington M High School or Below 536307.65% \n", + "5 OC83172 Oregon F Bachelor 825629.78% \n", + "6 XZ87318 Oregon F College 538089.86% \n", + "7 CF85061 Arizona M Master 721610.03% \n", + "8 DY87989 Oregon M Bachelor 2412750.40% \n", + "9 BQ94931 Oregon F College 738817.81% \n", + "10 SX51350 California M College 473899.20% \n", + "11 VQ65197 California NaN College 819719.71% \n", + "12 DP39365 California NaN Master 879879.70% \n", + "13 SJ95423 Arizona NaN High School or Below 881901.89% \n", + "14 IL66569 California NaN College 538443.17% \n", + "15 BW63560 Oregon NaN Bachelor 746313.94% \n", + "16 FV94802 Nevada NaN High School or Below 256686.78% \n", + "17 OE15005 Cali NaN College 394524.16% \n", + "18 WC83389 Oregon NaN College 571033.31% \n", + "19 FL50705 California NaN High School or Below 816261.71% \n", + "\n", + " income monthly_premium_auto number_of_open_complaints policy_type \\\n", + "0 0.0 1000.0 1/0/00 Personal Auto \n", + "1 0.0 94.0 1/0/00 Personal Auto \n", + "2 48767.0 108.0 1/0/00 Personal Auto \n", + "3 0.0 106.0 1/0/00 Corporate Auto \n", + "4 36357.0 68.0 1/0/00 Personal Auto \n", + "5 62902.0 69.0 1/0/00 Personal Auto \n", + "6 55350.0 67.0 1/0/00 Corporate Auto \n", + "7 0.0 101.0 1/0/00 Corporate Auto \n", + "8 14072.0 71.0 1/0/00 Corporate Auto \n", + "9 28812.0 93.0 1/0/00 Special Auto \n", + "10 0.0 67.0 1/0/00 Personal Auto \n", + "11 0.0 110.0 1/0/00 Personal Auto \n", + "12 77026.0 110.0 1/2/00 Corporate Auto \n", + "13 99845.0 110.0 1/1/00 Corporate Auto \n", + "14 83689.0 70.0 1/2/00 Corporate Auto \n", + "15 24599.0 64.0 1/1/00 Corporate Auto \n", + "16 25049.0 67.0 1/0/00 Personal Auto \n", + "17 28855.0 101.0 1/0/00 Personal Auto 
\n", + "18 51148.0 72.0 1/0/00 Personal Auto \n", + "19 66140.0 101.0 1/0/00 Corporate Auto \n", + "\n", + " vehicle_class total_claim_amount \n", + "0 Four-Door Car 2.704934 \n", + "1 Four-Door Car 1131.464935 \n", + "2 Two-Door Car 566.472247 \n", + "3 SUV 529.881344 \n", + "4 Four-Door Car 17.269323 \n", + "5 Two-Door Car 159.383042 \n", + "6 Four-Door Car 321.600000 \n", + "7 Four-Door Car 363.029680 \n", + "8 Four-Door Car 511.200000 \n", + "9 Four-Door Car 425.527834 \n", + "10 Four-Door Car 482.400000 \n", + "11 SUV 528.000000 \n", + "12 Four-Door Car 472.029737 \n", + "13 SUV 528.000000 \n", + "14 Four-Door Car 307.139132 \n", + "15 Four-Door Car 42.920271 \n", + "16 Two-Door Car 454.245098 \n", + "17 SUV 647.442031 \n", + "18 Four-Door Car 308.981664 \n", + "19 Four-Door Car 484.800000 " + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "print(df_all.shape)\n", + "df_all.head(20)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "a27d440a", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
customerstategendereducationcustomer_lifetime_valueincomemonthly_premium_autonumber_of_open_complaintspolicy_typevehicle_classtotal_claim_amount
0RB50392WashingtonNaNMasterNaN0.01000.01/0/00Personal AutoFour-Door Car2.704934
1QZ44356ArizonaFBachelor697953.590.094.01/0/00Personal AutoFour-Door Car1131.464935
2AI49188NevadaFBachelor1288743.1748767.0108.01/0/00Personal AutoTwo-Door Car566.472247
3WW63253CaliforniaMBachelor764586.180.0106.01/0/00Corporate AutoSUV529.881344
4GA49547WashingtonMHigh School or Below536307.6536357.068.01/0/00Personal AutoFour-Door Car17.269323
5OC83172OregonFBachelor825629.7862902.069.01/0/00Personal AutoTwo-Door Car159.383042
6XZ87318OregonFCollege538089.8655350.067.01/0/00Corporate AutoFour-Door Car321.600000
7CF85061ArizonaMMaster721610.030.0101.01/0/00Corporate AutoFour-Door Car363.029680
8DY87989OregonMBachelor2412750.4014072.071.01/0/00Corporate AutoFour-Door Car511.200000
9BQ94931OregonFCollege738817.8128812.093.01/0/00Special AutoFour-Door Car425.527834
10SX51350CaliforniaMCollege473899.200.067.01/0/00Personal AutoFour-Door Car482.400000
11VQ65197CaliforniaNaNCollege819719.710.0110.01/0/00Personal AutoSUV528.000000
12DP39365CaliforniaNaNMaster879879.7077026.0110.01/2/00Corporate AutoFour-Door Car472.029737
13SJ95423ArizonaNaNHigh School or Below881901.8999845.0110.01/1/00Corporate AutoSUV528.000000
14IL66569CaliforniaNaNCollege538443.1783689.070.01/2/00Corporate AutoFour-Door Car307.139132
15BW63560OregonNaNBachelor746313.9424599.064.01/1/00Corporate AutoFour-Door Car42.920271
16FV94802NevadaNaNHigh School or Below256686.7825049.067.01/0/00Personal AutoTwo-Door Car454.245098
17OE15005CaliforniaNaNCollege394524.1628855.0101.01/0/00Personal AutoSUV647.442031
18WC83389OregonNaNCollege571033.3151148.072.01/0/00Personal AutoFour-Door Car308.981664
19FL50705CaliforniaNaNHigh School or Below816261.7166140.0101.01/0/00Corporate AutoFour-Door Car484.800000
\n", + "
" + ], + "text/plain": [ + " customer state gender education customer_lifetime_value \\\n", + "0 RB50392 Washington NaN Master NaN \n", + "1 QZ44356 Arizona F Bachelor 697953.59 \n", + "2 AI49188 Nevada F Bachelor 1288743.17 \n", + "3 WW63253 California M Bachelor 764586.18 \n", + "4 GA49547 Washington M High School or Below 536307.65 \n", + "5 OC83172 Oregon F Bachelor 825629.78 \n", + "6 XZ87318 Oregon F College 538089.86 \n", + "7 CF85061 Arizona M Master 721610.03 \n", + "8 DY87989 Oregon M Bachelor 2412750.40 \n", + "9 BQ94931 Oregon F College 738817.81 \n", + "10 SX51350 California M College 473899.20 \n", + "11 VQ65197 California NaN College 819719.71 \n", + "12 DP39365 California NaN Master 879879.70 \n", + "13 SJ95423 Arizona NaN High School or Below 881901.89 \n", + "14 IL66569 California NaN College 538443.17 \n", + "15 BW63560 Oregon NaN Bachelor 746313.94 \n", + "16 FV94802 Nevada NaN High School or Below 256686.78 \n", + "17 OE15005 California NaN College 394524.16 \n", + "18 WC83389 Oregon NaN College 571033.31 \n", + "19 FL50705 California NaN High School or Below 816261.71 \n", + "\n", + " income monthly_premium_auto number_of_open_complaints policy_type \\\n", + "0 0.0 1000.0 1/0/00 Personal Auto \n", + "1 0.0 94.0 1/0/00 Personal Auto \n", + "2 48767.0 108.0 1/0/00 Personal Auto \n", + "3 0.0 106.0 1/0/00 Corporate Auto \n", + "4 36357.0 68.0 1/0/00 Personal Auto \n", + "5 62902.0 69.0 1/0/00 Personal Auto \n", + "6 55350.0 67.0 1/0/00 Corporate Auto \n", + "7 0.0 101.0 1/0/00 Corporate Auto \n", + "8 14072.0 71.0 1/0/00 Corporate Auto \n", + "9 28812.0 93.0 1/0/00 Special Auto \n", + "10 0.0 67.0 1/0/00 Personal Auto \n", + "11 0.0 110.0 1/0/00 Personal Auto \n", + "12 77026.0 110.0 1/2/00 Corporate Auto \n", + "13 99845.0 110.0 1/1/00 Corporate Auto \n", + "14 83689.0 70.0 1/2/00 Corporate Auto \n", + "15 24599.0 64.0 1/1/00 Corporate Auto \n", + "16 25049.0 67.0 1/0/00 Personal Auto \n", + "17 28855.0 101.0 1/0/00 Personal Auto \n", + "18 
51148.0 72.0 1/0/00 Personal Auto \n", + "19 66140.0 101.0 1/0/00 Corporate Auto \n", + "\n", + " vehicle_class total_claim_amount \n", + "0 Four-Door Car 2.704934 \n", + "1 Four-Door Car 1131.464935 \n", + "2 Two-Door Car 566.472247 \n", + "3 SUV 529.881344 \n", + "4 Four-Door Car 17.269323 \n", + "5 Two-Door Car 159.383042 \n", + "6 Four-Door Car 321.600000 \n", + "7 Four-Door Car 363.029680 \n", + "8 Four-Door Car 511.200000 \n", + "9 Four-Door Car 425.527834 \n", + "10 Four-Door Car 482.400000 \n", + "11 SUV 528.000000 \n", + "12 Four-Door Car 472.029737 \n", + "13 SUV 528.000000 \n", + "14 Four-Door Car 307.139132 \n", + "15 Four-Door Car 42.920271 \n", + "16 Two-Door Car 454.245098 \n", + "17 SUV 647.442031 \n", + "18 Four-Door Car 308.981664 \n", + "19 Four-Door Car 484.800000 " + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "gender_map = {\n", + " \"M\": \"M\",\n", + " \"Male\": \"M\",\n", + " \"male\": \"M\",\n", + " \"F\": \"F\",\n", + " \"Femal\": \"F\",\n", + " \"Female\": \"F\",\n", + " \"female\": \"F\"\n", + "}\n", + "\n", + "# Standardize gender column using replace()\n", + "df_all['gender'] = df_all['gender'].replace(gender_map)\n", + "\n", + "state_map = {\n", + " \"AZ\": \"Arizona\",\n", + " \"Cali\": \"California\",\n", + " \"WA\": \"Washington\",\n", + "}\n", + "\n", + "# Standardize state names (expand abbreviations) using replace()\n", + "df_all['state'] = df_all['state'].replace(state_map)\n", + "\n", + "df_all['education'] = df_all['education'].replace(\"Bachelors\", \"Bachelor\")\n", + "\n", + "# Strip '%' and parse customer_lifetime_value as numbers; cast to str first because .str.replace() yields NaN for the float entries that came from file3\n", + "df_all['customer_lifetime_value'] = pd.to_numeric(df_all['customer_lifetime_value'].astype(str).str.replace('%', '', regex=False), errors='coerce')\n", + "\n", + "\n", + "vehicle_map = {\n", + " \"Sports Car\": \"Luxury\",\n", + " \"Luxury SUV\": \"Luxury\",\n", + " \"Luxury Car\": \"Luxury\",\n", + "}\n", + "\n", + "# Group the luxury vehicle classes under a single label using replace()\n", + "df_all['vehicle_class'] = 
df_all['vehicle_class'].replace(vehicle_map)\n", + "\n", + "df_all.head(20)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "e12faf58", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "customer object\n", + "state object\n", + "gender object\n", + "education object\n", + "customer_lifetime_value object\n", + "income float64\n", + "monthly_premium_auto float64\n", + "number_of_open_complaints object\n", + "policy_type object\n", + "vehicle_class object\n", + "total_claim_amount float64\n", + "dtype: object" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_all.dtypes" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "070d0222", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 0.0\n", + "1 0.0\n", + "2 0.0\n", + "3 0.0\n", + "4 0.0\n", + " ... \n", + "7065 0.0\n", + "7066 0.0\n", + "7067 3.0\n", + "7068 0.0\n", + "7069 0.0\n", + "Name: number_of_open_complaints, Length: 12074, dtype: float64" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_all[\"customer_lifetime_value\"] = df_all[\"customer_lifetime_value\"].astype(float)\n", + "\n", + "\n", + "def remove_bar(entry):\n", + "    \"\"\"Return the complaint count as a float: the middle field of strings like '1/0/00'.\"\"\"\n", + "    # Missing values pass through unchanged\n", + "    if pd.isnull(entry):\n", + "        return entry\n", + "    if isinstance(entry, str):\n", + "        return float(entry.split(\"/\")[1])\n", + "    # Already numeric (file3 rows, or a re-run of this cell): the old `type(entry) is int` test sent floats and NumPy ints to .split()\n", + "    return float(entry)\n", + "\n", + "df_all[\"number_of_open_complaints\"] = df_all[\"number_of_open_complaints\"].apply(remove_bar)\n", + "#df_all[\"number_of_open_complaints\"] = df_all[\"number_of_open_complaints\"].astype(float)\n", + "\n", + "df_all[\"number_of_open_complaints\"]" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "2718ec22", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "customer 2937\n", + "state 2937\n", + 
"gender 3059\n", + "education 2937\n", + "customer_lifetime_value 10014\n", + "income 2937\n", + "monthly_premium_auto 2937\n", + "number_of_open_complaints 2937\n", + "policy_type 2937\n", + "vehicle_class 2937\n", + "total_claim_amount 2937\n", + "dtype: int64" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_all.isnull().sum()" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "c647d567", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "customer 0\n", + "state 0\n", + "gender 122\n", + "education 0\n", + "customer_lifetime_value 7077\n", + "income 0\n", + "monthly_premium_auto 0\n", + "number_of_open_complaints 0\n", + "policy_type 0\n", + "vehicle_class 0\n", + "total_claim_amount 0\n", + "dtype: int64" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_all = df_all.dropna(how=\"all\")\n", + "\n", + "df_all.isnull().sum()" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "db5d923e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(9137, 11)" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_all.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "44e6785b", + "metadata": {}, + "outputs": [], + "source": [ + "df_all = df_all.drop(\"customer_lifetime_value\", axis=1) #dropped this because too many nan values\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "4493194b", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "customer 0\n", + "state 0\n", + "gender 122\n", + "education 0\n", + "income 0\n", + "monthly_premium_auto 0\n", + "number_of_open_complaints 0\n", + "policy_type 0\n", + "vehicle_class 0\n", + "total_claim_amount 0\n", + "dtype: int64" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": 
"execute_result" + } + ], + "source": [ + "df_all.shape\n", + "df_all.isnull().sum()" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "cd46803c", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "customer 0\n", + "state 0\n", + "gender 0\n", + "education 0\n", + "income 0\n", + "monthly_premium_auto 0\n", + "number_of_open_complaints 0\n", + "policy_type 0\n", + "vehicle_class 0\n", + "total_claim_amount 0\n", + "dtype: int64" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_all[\"gender\"].value_counts()\n", + "df_all[\"gender\"] = df_all[\"gender\"].fillna(\"F\")\n", + "df_all.isnull().sum()" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "6bb5f4ed", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "customer object\n", + "state object\n", + "gender object\n", + "education object\n", + "income float64\n", + "monthly_premium_auto float64\n", + "number_of_open_complaints float64\n", + "policy_type object\n", + "vehicle_class object\n", + "total_claim_amount float64\n", + "dtype: object" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_all.dtypes" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "a99b0811", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
customerstategendereducationincomemonthly_premium_autonumber_of_open_complaintspolicy_typevehicle_classtotal_claim_amount
0RB50392WashingtonFMaster0.01000.00.0Personal AutoFour-Door Car2.704934
1QZ44356ArizonaFBachelor0.094.00.0Personal AutoFour-Door Car1131.464935
2AI49188NevadaFBachelor48767.0108.00.0Personal AutoTwo-Door Car566.472247
3WW63253CaliforniaMBachelor0.0106.00.0Corporate AutoSUV529.881344
4GA49547WashingtonMHigh School or Below36357.068.00.0Personal AutoFour-Door Car17.269323
.................................
7065LA72316CaliforniaMBachelor71941.073.00.0Personal AutoFour-Door Car198.234764
7066PK87824CaliforniaFCollege21604.079.00.0Corporate AutoFour-Door Car379.200000
7067TD14365CaliforniaMBachelor0.085.03.0Corporate AutoFour-Door Car790.784983
7068UP19263CaliforniaMCollege21941.096.00.0Personal AutoFour-Door Car691.200000
7069Y167826CaliforniaMCollege0.077.00.0Corporate AutoTwo-Door Car369.600000
\n", + "

9063 rows × 10 columns

\n", + "
" + ], + "text/plain": [ + " customer state gender education income \\\n", + "0 RB50392 Washington F Master 0.0 \n", + "1 QZ44356 Arizona F Bachelor 0.0 \n", + "2 AI49188 Nevada F Bachelor 48767.0 \n", + "3 WW63253 California M Bachelor 0.0 \n", + "4 GA49547 Washington M High School or Below 36357.0 \n", + "... ... ... ... ... ... \n", + "7065 LA72316 California M Bachelor 71941.0 \n", + "7066 PK87824 California F College 21604.0 \n", + "7067 TD14365 California M Bachelor 0.0 \n", + "7068 UP19263 California M College 21941.0 \n", + "7069 Y167826 California M College 0.0 \n", + "\n", + " monthly_premium_auto number_of_open_complaints policy_type \\\n", + "0 1000.0 0.0 Personal Auto \n", + "1 94.0 0.0 Personal Auto \n", + "2 108.0 0.0 Personal Auto \n", + "3 106.0 0.0 Corporate Auto \n", + "4 68.0 0.0 Personal Auto \n", + "... ... ... ... \n", + "7065 73.0 0.0 Personal Auto \n", + "7066 79.0 0.0 Corporate Auto \n", + "7067 85.0 3.0 Corporate Auto \n", + "7068 96.0 0.0 Personal Auto \n", + "7069 77.0 0.0 Corporate Auto \n", + "\n", + " vehicle_class total_claim_amount \n", + "0 Four-Door Car 2.704934 \n", + "1 Four-Door Car 1131.464935 \n", + "2 Two-Door Car 566.472247 \n", + "3 SUV 529.881344 \n", + "4 Four-Door Car 17.269323 \n", + "... ... ... 
\n", + "7065 Four-Door Car 198.234764 \n", + "7066 Four-Door Car 379.200000 \n", + "7067 Four-Door Car 790.784983 \n", + "7068 Four-Door Car 691.200000 \n", + "7069 Two-Door Car 369.600000 \n", + "\n", + "[9063 rows x 10 columns]" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_all.duplicated().sum()\n", + "\n", + "df_all.drop_duplicates()" ] }, { @@ -72,14 +1816,701 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 18, "id": "aa10d9b0-1c27-4d3f-a8e4-db6ab73bfd26", "metadata": { "id": "aa10d9b0-1c27-4d3f-a8e4-db6ab73bfd26" }, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "(10910, 27)" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Your code goes here\n", + "url4 = \"https://raw.githubusercontent.com/data-bootcamp-v4/data/main/marketing_customer_analysis_clean.csv\"\n", + "\n", + "df4 = pd.read_csv(url4)\n", + "\n", + "\n", + "df4.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "96299a22", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
unnamed:_0customerstatecustomer_lifetime_valueresponsecoverageeducationeffective_to_dateemploymentstatusgender...number_of_policiespolicy_typepolicyrenew_offer_typesales_channeltotal_claim_amountvehicle_classvehicle_sizevehicle_typemonth
00DK49336Arizona4809.216960NoBasicCollege2011-02-18EmployedM...9Corporate AutoCorporate L3Offer3Agent292.800000Four-Door CarMedsizeA2
11KX64629California2228.525238NoBasicCollege2011-01-18UnemployedF...1Personal AutoPersonal L3Offer4Call Center744.924331Four-Door CarMedsizeA1
22LZ68649Washington14947.917300NoBasicBachelor2011-02-10EmployedM...2Personal AutoPersonal L3Offer3Call Center480.000000SUVMedsizeA2
33XL78013Oregon22332.439460YesExtendedCollege2011-01-11EmployedM...2Corporate AutoCorporate L3Offer2Branch484.013411Four-Door CarMedsizeA1
44QA50777Oregon9025.067525NoPremiumBachelor2011-01-17Medical LeaveF...7Personal AutoPersonal L2Offer1Branch707.925645Four-Door CarMedsizeA1
\n", + "

5 rows × 27 columns

\n", + "
" + ], + "text/plain": [ + " unnamed:_0 customer state customer_lifetime_value response \\\n", + "0 0 DK49336 Arizona 4809.216960 No \n", + "1 1 KX64629 California 2228.525238 No \n", + "2 2 LZ68649 Washington 14947.917300 No \n", + "3 3 XL78013 Oregon 22332.439460 Yes \n", + "4 4 QA50777 Oregon 9025.067525 No \n", + "\n", + " coverage education effective_to_date employmentstatus gender ... \\\n", + "0 Basic College 2011-02-18 Employed M ... \n", + "1 Basic College 2011-01-18 Unemployed F ... \n", + "2 Basic Bachelor 2011-02-10 Employed M ... \n", + "3 Extended College 2011-01-11 Employed M ... \n", + "4 Premium Bachelor 2011-01-17 Medical Leave F ... \n", + "\n", + " number_of_policies policy_type policy renew_offer_type \\\n", + "0 9 Corporate Auto Corporate L3 Offer3 \n", + "1 1 Personal Auto Personal L3 Offer4 \n", + "2 2 Personal Auto Personal L3 Offer3 \n", + "3 2 Corporate Auto Corporate L3 Offer2 \n", + "4 7 Personal Auto Personal L2 Offer1 \n", + "\n", + " sales_channel total_claim_amount vehicle_class vehicle_size \\\n", + "0 Agent 292.800000 Four-Door Car Medsize \n", + "1 Call Center 744.924331 Four-Door Car Medsize \n", + "2 Call Center 480.000000 SUV Medsize \n", + "3 Branch 484.013411 Four-Door Car Medsize \n", + "4 Branch 707.925645 Four-Door Car Medsize \n", + "\n", + " vehicle_type month \n", + "0 A 2 \n", + "1 A 1 \n", + "2 A 2 \n", + "3 A 1 \n", + "4 A 1 \n", + "\n", + "[5 rows x 27 columns]" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# Your code goes here" + "df4.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "348a421f", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Index(['unnamed:_0', 'customer', 'state', 'customer_lifetime_value',\n", + " 'response', 'coverage', 'education', 'effective_to_date',\n", + " 'employmentstatus', 'gender', 'income', 'location_code',\n", + " 'marital_status', 'monthly_premium_auto', 
'months_since_last_claim',\n", + " 'months_since_policy_inception', 'number_of_open_complaints',\n", + " 'number_of_policies', 'policy_type', 'policy', 'renew_offer_type',\n", + " 'sales_channel', 'total_claim_amount', 'vehicle_class', 'vehicle_size',\n", + " 'vehicle_type', 'month'],\n", + " dtype='object')" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df4.columns = df4.columns.str.replace(\"ST\", \"state\")\n", + "\n", + "df4.columns = (\n", + " df4.columns\n", + " .str.lower()\n", + " .str.replace(\" \", \"_\")\n", + ")\n", + "\n", + "df4.columns" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "id": "4d9d3f62", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
unnamed:_0customerstatecustomer_lifetime_valueresponsecoverageeducationeffective_to_dateemploymentstatusgender...number_of_policiespolicy_typepolicyrenew_offer_typesales_channeltotal_claim_amountvehicle_classvehicle_sizevehicle_typemonth
00DK49336Arizona4809.216960NoBasicCollege2011-02-18EmployedM...9Corporate AutoCorporate L3Offer3Agent292.800000Four-Door CarMedsizeA2
11KX64629California2228.525238NoBasicCollege2011-01-18UnemployedF...1Personal AutoPersonal L3Offer4Call Center744.924331Four-Door CarMedsizeA1
22LZ68649Washington14947.917300NoBasicBachelor2011-02-10EmployedM...2Personal AutoPersonal L3Offer3Call Center480.000000SUVMedsizeA2
33XL78013Oregon22332.439460YesExtendedCollege2011-01-11EmployedM...2Corporate AutoCorporate L3Offer2Branch484.013411Four-Door CarMedsizeA1
44QA50777Oregon9025.067525NoPremiumBachelor2011-01-17Medical LeaveF...7Personal AutoPersonal L2Offer1Branch707.925645Four-Door CarMedsizeA1
\n", + "

5 rows × 27 columns

\n", + "
" + ], + "text/plain": [ + " unnamed:_0 customer state customer_lifetime_value response \\\n", + "0 0 DK49336 Arizona 4809.216960 No \n", + "1 1 KX64629 California 2228.525238 No \n", + "2 2 LZ68649 Washington 14947.917300 No \n", + "3 3 XL78013 Oregon 22332.439460 Yes \n", + "4 4 QA50777 Oregon 9025.067525 No \n", + "\n", + " coverage education effective_to_date employmentstatus gender ... \\\n", + "0 Basic College 2011-02-18 Employed M ... \n", + "1 Basic College 2011-01-18 Unemployed F ... \n", + "2 Basic Bachelor 2011-02-10 Employed M ... \n", + "3 Extended College 2011-01-11 Employed M ... \n", + "4 Premium Bachelor 2011-01-17 Medical Leave F ... \n", + "\n", + " number_of_policies policy_type policy renew_offer_type \\\n", + "0 9 Corporate Auto Corporate L3 Offer3 \n", + "1 1 Personal Auto Personal L3 Offer4 \n", + "2 2 Personal Auto Personal L3 Offer3 \n", + "3 2 Corporate Auto Corporate L3 Offer2 \n", + "4 7 Personal Auto Personal L2 Offer1 \n", + "\n", + " sales_channel total_claim_amount vehicle_class vehicle_size \\\n", + "0 Agent 292.800000 Four-Door Car Medsize \n", + "1 Call Center 744.924331 Four-Door Car Medsize \n", + "2 Call Center 480.000000 SUV Medsize \n", + "3 Branch 484.013411 Four-Door Car Medsize \n", + "4 Branch 707.925645 Four-Door Car Medsize \n", + "\n", + " vehicle_type month \n", + "0 A 2 \n", + "1 A 1 \n", + "2 A 2 \n", + "3 A 1 \n", + "4 A 1 \n", + "\n", + "[5 rows x 27 columns]" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "gender_map = {\n", + " \"M\": \"M\",\n", + " \"Male\": \"M\",\n", + " \"male\": \"M\",\n", + " \"F\": \"F\",\n", + " \"Femal\": \"F\",\n", + " \"Female\": \"F\",\n", + " \"female\": \"F\"\n", + "}\n", + "\n", + "# Standardize gender column using map()\n", + "df4['gender'] = df4['gender'].replace(gender_map)\n", + "\n", + "state_map = {\n", + " \"AZ\": \"Arizona\",\n", + " \"Cali\": \"California\",\n", + " \"WA\": \"Washington\",\n", + "}\n", + 
"\n", + "# Standardize gender column using map()\n", + "df4['state'] = df4['state'].replace(state_map)\n", + "\n", + "df4['education'] = df4['education'].replace(\"Bachelors\", \"Bachelor\")\n", + "\n", + "# Remove '%' character from customer_lifetime_value\n", + "#df4['customer_lifetime_value'] = df4['customer_lifetime_value'].str.replace('%', '', regex=False)\n", + "\n", + "\n", + "vehicle_map = {\n", + " \"Sports Car\": \"Luxury\",\n", + " \"Luxury SUV\": \"Luxury\",\n", + " \"Luxury Car\": \"Luxury\",\n", + "}\n", + "\n", + "# Standardize gender column using map()\n", + "df4['vehicle_class'] = df4['vehicle_class'].replace(vehicle_map)\n", + "\n", + "df4.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "id": "3b42a87b", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "unnamed:_0 int64\n", + "customer object\n", + "state object\n", + "customer_lifetime_value float64\n", + "response object\n", + "coverage object\n", + "education object\n", + "effective_to_date object\n", + "employmentstatus object\n", + "gender object\n", + "income int64\n", + "location_code object\n", + "marital_status object\n", + "monthly_premium_auto int64\n", + "months_since_last_claim float64\n", + "months_since_policy_inception int64\n", + "number_of_open_complaints float64\n", + "number_of_policies int64\n", + "policy_type object\n", + "policy object\n", + "renew_offer_type object\n", + "sales_channel object\n", + "total_claim_amount float64\n", + "vehicle_class object\n", + "vehicle_size object\n", + "vehicle_type object\n", + "month int64\n", + "dtype: object" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df4.dtypes" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "id": "0cfe7c99", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "unnamed:_0 0\n", + "customer 0\n", + "state 0\n", + "customer_lifetime_value 0\n", + "response 0\n", + "coverage 
0\n", + "education 0\n", + "effective_to_date 0\n", + "employmentstatus 0\n", + "gender 0\n", + "income 0\n", + "location_code 0\n", + "marital_status 0\n", + "monthly_premium_auto 0\n", + "months_since_last_claim 0\n", + "months_since_policy_inception 0\n", + "number_of_open_complaints 0\n", + "number_of_policies 0\n", + "policy_type 0\n", + "policy 0\n", + "renew_offer_type 0\n", + "sales_channel 0\n", + "total_claim_amount 0\n", + "vehicle_class 0\n", + "vehicle_size 0\n", + "vehicle_type 0\n", + "month 0\n", + "dtype: int64" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df4.isnull().sum()" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "id": "43b2bce0", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "np.int64(0)" + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df4.duplicated().sum()" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "id": "11dcb866", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " total_claim_amount\n", + "sales_channel \n", + "Agent 1810226.82\n", + "Branch 1301204.00\n", + "Call Center 926600.82\n", + "Web 706600.04\n" + ] + } + ], + "source": [ + "#total revenue --> branch, call center, web, and mail --> round 2 decimals\n", + "# Pivot table: total revenue per sales channel\n", + "revenue_by_channel = df4.pivot_table(\n", + " index='sales_channel',\n", + " values='total_claim_amount',\n", + " aggfunc='sum'\n", + ").round(2)\n", + "\n", + "print(revenue_by_channel)" ] }, { @@ -93,6 +2524,104 @@ "Round the total revenue to 2 decimal points. Analyze the resulting table to draw insights." ] }, + { + "cell_type": "code", + "execution_count": 34, + "id": "df5e5542", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
customer_lifetime_value
educationBachelorCollegeDoctorHigh School or BelowMaster
gender
F7874.2694787748.8233257328.5089168675.2202018157.053154
M7703.6016758052.4592887415.3336388149.6877838168.832659
\n", + "
" + ], + "text/plain": [ + " customer_lifetime_value \\\n", + "education Bachelor College Doctor \n", + "gender \n", + "F 7874.269478 7748.823325 7328.508916 \n", + "M 7703.601675 8052.459288 7415.333638 \n", + "\n", + " \n", + "education High School or Below Master \n", + "gender \n", + "F 8675.220201 8157.053154 \n", + "M 8149.687783 8168.832659 " + ] + }, + "execution_count": 34, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#average customer lifetime value per gender and education level\n", + "#df4(\"customer_lifetime_value\")\n", + "gender_educat = pd.DataFrame(df4.groupby(['gender', \"education\"])['customer_lifetime_value'].mean())\n", + "\n", + "gender_educat.unstack()" + ] + }, { "cell_type": "markdown", "id": "640993b2-a291-436c-a34d-a551144f8196", @@ -130,14 +2659,108 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 42, "id": "3a069e0b-b400-470e-904d-d17582191be4", "metadata": { "id": "3a069e0b-b400-470e-904d-d17582191be4" }, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
policy_typemonthvariablevalue
0Corporate Auto1number_of_open_complaints5.0
1Corporate Auto2number_of_open_complaints4.0
2Personal Auto1number_of_open_complaints5.0
3Personal Auto2number_of_open_complaints5.0
4Special Auto1number_of_open_complaints5.0
5Special Auto2number_of_open_complaints5.0
\n", + "
" + ], + "text/plain": [ + " policy_type month variable value\n", + "0 Corporate Auto 1 number_of_open_complaints 5.0\n", + "1 Corporate Auto 2 number_of_open_complaints 4.0\n", + "2 Personal Auto 1 number_of_open_complaints 5.0\n", + "3 Personal Auto 2 number_of_open_complaints 5.0\n", + "4 Special Auto 1 number_of_open_complaints 5.0\n", + "5 Special Auto 2 number_of_open_complaints 5.0" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ - "# Your code goes here" + "# Your code goes here\n", + "complaints = pd.DataFrame(df4.groupby(['policy_type', \"month\"])['number_of_open_complaints'].max()).reset_index()\n", + "type(complaints)\n", + "\n", + "complaints2 = pd.melt(complaints, id_vars=[\"policy_type\", \"month\"], value_vars=['number_of_open_complaints'])\n", + "\n", + "display(complaints2)" ] } ], @@ -146,7 +2769,7 @@ "provenance": [] }, "kernelspec": { - "display_name": "Python 3 (ipykernel)", + "display_name": "base", "language": "python", "name": "python3" }, @@ -160,7 +2783,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.13" + "version": "3.13.5" } }, "nbformat": 4,