diff --git a/lab-dw-data-structuring-and-combining.ipynb b/lab-dw-data-structuring-and-combining.ipynb
index ec4e3f9..097bbb2 100644
--- a/lab-dw-data-structuring-and-combining.ipynb
+++ b/lab-dw-data-structuring-and-combining.ipynb
@@ -36,14 +36,2322 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 3,
+ "id": "fa2260cc-b023-47c2-9d1d-869c038dbba8",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import pandas as pd\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
"id": "492d06e3-92c7-4105-ac72-536db98d3244",
"metadata": {
"id": "492d06e3-92c7-4105-ac72-536db98d3244"
},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Customer | \n",
+ " ST | \n",
+ " GENDER | \n",
+ " Education | \n",
+ " Customer Lifetime Value | \n",
+ " Income | \n",
+ " Monthly Premium Auto | \n",
+ " Number of Open Complaints | \n",
+ " Policy Type | \n",
+ " Vehicle Class | \n",
+ " Total Claim Amount | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " RB50392 | \n",
+ " Washington | \n",
+ " NaN | \n",
+ " Master | \n",
+ " NaN | \n",
+ " 0.0 | \n",
+ " 1000.0 | \n",
+ " 1/0/00 | \n",
+ " Personal Auto | \n",
+ " Four-Door Car | \n",
+ " 2.704934 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " QZ44356 | \n",
+ " Arizona | \n",
+ " F | \n",
+ " Bachelor | \n",
+ " 697953.59% | \n",
+ " 0.0 | \n",
+ " 94.0 | \n",
+ " 1/0/00 | \n",
+ " Personal Auto | \n",
+ " Four-Door Car | \n",
+ " 1131.464935 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " AI49188 | \n",
+ " Nevada | \n",
+ " F | \n",
+ " Bachelor | \n",
+ " 1288743.17% | \n",
+ " 48767.0 | \n",
+ " 108.0 | \n",
+ " 1/0/00 | \n",
+ " Personal Auto | \n",
+ " Two-Door Car | \n",
+ " 566.472247 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " WW63253 | \n",
+ " California | \n",
+ " M | \n",
+ " Bachelor | \n",
+ " 764586.18% | \n",
+ " 0.0 | \n",
+ " 106.0 | \n",
+ " 1/0/00 | \n",
+ " Corporate Auto | \n",
+ " SUV | \n",
+ " 529.881344 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " GA49547 | \n",
+ " Washington | \n",
+ " M | \n",
+ " High School or Below | \n",
+ " 536307.65% | \n",
+ " 36357.0 | \n",
+ " 68.0 | \n",
+ " 1/0/00 | \n",
+ " Personal Auto | \n",
+ " Four-Door Car | \n",
+ " 17.269323 | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 4003 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 4004 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 4005 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 4006 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 4007 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
4008 rows × 11 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Customer ST GENDER Education \\\n",
+ "0 RB50392 Washington NaN Master \n",
+ "1 QZ44356 Arizona F Bachelor \n",
+ "2 AI49188 Nevada F Bachelor \n",
+ "3 WW63253 California M Bachelor \n",
+ "4 GA49547 Washington M High School or Below \n",
+ "... ... ... ... ... \n",
+ "4003 NaN NaN NaN NaN \n",
+ "4004 NaN NaN NaN NaN \n",
+ "4005 NaN NaN NaN NaN \n",
+ "4006 NaN NaN NaN NaN \n",
+ "4007 NaN NaN NaN NaN \n",
+ "\n",
+ " Customer Lifetime Value Income Monthly Premium Auto \\\n",
+ "0 NaN 0.0 1000.0 \n",
+ "1 697953.59% 0.0 94.0 \n",
+ "2 1288743.17% 48767.0 108.0 \n",
+ "3 764586.18% 0.0 106.0 \n",
+ "4 536307.65% 36357.0 68.0 \n",
+ "... ... ... ... \n",
+ "4003 NaN NaN NaN \n",
+ "4004 NaN NaN NaN \n",
+ "4005 NaN NaN NaN \n",
+ "4006 NaN NaN NaN \n",
+ "4007 NaN NaN NaN \n",
+ "\n",
+ " Number of Open Complaints Policy Type Vehicle Class \\\n",
+ "0 1/0/00 Personal Auto Four-Door Car \n",
+ "1 1/0/00 Personal Auto Four-Door Car \n",
+ "2 1/0/00 Personal Auto Two-Door Car \n",
+ "3 1/0/00 Corporate Auto SUV \n",
+ "4 1/0/00 Personal Auto Four-Door Car \n",
+ "... ... ... ... \n",
+ "4003 NaN NaN NaN \n",
+ "4004 NaN NaN NaN \n",
+ "4005 NaN NaN NaN \n",
+ "4006 NaN NaN NaN \n",
+ "4007 NaN NaN NaN \n",
+ "\n",
+ " Total Claim Amount \n",
+ "0 2.704934 \n",
+ "1 1131.464935 \n",
+ "2 566.472247 \n",
+ "3 529.881344 \n",
+ "4 17.269323 \n",
+ "... ... \n",
+ "4003 NaN \n",
+ "4004 NaN \n",
+ "4005 NaN \n",
+ "4006 NaN \n",
+ "4007 NaN \n",
+ "\n",
+ "[4008 rows x 11 columns]"
+ ]
+ },
+ "execution_count": 8,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Load the first source file and bind it to df1 only (the stray `pd.read_csvdf =` target set an attribute on the pandas module).\n",
+ "df1 = pd.read_csv(\"https://raw.githubusercontent.com/data-bootcamp-v4/data/main/file1.csv\")\n",
+ "df1"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "id": "73e4dbf2-e2f6-48a5-9547-91866405aa32",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Customer | \n",
+ " ST | \n",
+ " GENDER | \n",
+ " Education | \n",
+ " Customer Lifetime Value | \n",
+ " Income | \n",
+ " Monthly Premium Auto | \n",
+ " Number of Open Complaints | \n",
+ " Total Claim Amount | \n",
+ " Policy Type | \n",
+ " Vehicle Class | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " GS98873 | \n",
+ " Arizona | \n",
+ " F | \n",
+ " Bachelor | \n",
+ " 323912.47% | \n",
+ " 16061 | \n",
+ " 88 | \n",
+ " 1/0/00 | \n",
+ " 633.600000 | \n",
+ " Personal Auto | \n",
+ " Four-Door Car | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " CW49887 | \n",
+ " California | \n",
+ " F | \n",
+ " Master | \n",
+ " 462680.11% | \n",
+ " 79487 | \n",
+ " 114 | \n",
+ " 1/0/00 | \n",
+ " 547.200000 | \n",
+ " Special Auto | \n",
+ " SUV | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " MY31220 | \n",
+ " California | \n",
+ " F | \n",
+ " College | \n",
+ " 899704.02% | \n",
+ " 54230 | \n",
+ " 112 | \n",
+ " 1/0/00 | \n",
+ " 537.600000 | \n",
+ " Personal Auto | \n",
+ " Two-Door Car | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " UH35128 | \n",
+ " Oregon | \n",
+ " F | \n",
+ " College | \n",
+ " 2580706.30% | \n",
+ " 71210 | \n",
+ " 214 | \n",
+ " 1/1/00 | \n",
+ " 1027.200000 | \n",
+ " Personal Auto | \n",
+ " Luxury Car | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " WH52799 | \n",
+ " Arizona | \n",
+ " F | \n",
+ " College | \n",
+ " 380812.21% | \n",
+ " 94903 | \n",
+ " 94 | \n",
+ " 1/0/00 | \n",
+ " 451.200000 | \n",
+ " Corporate Auto | \n",
+ " Two-Door Car | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 991 | \n",
+ " HV85198 | \n",
+ " Arizona | \n",
+ " M | \n",
+ " Master | \n",
+ " 847141.75% | \n",
+ " 63513 | \n",
+ " 70 | \n",
+ " 1/0/00 | \n",
+ " 185.667213 | \n",
+ " Personal Auto | \n",
+ " Four-Door Car | \n",
+ "
\n",
+ " \n",
+ " 992 | \n",
+ " BS91566 | \n",
+ " Arizona | \n",
+ " F | \n",
+ " College | \n",
+ " 543121.91% | \n",
+ " 58161 | \n",
+ " 68 | \n",
+ " 1/0/00 | \n",
+ " 140.747286 | \n",
+ " Corporate Auto | \n",
+ " Four-Door Car | \n",
+ "
\n",
+ " \n",
+ " 993 | \n",
+ " IL40123 | \n",
+ " Nevada | \n",
+ " F | \n",
+ " College | \n",
+ " 568964.41% | \n",
+ " 83640 | \n",
+ " 70 | \n",
+ " 1/0/00 | \n",
+ " 471.050488 | \n",
+ " Corporate Auto | \n",
+ " Two-Door Car | \n",
+ "
\n",
+ " \n",
+ " 994 | \n",
+ " MY32149 | \n",
+ " California | \n",
+ " F | \n",
+ " Master | \n",
+ " 368672.38% | \n",
+ " 0 | \n",
+ " 96 | \n",
+ " 1/0/00 | \n",
+ " 28.460568 | \n",
+ " Personal Auto | \n",
+ " Two-Door Car | \n",
+ "
\n",
+ " \n",
+ " 995 | \n",
+ " SA91515 | \n",
+ " California | \n",
+ " M | \n",
+ " Bachelor | \n",
+ " 399258.39% | \n",
+ " 0 | \n",
+ " 111 | \n",
+ " 1/0/00 | \n",
+ " 700.349052 | \n",
+ " Personal Auto | \n",
+ " SUV | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
996 rows × 11 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Customer ST GENDER Education Customer Lifetime Value Income \\\n",
+ "0 GS98873 Arizona F Bachelor 323912.47% 16061 \n",
+ "1 CW49887 California F Master 462680.11% 79487 \n",
+ "2 MY31220 California F College 899704.02% 54230 \n",
+ "3 UH35128 Oregon F College 2580706.30% 71210 \n",
+ "4 WH52799 Arizona F College 380812.21% 94903 \n",
+ ".. ... ... ... ... ... ... \n",
+ "991 HV85198 Arizona M Master 847141.75% 63513 \n",
+ "992 BS91566 Arizona F College 543121.91% 58161 \n",
+ "993 IL40123 Nevada F College 568964.41% 83640 \n",
+ "994 MY32149 California F Master 368672.38% 0 \n",
+ "995 SA91515 California M Bachelor 399258.39% 0 \n",
+ "\n",
+ " Monthly Premium Auto Number of Open Complaints Total Claim Amount \\\n",
+ "0 88 1/0/00 633.600000 \n",
+ "1 114 1/0/00 547.200000 \n",
+ "2 112 1/0/00 537.600000 \n",
+ "3 214 1/1/00 1027.200000 \n",
+ "4 94 1/0/00 451.200000 \n",
+ ".. ... ... ... \n",
+ "991 70 1/0/00 185.667213 \n",
+ "992 68 1/0/00 140.747286 \n",
+ "993 70 1/0/00 471.050488 \n",
+ "994 96 1/0/00 28.460568 \n",
+ "995 111 1/0/00 700.349052 \n",
+ "\n",
+ " Policy Type Vehicle Class \n",
+ "0 Personal Auto Four-Door Car \n",
+ "1 Special Auto SUV \n",
+ "2 Personal Auto Two-Door Car \n",
+ "3 Personal Auto Luxury Car \n",
+ "4 Corporate Auto Two-Door Car \n",
+ ".. ... ... \n",
+ "991 Personal Auto Four-Door Car \n",
+ "992 Corporate Auto Four-Door Car \n",
+ "993 Corporate Auto Two-Door Car \n",
+ "994 Personal Auto Two-Door Car \n",
+ "995 Personal Auto SUV \n",
+ "\n",
+ "[996 rows x 11 columns]"
+ ]
+ },
+ "execution_count": 10,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Load the second source file and bind it to df2 only (the stray `pd.read_csvdf =` target set an attribute on the pandas module).\n",
+ "df2 = pd.read_csv(\"https://raw.githubusercontent.com/data-bootcamp-v4/data/main/file2.csv\")\n",
+ "df2"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "id": "f228e79e-03b1-4992-8a88-4c84ca653af1",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Customer | \n",
+ " State | \n",
+ " Customer Lifetime Value | \n",
+ " Education | \n",
+ " Gender | \n",
+ " Income | \n",
+ " Monthly Premium Auto | \n",
+ " Number of Open Complaints | \n",
+ " Policy Type | \n",
+ " Total Claim Amount | \n",
+ " Vehicle Class | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " SA25987 | \n",
+ " Washington | \n",
+ " 3479.137523 | \n",
+ " High School or Below | \n",
+ " M | \n",
+ " 0 | \n",
+ " 104 | \n",
+ " 0 | \n",
+ " Personal Auto | \n",
+ " 499.200000 | \n",
+ " Two-Door Car | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " TB86706 | \n",
+ " Arizona | \n",
+ " 2502.637401 | \n",
+ " Master | \n",
+ " M | \n",
+ " 0 | \n",
+ " 66 | \n",
+ " 0 | \n",
+ " Personal Auto | \n",
+ " 3.468912 | \n",
+ " Two-Door Car | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " ZL73902 | \n",
+ " Nevada | \n",
+ " 3265.156348 | \n",
+ " Bachelor | \n",
+ " F | \n",
+ " 25820 | \n",
+ " 82 | \n",
+ " 0 | \n",
+ " Personal Auto | \n",
+ " 393.600000 | \n",
+ " Four-Door Car | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " KX23516 | \n",
+ " California | \n",
+ " 4455.843406 | \n",
+ " High School or Below | \n",
+ " F | \n",
+ " 0 | \n",
+ " 121 | \n",
+ " 0 | \n",
+ " Personal Auto | \n",
+ " 699.615192 | \n",
+ " SUV | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " FN77294 | \n",
+ " California | \n",
+ " 7704.958480 | \n",
+ " High School or Below | \n",
+ " M | \n",
+ " 30366 | \n",
+ " 101 | \n",
+ " 2 | \n",
+ " Personal Auto | \n",
+ " 484.800000 | \n",
+ " SUV | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Customer State Customer Lifetime Value Education Gender \\\n",
+ "0 SA25987 Washington 3479.137523 High School or Below M \n",
+ "1 TB86706 Arizona 2502.637401 Master M \n",
+ "2 ZL73902 Nevada 3265.156348 Bachelor F \n",
+ "3 KX23516 California 4455.843406 High School or Below F \n",
+ "4 FN77294 California 7704.958480 High School or Below M \n",
+ "\n",
+ " Income Monthly Premium Auto Number of Open Complaints Policy Type \\\n",
+ "0 0 104 0 Personal Auto \n",
+ "1 0 66 0 Personal Auto \n",
+ "2 25820 82 0 Personal Auto \n",
+ "3 0 121 0 Personal Auto \n",
+ "4 30366 101 2 Personal Auto \n",
+ "\n",
+ " Total Claim Amount Vehicle Class \n",
+ "0 499.200000 Two-Door Car \n",
+ "1 3.468912 Two-Door Car \n",
+ "2 393.600000 Four-Door Car \n",
+ "3 699.615192 SUV \n",
+ "4 484.800000 SUV "
+ ]
+ },
+ "execution_count": 9,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Load the third source file and bind it to df3 only (the stray `pd.read_csvdf =` target set an attribute on the pandas module).\n",
+ "df3 = pd.read_csv(\"https://raw.githubusercontent.com/data-bootcamp-v4/data/main/file3.csv\")\n",
+ "df3.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "id": "6a21ea05-7b32-48c5-9b7a-668eb0bf358e",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Customer | \n",
+ " ST | \n",
+ " GENDER | \n",
+ " Education | \n",
+ " Customer Lifetime Value | \n",
+ " Income | \n",
+ " Monthly Premium Auto | \n",
+ " Number of Open Complaints | \n",
+ " Policy Type | \n",
+ " Vehicle Class | \n",
+ " Total Claim Amount | \n",
+ " State | \n",
+ " Gender | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " RB50392 | \n",
+ " Washington | \n",
+ " NaN | \n",
+ " Master | \n",
+ " NaN | \n",
+ " 0.0 | \n",
+ " 1000.0 | \n",
+ " 1/0/00 | \n",
+ " Personal Auto | \n",
+ " Four-Door Car | \n",
+ " 2.704934 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " QZ44356 | \n",
+ " Arizona | \n",
+ " F | \n",
+ " Bachelor | \n",
+ " 697953.59% | \n",
+ " 0.0 | \n",
+ " 94.0 | \n",
+ " 1/0/00 | \n",
+ " Personal Auto | \n",
+ " Four-Door Car | \n",
+ " 1131.464935 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " AI49188 | \n",
+ " Nevada | \n",
+ " F | \n",
+ " Bachelor | \n",
+ " 1288743.17% | \n",
+ " 48767.0 | \n",
+ " 108.0 | \n",
+ " 1/0/00 | \n",
+ " Personal Auto | \n",
+ " Two-Door Car | \n",
+ " 566.472247 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " WW63253 | \n",
+ " California | \n",
+ " M | \n",
+ " Bachelor | \n",
+ " 764586.18% | \n",
+ " 0.0 | \n",
+ " 106.0 | \n",
+ " 1/0/00 | \n",
+ " Corporate Auto | \n",
+ " SUV | \n",
+ " 529.881344 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " GA49547 | \n",
+ " Washington | \n",
+ " M | \n",
+ " High School or Below | \n",
+ " 536307.65% | \n",
+ " 36357.0 | \n",
+ " 68.0 | \n",
+ " 1/0/00 | \n",
+ " Personal Auto | \n",
+ " Four-Door Car | \n",
+ " 17.269323 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 7065 | \n",
+ " LA72316 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " Bachelor | \n",
+ " 23405.98798 | \n",
+ " 71941.0 | \n",
+ " 73.0 | \n",
+ " 0 | \n",
+ " Personal Auto | \n",
+ " Four-Door Car | \n",
+ " 198.234764 | \n",
+ " California | \n",
+ " M | \n",
+ "
\n",
+ " \n",
+ " 7066 | \n",
+ " PK87824 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " College | \n",
+ " 3096.511217 | \n",
+ " 21604.0 | \n",
+ " 79.0 | \n",
+ " 0 | \n",
+ " Corporate Auto | \n",
+ " Four-Door Car | \n",
+ " 379.200000 | \n",
+ " California | \n",
+ " F | \n",
+ "
\n",
+ " \n",
+ " 7067 | \n",
+ " TD14365 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " Bachelor | \n",
+ " 8163.890428 | \n",
+ " 0.0 | \n",
+ " 85.0 | \n",
+ " 3 | \n",
+ " Corporate Auto | \n",
+ " Four-Door Car | \n",
+ " 790.784983 | \n",
+ " California | \n",
+ " M | \n",
+ "
\n",
+ " \n",
+ " 7068 | \n",
+ " UP19263 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " College | \n",
+ " 7524.442436 | \n",
+ " 21941.0 | \n",
+ " 96.0 | \n",
+ " 0 | \n",
+ " Personal Auto | \n",
+ " Four-Door Car | \n",
+ " 691.200000 | \n",
+ " California | \n",
+ " M | \n",
+ "
\n",
+ " \n",
+ " 7069 | \n",
+ " Y167826 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " College | \n",
+ " 2611.836866 | \n",
+ " 0.0 | \n",
+ " 77.0 | \n",
+ " 0 | \n",
+ " Corporate Auto | \n",
+ " Two-Door Car | \n",
+ " 369.600000 | \n",
+ " California | \n",
+ " M | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
12074 rows × 13 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Customer ST GENDER Education \\\n",
+ "0 RB50392 Washington NaN Master \n",
+ "1 QZ44356 Arizona F Bachelor \n",
+ "2 AI49188 Nevada F Bachelor \n",
+ "3 WW63253 California M Bachelor \n",
+ "4 GA49547 Washington M High School or Below \n",
+ "... ... ... ... ... \n",
+ "7065 LA72316 NaN NaN Bachelor \n",
+ "7066 PK87824 NaN NaN College \n",
+ "7067 TD14365 NaN NaN Bachelor \n",
+ "7068 UP19263 NaN NaN College \n",
+ "7069 Y167826 NaN NaN College \n",
+ "\n",
+ " Customer Lifetime Value Income Monthly Premium Auto \\\n",
+ "0 NaN 0.0 1000.0 \n",
+ "1 697953.59% 0.0 94.0 \n",
+ "2 1288743.17% 48767.0 108.0 \n",
+ "3 764586.18% 0.0 106.0 \n",
+ "4 536307.65% 36357.0 68.0 \n",
+ "... ... ... ... \n",
+ "7065 23405.98798 71941.0 73.0 \n",
+ "7066 3096.511217 21604.0 79.0 \n",
+ "7067 8163.890428 0.0 85.0 \n",
+ "7068 7524.442436 21941.0 96.0 \n",
+ "7069 2611.836866 0.0 77.0 \n",
+ "\n",
+ " Number of Open Complaints Policy Type Vehicle Class \\\n",
+ "0 1/0/00 Personal Auto Four-Door Car \n",
+ "1 1/0/00 Personal Auto Four-Door Car \n",
+ "2 1/0/00 Personal Auto Two-Door Car \n",
+ "3 1/0/00 Corporate Auto SUV \n",
+ "4 1/0/00 Personal Auto Four-Door Car \n",
+ "... ... ... ... \n",
+ "7065 0 Personal Auto Four-Door Car \n",
+ "7066 0 Corporate Auto Four-Door Car \n",
+ "7067 3 Corporate Auto Four-Door Car \n",
+ "7068 0 Personal Auto Four-Door Car \n",
+ "7069 0 Corporate Auto Two-Door Car \n",
+ "\n",
+ " Total Claim Amount State Gender \n",
+ "0 2.704934 NaN NaN \n",
+ "1 1131.464935 NaN NaN \n",
+ "2 566.472247 NaN NaN \n",
+ "3 529.881344 NaN NaN \n",
+ "4 17.269323 NaN NaN \n",
+ "... ... ... ... \n",
+ "7065 198.234764 California M \n",
+ "7066 379.200000 California F \n",
+ "7067 790.784983 California M \n",
+ "7068 691.200000 California M \n",
+ "7069 369.600000 California M \n",
+ "\n",
+ "[12074 rows x 13 columns]"
+ ]
+ },
+ "execution_count": 12,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# file3 names its columns State/Gender while file1 and file2 use ST/GENDER; rename before stacking so the\n",
+ "# values land in the same columns, and rebuild the index so row labels are unique in the combined frame.\n",
+ "pd.concat([df1, df2, df3.rename(columns={\"State\": \"ST\", \"Gender\": \"GENDER\"})], axis=0, ignore_index=True)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "id": "b3d034c6-25b9-4d57-9c11-273e4c4d2200",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Customer | \n",
+ " ST | \n",
+ " GENDER | \n",
+ " Education | \n",
+ " Customer Lifetime Value | \n",
+ " Income | \n",
+ " Monthly Premium Auto | \n",
+ " Number of Open Complaints | \n",
+ " Policy Type | \n",
+ " Vehicle Class | \n",
+ " Total Claim Amount | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " False | \n",
+ " False | \n",
+ " True | \n",
+ " False | \n",
+ " True | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 4003 | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ "
\n",
+ " \n",
+ " 4004 | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ "
\n",
+ " \n",
+ " 4005 | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ "
\n",
+ " \n",
+ " 4006 | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ "
\n",
+ " \n",
+ " 4007 | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
4008 rows × 11 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Customer ST GENDER Education Customer Lifetime Value Income \\\n",
+ "0 False False True False True False \n",
+ "1 False False False False False False \n",
+ "2 False False False False False False \n",
+ "3 False False False False False False \n",
+ "4 False False False False False False \n",
+ "... ... ... ... ... ... ... \n",
+ "4003 True True True True True True \n",
+ "4004 True True True True True True \n",
+ "4005 True True True True True True \n",
+ "4006 True True True True True True \n",
+ "4007 True True True True True True \n",
+ "\n",
+ " Monthly Premium Auto Number of Open Complaints Policy Type \\\n",
+ "0 False False False \n",
+ "1 False False False \n",
+ "2 False False False \n",
+ "3 False False False \n",
+ "4 False False False \n",
+ "... ... ... ... \n",
+ "4003 True True True \n",
+ "4004 True True True \n",
+ "4005 True True True \n",
+ "4006 True True True \n",
+ "4007 True True True \n",
+ "\n",
+ " Vehicle Class Total Claim Amount \n",
+ "0 False False \n",
+ "1 False False \n",
+ "2 False False \n",
+ "3 False False \n",
+ "4 False False \n",
+ "... ... ... \n",
+ "4003 True True \n",
+ "4004 True True \n",
+ "4005 True True \n",
+ "4006 True True \n",
+ "4007 True True \n",
+ "\n",
+ "[4008 rows x 11 columns]"
+ ]
+ },
+ "execution_count": 14,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# NOTE(review): no earlier cell creates `df`; the recorded output (4008 rows x 11 columns) matches df1,\n",
+ "# so presumably the analysis below was meant to run on file1 - confirm. Working on a copy keeps df1 intact.\n",
+ "df = df1.copy()\n",
+ "df.isnull()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 15,
+ "id": "3dcbea52-68d6-424e-a482-0b71f1b2f6bb",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "Customer True\n",
+ "ST True\n",
+ "GENDER True\n",
+ "Education True\n",
+ "Customer Lifetime Value True\n",
+ "Income True\n",
+ "Monthly Premium Auto True\n",
+ "Number of Open Complaints True\n",
+ "Policy Type True\n",
+ "Vehicle Class True\n",
+ "Total Claim Amount True\n",
+ "dtype: bool"
+ ]
+ },
+ "execution_count": 15,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Per-column flag: True where the column holds at least one missing value.\n",
+ "df.isnull().any()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 16,
+ "id": "5fb9208d-6e20-4206-9129-0e8c087cd44e",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "Customer 2937\n",
+ "ST 2937\n",
+ "GENDER 3054\n",
+ "Education 2937\n",
+ "Customer Lifetime Value 2940\n",
+ "Income 2937\n",
+ "Monthly Premium Auto 2937\n",
+ "Number of Open Complaints 2937\n",
+ "Policy Type 2937\n",
+ "Vehicle Class 2937\n",
+ "Total Claim Amount 2937\n",
+ "dtype: int64"
+ ]
+ },
+ "execution_count": 16,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Number of missing values in each column.\n",
+ "df.isnull().sum(axis=0)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 18,
+ "id": "a378d21d-5ef8-49d7-aa73-3658f763369b",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "0 2\n",
+ "1 0\n",
+ "2 0\n",
+ "3 0\n",
+ "4 0\n",
+ " ..\n",
+ "4003 11\n",
+ "4004 11\n",
+ "4005 11\n",
+ "4006 11\n",
+ "4007 11\n",
+ "Length: 4008, dtype: int64"
+ ]
+ },
+ "execution_count": 18,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Number of missing values in each row.\n",
+ "df.isnull().sum(axis=\"columns\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 19,
+ "id": "4ed75f4a-37dd-483f-a89b-7fdd18de6342",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "(4008, 11)"
+ ]
+ },
+ "execution_count": 19,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df.shape"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 24,
+ "id": "33836ac6-d833-406d-84ac-f2a3f635e0e1",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "Customer object\n",
+ "ST object\n",
+ "GENDER object\n",
+ "Education object\n",
+ "Customer Lifetime Value object\n",
+ "Income float64\n",
+ "Monthly Premium Auto float64\n",
+ "Number of Open Complaints object\n",
+ "Policy Type object\n",
+ "Vehicle Class object\n",
+ "Total Claim Amount float64\n",
+ "dtype: object"
+ ]
+ },
+ "execution_count": 24,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df.dtypes"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 36,
+ "id": "d0862e10-6069-42a5-acf4-cabe309b1dba",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 36,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Preview Income with missing values replaced by the column mean; df itself is not modified.\n",
+ "# head must be called - a bare `.head` only displays the bound method, not the data.\n",
+ "df[\"Income\"].fillna(df[\"Income\"].mean()).head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 41,
+ "id": "7ce0bba0-98f4-49ec-9fc0-f05fb819e0e9",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "0 1000.00000\n",
+ "1 94.00000\n",
+ "2 108.00000\n",
+ "3 106.00000\n",
+ "4 68.00000\n",
+ " ... \n",
+ "4003 193.23436\n",
+ "4004 193.23436\n",
+ "4005 193.23436\n",
+ "4006 193.23436\n",
+ "4007 193.23436\n",
+ "Name: Monthly Premium Auto, Length: 4008, dtype: float64"
+ ]
+ },
+ "execution_count": 41,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Show Monthly Premium Auto with gaps filled by the column mean; the result is displayed only, not stored.\n",
+ "df[\"Monthly Premium Auto\"].pipe(lambda col: col.fillna(col.mean()))\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 45,
+ "id": "784117be-a0f0-47d1-855e-5b3d9d77ad8c",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "0 2.704934\n",
+ "1 1131.464935\n",
+ "2 566.472247\n",
+ "3 529.881344\n",
+ "4 17.269323\n",
+ " ... \n",
+ "4003 404.986909\n",
+ "4004 404.986909\n",
+ "4005 404.986909\n",
+ "4006 404.986909\n",
+ "4007 404.986909\n",
+ "Name: Total Claim Amount, Length: 4008, dtype: float64"
+ ]
+ },
+ "execution_count": 45,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Show Total Claim Amount with gaps filled by the column mean; the result is displayed only, not stored.\n",
+ "df[\"Total Claim Amount\"].pipe(lambda col: col.fillna(col.mean()))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 46,
+ "id": "64dd0640-9a2a-4624-8d29-e36c7bca28a4",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "The history saving thread hit an unexpected error (OperationalError('attempt to write a readonly database')).History will not be written to the database.\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "np.int64(2936)"
+ ]
+ },
+ "execution_count": 46,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Count rows that repeat an earlier row (all columns compared, first occurrence not counted).\n",
+ "df.duplicated(keep=\"first\").sum()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 47,
+ "id": "a25c5d24-6015-4446-b1fe-a8f9d5b7703f",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "np.True_"
+ ]
+ },
+ "execution_count": 47,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# True if at least one row repeats an earlier row.\n",
+ "df.duplicated(keep=\"first\").any()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 48,
+ "id": "1643bf84-1976-4340-a611-83681a7be8c5",
+ "metadata": {},
"outputs": [],
"source": [
- "# Your code goes here"
+ "# Rebind instead of mutating in place, so any other reference to the original frame is left untouched\n",
+ "# and re-running the cell stays safe.\n",
+ "df = df.drop_duplicates()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 49,
+ "id": "c0ba21f0-8d25-4798-92b5-de620aa4129a",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 49,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Call head() to display the first rows; a bare `df.head` only shows the bound method.\n",
+ "df.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 55,
+ "id": "6f935055-dc6e-423e-98cb-4a4111324f54",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Customer | \n",
+ " ST | \n",
+ " GENDER | \n",
+ " Education | \n",
+ " Customer Lifetime Value | \n",
+ " Income | \n",
+ " Monthly Premium Auto | \n",
+ " Number of Open Complaints | \n",
+ " Policy Type | \n",
+ " Vehicle Class | \n",
+ " Total Claim Amount | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 1067 | \n",
+ " VJ51327 | \n",
+ " Cali | \n",
+ " F | \n",
+ " High School or Below | \n",
+ " 2031499.76% | \n",
+ " 63209.0 | \n",
+ " 102.00000 | \n",
+ " 1/2/00 | \n",
+ " Personal Auto | \n",
+ " SUV | \n",
+ " 207.320041 | \n",
+ "
\n",
+ " \n",
+ " 1068 | \n",
+ " GS98873 | \n",
+ " Arizona | \n",
+ " F | \n",
+ " Bachelor | \n",
+ " 323912.47% | \n",
+ " 16061.0 | \n",
+ " 88.00000 | \n",
+ " 1/0/00 | \n",
+ " Personal Auto | \n",
+ " Four-Door Car | \n",
+ " 633.600000 | \n",
+ "
\n",
+ " \n",
+ " 1069 | \n",
+ " CW49887 | \n",
+ " California | \n",
+ " F | \n",
+ " Master | \n",
+ " 462680.11% | \n",
+ " 79487.0 | \n",
+ " 114.00000 | \n",
+ " 1/0/00 | \n",
+ " Special Auto | \n",
+ " SUV | \n",
+ " 547.200000 | \n",
+ "
\n",
+ " \n",
+ " 1070 | \n",
+ " MY31220 | \n",
+ " California | \n",
+ " F | \n",
+ " College | \n",
+ " 899704.02% | \n",
+ " 54230.0 | \n",
+ " 112.00000 | \n",
+ " 1/0/00 | \n",
+ " Personal Auto | \n",
+ " Two-Door Car | \n",
+ " 537.600000 | \n",
+ "
\n",
+ " \n",
+ " 1071 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 193.23436 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Customer ST GENDER Education \\\n",
+ "1067 VJ51327 Cali F High School or Below \n",
+ "1068 GS98873 Arizona F Bachelor \n",
+ "1069 CW49887 California F Master \n",
+ "1070 MY31220 California F College \n",
+ "1071 NaN NaN NaN NaN \n",
+ "\n",
+ " Customer Lifetime Value Income Monthly Premium Auto \\\n",
+ "1067 2031499.76% 63209.0 102.00000 \n",
+ "1068 323912.47% 16061.0 88.00000 \n",
+ "1069 462680.11% 79487.0 114.00000 \n",
+ "1070 899704.02% 54230.0 112.00000 \n",
+ "1071 NaN NaN 193.23436 \n",
+ "\n",
+ " Number of Open Complaints Policy Type Vehicle Class \\\n",
+ "1067 1/2/00 Personal Auto SUV \n",
+ "1068 1/0/00 Personal Auto Four-Door Car \n",
+ "1069 1/0/00 Special Auto SUV \n",
+ "1070 1/0/00 Personal Auto Two-Door Car \n",
+ "1071 NaN NaN NaN \n",
+ "\n",
+ " Total Claim Amount \n",
+ "1067 207.320041 \n",
+ "1068 633.600000 \n",
+ "1069 547.200000 \n",
+ "1070 537.600000 \n",
+ "1071 NaN "
+ ]
+ },
+ "execution_count": 55,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# drop_duplicates() already returns a new DataFrame, so a preceding copy() would just be overwritten.\n",
+ "df_without_duplicates = df.drop_duplicates()\n",
+ "df_without_duplicates.tail()\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 57,
+ "id": "6faf81e9-0413-4910-8f61-6f122ca78880",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " index | \n",
+ " Customer | \n",
+ " ST | \n",
+ " GENDER | \n",
+ " Education | \n",
+ " Customer Lifetime Value | \n",
+ " Income | \n",
+ " Monthly Premium Auto | \n",
+ " Number of Open Complaints | \n",
+ " Policy Type | \n",
+ " Vehicle Class | \n",
+ " Total Claim Amount | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 0 | \n",
+ " RB50392 | \n",
+ " Washington | \n",
+ " NaN | \n",
+ " Master | \n",
+ " NaN | \n",
+ " 0.0 | \n",
+ " 1000.00000 | \n",
+ " 1/0/00 | \n",
+ " Personal Auto | \n",
+ " Four-Door Car | \n",
+ " 2.704934 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 1 | \n",
+ " QZ44356 | \n",
+ " Arizona | \n",
+ " F | \n",
+ " Bachelor | \n",
+ " 697953.59% | \n",
+ " 0.0 | \n",
+ " 94.00000 | \n",
+ " 1/0/00 | \n",
+ " Personal Auto | \n",
+ " Four-Door Car | \n",
+ " 1131.464935 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 2 | \n",
+ " AI49188 | \n",
+ " Nevada | \n",
+ " F | \n",
+ " Bachelor | \n",
+ " 1288743.17% | \n",
+ " 48767.0 | \n",
+ " 108.00000 | \n",
+ " 1/0/00 | \n",
+ " Personal Auto | \n",
+ " Two-Door Car | \n",
+ " 566.472247 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 3 | \n",
+ " WW63253 | \n",
+ " California | \n",
+ " M | \n",
+ " Bachelor | \n",
+ " 764586.18% | \n",
+ " 0.0 | \n",
+ " 106.00000 | \n",
+ " 1/0/00 | \n",
+ " Corporate Auto | \n",
+ " SUV | \n",
+ " 529.881344 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 4 | \n",
+ " GA49547 | \n",
+ " Washington | \n",
+ " M | \n",
+ " High School or Below | \n",
+ " 536307.65% | \n",
+ " 36357.0 | \n",
+ " 68.00000 | \n",
+ " 1/0/00 | \n",
+ " Personal Auto | \n",
+ " Four-Door Car | \n",
+ " 17.269323 | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 1067 | \n",
+ " 1067 | \n",
+ " VJ51327 | \n",
+ " Cali | \n",
+ " F | \n",
+ " High School or Below | \n",
+ " 2031499.76% | \n",
+ " 63209.0 | \n",
+ " 102.00000 | \n",
+ " 1/2/00 | \n",
+ " Personal Auto | \n",
+ " SUV | \n",
+ " 207.320041 | \n",
+ "
\n",
+ " \n",
+ " 1068 | \n",
+ " 1068 | \n",
+ " GS98873 | \n",
+ " Arizona | \n",
+ " F | \n",
+ " Bachelor | \n",
+ " 323912.47% | \n",
+ " 16061.0 | \n",
+ " 88.00000 | \n",
+ " 1/0/00 | \n",
+ " Personal Auto | \n",
+ " Four-Door Car | \n",
+ " 633.600000 | \n",
+ "
\n",
+ " \n",
+ " 1069 | \n",
+ " 1069 | \n",
+ " CW49887 | \n",
+ " California | \n",
+ " F | \n",
+ " Master | \n",
+ " 462680.11% | \n",
+ " 79487.0 | \n",
+ " 114.00000 | \n",
+ " 1/0/00 | \n",
+ " Special Auto | \n",
+ " SUV | \n",
+ " 547.200000 | \n",
+ "
\n",
+ " \n",
+ " 1070 | \n",
+ " 1070 | \n",
+ " MY31220 | \n",
+ " California | \n",
+ " F | \n",
+ " College | \n",
+ " 899704.02% | \n",
+ " 54230.0 | \n",
+ " 112.00000 | \n",
+ " 1/0/00 | \n",
+ " Personal Auto | \n",
+ " Two-Door Car | \n",
+ " 537.600000 | \n",
+ "
\n",
+ " \n",
+ " 1071 | \n",
+ " 1071 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 193.23436 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
1072 rows × 12 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " index Customer ST GENDER Education \\\n",
+ "0 0 RB50392 Washington NaN Master \n",
+ "1 1 QZ44356 Arizona F Bachelor \n",
+ "2 2 AI49188 Nevada F Bachelor \n",
+ "3 3 WW63253 California M Bachelor \n",
+ "4 4 GA49547 Washington M High School or Below \n",
+ "... ... ... ... ... ... \n",
+ "1067 1067 VJ51327 Cali F High School or Below \n",
+ "1068 1068 GS98873 Arizona F Bachelor \n",
+ "1069 1069 CW49887 California F Master \n",
+ "1070 1070 MY31220 California F College \n",
+ "1071 1071 NaN NaN NaN NaN \n",
+ "\n",
+ " Customer Lifetime Value Income Monthly Premium Auto \\\n",
+ "0 NaN 0.0 1000.00000 \n",
+ "1 697953.59% 0.0 94.00000 \n",
+ "2 1288743.17% 48767.0 108.00000 \n",
+ "3 764586.18% 0.0 106.00000 \n",
+ "4 536307.65% 36357.0 68.00000 \n",
+ "... ... ... ... \n",
+ "1067 2031499.76% 63209.0 102.00000 \n",
+ "1068 323912.47% 16061.0 88.00000 \n",
+ "1069 462680.11% 79487.0 114.00000 \n",
+ "1070 899704.02% 54230.0 112.00000 \n",
+ "1071 NaN NaN 193.23436 \n",
+ "\n",
+ " Number of Open Complaints Policy Type Vehicle Class \\\n",
+ "0 1/0/00 Personal Auto Four-Door Car \n",
+ "1 1/0/00 Personal Auto Four-Door Car \n",
+ "2 1/0/00 Personal Auto Two-Door Car \n",
+ "3 1/0/00 Corporate Auto SUV \n",
+ "4 1/0/00 Personal Auto Four-Door Car \n",
+ "... ... ... ... \n",
+ "1067 1/2/00 Personal Auto SUV \n",
+ "1068 1/0/00 Personal Auto Four-Door Car \n",
+ "1069 1/0/00 Special Auto SUV \n",
+ "1070 1/0/00 Personal Auto Two-Door Car \n",
+ "1071 NaN NaN NaN \n",
+ "\n",
+ " Total Claim Amount \n",
+ "0 2.704934 \n",
+ "1 1131.464935 \n",
+ "2 566.472247 \n",
+ "3 529.881344 \n",
+ "4 17.269323 \n",
+ "... ... \n",
+ "1067 207.320041 \n",
+ "1068 633.600000 \n",
+ "1069 547.200000 \n",
+ "1070 537.600000 \n",
+ "1071 NaN \n",
+ "\n",
+ "[1072 rows x 12 columns]"
+ ]
+ },
+ "execution_count": 57,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Persist the renumbered index; drop=True avoids keeping the old index as an extra 'index' column.\n",
+ "df_without_duplicates = df_without_duplicates.reset_index(drop=True)\n",
+ "df_without_duplicates"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 58,
+ "id": "ca275cff-9981-4a7c-85da-d611cf1fdaf0",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "Index(['Customer', 'ST', 'GENDER', 'Education', 'Customer Lifetime Value',\n",
+ " 'Income', 'Monthly Premium Auto', 'Number of Open Complaints',\n",
+ " 'Policy Type', 'Vehicle Class', 'Total Claim Amount'],\n",
+ " dtype='object')"
+ ]
+ },
+ "execution_count": 58,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df.columns"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 61,
+ "id": "28a9788a-f36f-4da4-9820-280031242b05",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Customer | \n",
+ " State | \n",
+ " GENDER | \n",
+ " Education | \n",
+ " Customer Lifetime Value | \n",
+ " Income | \n",
+ " Monthly Premium Auto | \n",
+ " Number of Open Complaints | \n",
+ " Policy Type | \n",
+ " Vehicle Class | \n",
+ " Total Claim Amount | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " RB50392 | \n",
+ " Washington | \n",
+ " NaN | \n",
+ " Master | \n",
+ " NaN | \n",
+ " 0.0 | \n",
+ " 1000.00000 | \n",
+ " 1/0/00 | \n",
+ " Personal Auto | \n",
+ " Four-Door Car | \n",
+ " 2.704934 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " QZ44356 | \n",
+ " Arizona | \n",
+ " F | \n",
+ " Bachelor | \n",
+ " 697953.59% | \n",
+ " 0.0 | \n",
+ " 94.00000 | \n",
+ " 1/0/00 | \n",
+ " Personal Auto | \n",
+ " Four-Door Car | \n",
+ " 1131.464935 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " AI49188 | \n",
+ " Nevada | \n",
+ " F | \n",
+ " Bachelor | \n",
+ " 1288743.17% | \n",
+ " 48767.0 | \n",
+ " 108.00000 | \n",
+ " 1/0/00 | \n",
+ " Personal Auto | \n",
+ " Two-Door Car | \n",
+ " 566.472247 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " WW63253 | \n",
+ " California | \n",
+ " M | \n",
+ " Bachelor | \n",
+ " 764586.18% | \n",
+ " 0.0 | \n",
+ " 106.00000 | \n",
+ " 1/0/00 | \n",
+ " Corporate Auto | \n",
+ " SUV | \n",
+ " 529.881344 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " GA49547 | \n",
+ " Washington | \n",
+ " M | \n",
+ " High School or Below | \n",
+ " 536307.65% | \n",
+ " 36357.0 | \n",
+ " 68.00000 | \n",
+ " 1/0/00 | \n",
+ " Personal Auto | \n",
+ " Four-Door Car | \n",
+ " 17.269323 | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 1067 | \n",
+ " VJ51327 | \n",
+ " Cali | \n",
+ " F | \n",
+ " High School or Below | \n",
+ " 2031499.76% | \n",
+ " 63209.0 | \n",
+ " 102.00000 | \n",
+ " 1/2/00 | \n",
+ " Personal Auto | \n",
+ " SUV | \n",
+ " 207.320041 | \n",
+ "
\n",
+ " \n",
+ " 1068 | \n",
+ " GS98873 | \n",
+ " Arizona | \n",
+ " F | \n",
+ " Bachelor | \n",
+ " 323912.47% | \n",
+ " 16061.0 | \n",
+ " 88.00000 | \n",
+ " 1/0/00 | \n",
+ " Personal Auto | \n",
+ " Four-Door Car | \n",
+ " 633.600000 | \n",
+ "
\n",
+ " \n",
+ " 1069 | \n",
+ " CW49887 | \n",
+ " California | \n",
+ " F | \n",
+ " Master | \n",
+ " 462680.11% | \n",
+ " 79487.0 | \n",
+ " 114.00000 | \n",
+ " 1/0/00 | \n",
+ " Special Auto | \n",
+ " SUV | \n",
+ " 547.200000 | \n",
+ "
\n",
+ " \n",
+ " 1070 | \n",
+ " MY31220 | \n",
+ " California | \n",
+ " F | \n",
+ " College | \n",
+ " 899704.02% | \n",
+ " 54230.0 | \n",
+ " 112.00000 | \n",
+ " 1/0/00 | \n",
+ " Personal Auto | \n",
+ " Two-Door Car | \n",
+ " 537.600000 | \n",
+ "
\n",
+ " \n",
+ " 1071 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 193.23436 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
1072 rows × 11 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Customer State GENDER Education \\\n",
+ "0 RB50392 Washington NaN Master \n",
+ "1 QZ44356 Arizona F Bachelor \n",
+ "2 AI49188 Nevada F Bachelor \n",
+ "3 WW63253 California M Bachelor \n",
+ "4 GA49547 Washington M High School or Below \n",
+ "... ... ... ... ... \n",
+ "1067 VJ51327 Cali F High School or Below \n",
+ "1068 GS98873 Arizona F Bachelor \n",
+ "1069 CW49887 California F Master \n",
+ "1070 MY31220 California F College \n",
+ "1071 NaN NaN NaN NaN \n",
+ "\n",
+ " Customer Lifetime Value Income Monthly Premium Auto \\\n",
+ "0 NaN 0.0 1000.00000 \n",
+ "1 697953.59% 0.0 94.00000 \n",
+ "2 1288743.17% 48767.0 108.00000 \n",
+ "3 764586.18% 0.0 106.00000 \n",
+ "4 536307.65% 36357.0 68.00000 \n",
+ "... ... ... ... \n",
+ "1067 2031499.76% 63209.0 102.00000 \n",
+ "1068 323912.47% 16061.0 88.00000 \n",
+ "1069 462680.11% 79487.0 114.00000 \n",
+ "1070 899704.02% 54230.0 112.00000 \n",
+ "1071 NaN NaN 193.23436 \n",
+ "\n",
+ " Number of Open Complaints Policy Type Vehicle Class \\\n",
+ "0 1/0/00 Personal Auto Four-Door Car \n",
+ "1 1/0/00 Personal Auto Four-Door Car \n",
+ "2 1/0/00 Personal Auto Two-Door Car \n",
+ "3 1/0/00 Corporate Auto SUV \n",
+ "4 1/0/00 Personal Auto Four-Door Car \n",
+ "... ... ... ... \n",
+ "1067 1/2/00 Personal Auto SUV \n",
+ "1068 1/0/00 Personal Auto Four-Door Car \n",
+ "1069 1/0/00 Special Auto SUV \n",
+ "1070 1/0/00 Personal Auto Two-Door Car \n",
+ "1071 NaN NaN NaN \n",
+ "\n",
+ " Total Claim Amount \n",
+ "0 2.704934 \n",
+ "1 1131.464935 \n",
+ "2 566.472247 \n",
+ "3 529.881344 \n",
+ "4 17.269323 \n",
+ "... ... \n",
+ "1067 207.320041 \n",
+ "1068 633.600000 \n",
+ "1069 547.200000 \n",
+ "1070 537.600000 \n",
+ "1071 NaN \n",
+ "\n",
+ "[1072 rows x 11 columns]"
+ ]
+ },
+ "execution_count": 61,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df.rename(columns={'ST': 'State'}, inplace=True)\n",
+ "df"
]
},
{
@@ -72,14 +2380,309 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 78,
"id": "aa10d9b0-1c27-4d3f-a8e4-db6ab73bfd26",
"metadata": {
"id": "aa10d9b0-1c27-4d3f-a8e4-db6ab73bfd26"
},
"outputs": [],
"source": [
- "# Your code goes here"
+ "import pandas as pd\n",
+ "\n",
+ "df = pd.read_csv(\"https://raw.githubusercontent.com/data-bootcamp-v4/data/main/marketing_customer_analysis_clean.csv\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 79,
+ "id": "0bca1aba-6e82-4b83-950c-fd7e1c0b809b",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " unnamed:_0 | \n",
+ " customer | \n",
+ " state | \n",
+ " customer_lifetime_value | \n",
+ " response | \n",
+ " coverage | \n",
+ " education | \n",
+ " effective_to_date | \n",
+ " employmentstatus | \n",
+ " gender | \n",
+ " ... | \n",
+ " number_of_policies | \n",
+ " policy_type | \n",
+ " policy | \n",
+ " renew_offer_type | \n",
+ " sales_channel | \n",
+ " total_claim_amount | \n",
+ " vehicle_class | \n",
+ " vehicle_size | \n",
+ " vehicle_type | \n",
+ " month | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 0 | \n",
+ " DK49336 | \n",
+ " Arizona | \n",
+ " 4809.216960 | \n",
+ " No | \n",
+ " Basic | \n",
+ " College | \n",
+ " 2011-02-18 | \n",
+ " Employed | \n",
+ " M | \n",
+ " ... | \n",
+ " 9 | \n",
+ " Corporate Auto | \n",
+ " Corporate L3 | \n",
+ " Offer3 | \n",
+ " Agent | \n",
+ " 292.800000 | \n",
+ " Four-Door Car | \n",
+ " Medsize | \n",
+ " A | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 1 | \n",
+ " KX64629 | \n",
+ " California | \n",
+ " 2228.525238 | \n",
+ " No | \n",
+ " Basic | \n",
+ " College | \n",
+ " 2011-01-18 | \n",
+ " Unemployed | \n",
+ " F | \n",
+ " ... | \n",
+ " 1 | \n",
+ " Personal Auto | \n",
+ " Personal L3 | \n",
+ " Offer4 | \n",
+ " Call Center | \n",
+ " 744.924331 | \n",
+ " Four-Door Car | \n",
+ " Medsize | \n",
+ " A | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 2 | \n",
+ " LZ68649 | \n",
+ " Washington | \n",
+ " 14947.917300 | \n",
+ " No | \n",
+ " Basic | \n",
+ " Bachelor | \n",
+ " 2011-02-10 | \n",
+ " Employed | \n",
+ " M | \n",
+ " ... | \n",
+ " 2 | \n",
+ " Personal Auto | \n",
+ " Personal L3 | \n",
+ " Offer3 | \n",
+ " Call Center | \n",
+ " 480.000000 | \n",
+ " SUV | \n",
+ " Medsize | \n",
+ " A | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 3 | \n",
+ " XL78013 | \n",
+ " Oregon | \n",
+ " 22332.439460 | \n",
+ " Yes | \n",
+ " Extended | \n",
+ " College | \n",
+ " 2011-01-11 | \n",
+ " Employed | \n",
+ " M | \n",
+ " ... | \n",
+ " 2 | \n",
+ " Corporate Auto | \n",
+ " Corporate L3 | \n",
+ " Offer2 | \n",
+ " Branch | \n",
+ " 484.013411 | \n",
+ " Four-Door Car | \n",
+ " Medsize | \n",
+ " A | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 4 | \n",
+ " QA50777 | \n",
+ " Oregon | \n",
+ " 9025.067525 | \n",
+ " No | \n",
+ " Premium | \n",
+ " Bachelor | \n",
+ " 2011-01-17 | \n",
+ " Medical Leave | \n",
+ " F | \n",
+ " ... | \n",
+ " 7 | \n",
+ " Personal Auto | \n",
+ " Personal L2 | \n",
+ " Offer1 | \n",
+ " Branch | \n",
+ " 707.925645 | \n",
+ " Four-Door Car | \n",
+ " Medsize | \n",
+ " A | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
5 rows × 27 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " unnamed:_0 customer state customer_lifetime_value response \\\n",
+ "0 0 DK49336 Arizona 4809.216960 No \n",
+ "1 1 KX64629 California 2228.525238 No \n",
+ "2 2 LZ68649 Washington 14947.917300 No \n",
+ "3 3 XL78013 Oregon 22332.439460 Yes \n",
+ "4 4 QA50777 Oregon 9025.067525 No \n",
+ "\n",
+ " coverage education effective_to_date employmentstatus gender ... \\\n",
+ "0 Basic College 2011-02-18 Employed M ... \n",
+ "1 Basic College 2011-01-18 Unemployed F ... \n",
+ "2 Basic Bachelor 2011-02-10 Employed M ... \n",
+ "3 Extended College 2011-01-11 Employed M ... \n",
+ "4 Premium Bachelor 2011-01-17 Medical Leave F ... \n",
+ "\n",
+ " number_of_policies policy_type policy renew_offer_type \\\n",
+ "0 9 Corporate Auto Corporate L3 Offer3 \n",
+ "1 1 Personal Auto Personal L3 Offer4 \n",
+ "2 2 Personal Auto Personal L3 Offer3 \n",
+ "3 2 Corporate Auto Corporate L3 Offer2 \n",
+ "4 7 Personal Auto Personal L2 Offer1 \n",
+ "\n",
+ " sales_channel total_claim_amount vehicle_class vehicle_size \\\n",
+ "0 Agent 292.800000 Four-Door Car Medsize \n",
+ "1 Call Center 744.924331 Four-Door Car Medsize \n",
+ "2 Call Center 480.000000 SUV Medsize \n",
+ "3 Branch 484.013411 Four-Door Car Medsize \n",
+ "4 Branch 707.925645 Four-Door Car Medsize \n",
+ "\n",
+ " vehicle_type month \n",
+ "0 A 2 \n",
+ "1 A 1 \n",
+ "2 A 2 \n",
+ "3 A 1 \n",
+ "4 A 1 \n",
+ "\n",
+ "[5 rows x 27 columns]"
+ ]
+ },
+ "execution_count": 79,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 80,
+ "id": "49ba083a-9d4f-4025-aae0-d3493c5a09c6",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "unnamed:_0 False\n",
+ "customer False\n",
+ "state False\n",
+ "customer_lifetime_value False\n",
+ "response False\n",
+ "coverage False\n",
+ "education False\n",
+ "effective_to_date False\n",
+ "employmentstatus False\n",
+ "gender False\n",
+ "income False\n",
+ "location_code False\n",
+ "marital_status False\n",
+ "monthly_premium_auto False\n",
+ "months_since_last_claim False\n",
+ "months_since_policy_inception False\n",
+ "number_of_open_complaints False\n",
+ "number_of_policies False\n",
+ "policy_type False\n",
+ "policy False\n",
+ "renew_offer_type False\n",
+ "sales_channel False\n",
+ "total_claim_amount False\n",
+ "vehicle_class False\n",
+ "vehicle_size False\n",
+ "vehicle_type False\n",
+ "month False\n",
+ "dtype: bool"
+ ]
+ },
+ "execution_count": 80,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df.isna().any()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 81,
+ "id": "42b0bcfc-497c-45d6-aca1-29b062e6e81c",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "np.int64(0)"
+ ]
+ },
+ "execution_count": 81,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df.duplicated().sum()"
]
},
{
@@ -93,6 +2696,116 @@
"Round the total revenue to 2 decimal points. Analyze the resulting table to draw insights."
]
},
+ {
+ "cell_type": "code",
+ "execution_count": 90,
+ "id": "baeaf18a-7a90-4a19-b178-2356071397e7",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " total_claim_amount\n",
+ "sales_channel \n",
+ "Agent 1.810227e+06\n",
+ "Branch 1.301204e+06\n",
+ "Call Center 9.266008e+05\n",
+ "Web 7.066000e+05\n"
+ ]
+ }
+ ],
+ "source": [
+ "summary = df.pivot_table(index=\"sales_channel\",values=\"total_claim_amount\",aggfunc=\"sum\")\n",
+ "print(summary)\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 95,
+ "id": "517e43fb-79e1-434f-8595-399527737827",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " total_claim_amount | \n",
+ "
\n",
+ " \n",
+ " sales_channel | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " Agent | \n",
+ " 1810226.82 | \n",
+ "
\n",
+ " \n",
+ " Branch | \n",
+ " 1301204.00 | \n",
+ "
\n",
+ " \n",
+ " Call Center | \n",
+ " 926600.82 | \n",
+ "
\n",
+ " \n",
+ " Web | \n",
+ " 706600.04 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " total_claim_amount\n",
+ "sales_channel \n",
+ "Agent 1810226.82\n",
+ "Branch 1301204.00\n",
+ "Call Center 926600.82\n",
+ "Web 706600.04"
+ ]
+ },
+ "execution_count": 95,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "summary = summary.round(2)\n",
+ "summary"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 96,
+ "id": "1735e50d-a4ae-4254-b7d3-891efee90957",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# The Agent channel generated the highest revenue (1.81M), almost 40% more than Branch.\n",
+ "#Web brought the lowest total revenue, only 706K, less than half of Agent.\n",
+ "#Strongest channels are Agent and Branch."
+ ]
+ },
{
"cell_type": "markdown",
"id": "640993b2-a291-436c-a34d-a551144f8196",
@@ -103,6 +2816,49 @@
"2. Create a pivot table that shows the average customer lifetime value per gender and education level. Analyze the resulting table to draw insights."
]
},
+ {
+ "cell_type": "code",
+ "execution_count": 111,
+ "id": "a9a2eb9a-7cbf-46fc-a364-655d6c65e416",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "gender F M\n",
+ "education \n",
+ "Bachelor 7874.27 7703.60\n",
+ "College 7748.82 8052.46\n",
+ "Doctor 7328.51 7415.33\n",
+ "High School or Below 8675.22 8149.69\n",
+ "Master 8157.05 8168.83\n"
+ ]
+ }
+ ],
+ "source": [
+ "summary = df.pivot_table(\n",
+ " index=\"education\",\n",
+ " columns=\"gender\",\n",
+ " values=\"customer_lifetime_value\",\n",
+ " aggfunc=\"mean\"\n",
+ ").round(2)\n",
+ "\n",
+ "print(summary)\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 104,
+ "id": "c2e404cb-aeeb-491f-888c-98b19d09d32b",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Customers with High School or Below and Master's education show the highest average lifetime values, while Doctorate holders show the lowest, so higher education does not correlate with higher lifetime value in this data.\n",
+ "# Across all education levels, the gap between Female and Male customers is small (largest for High School or Below, about 526).\n",
+ "# This means education level matters more than gender when predicting customer value."
+ ]
+ },
{
"cell_type": "markdown",
"id": "32c7f2e5-3d90-43e5-be33-9781b6069198",
@@ -130,15 +2886,105 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "id": "3a069e0b-b400-470e-904d-d17582191be4",
- "metadata": {
- "id": "3a069e0b-b400-470e-904d-d17582191be4"
- },
- "outputs": [],
+ "execution_count": 113,
+ "id": "be4a576f-e317-4c16-ba0c-0f4ff5132677",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " month | \n",
+ " policy_type | \n",
+ " number_of_complaints | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 1 | \n",
+ " Corporate Auto | \n",
+ " 443.434952 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 1 | \n",
+ " Personal Auto | \n",
+ " 1727.605722 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 1 | \n",
+ " Special Auto | \n",
+ " 87.074049 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 2 | \n",
+ " Corporate Auto | \n",
+ " 385.208135 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 2 | \n",
+ " Personal Auto | \n",
+ " 1453.684441 | \n",
+ "
\n",
+ " \n",
+ " 5 | \n",
+ " 2 | \n",
+ " Special Auto | \n",
+ " 95.226817 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " month policy_type number_of_complaints\n",
+ "0 1 Corporate Auto 443.434952\n",
+ "1 1 Personal Auto 1727.605722\n",
+ "2 1 Special Auto 87.074049\n",
+ "3 2 Corporate Auto 385.208135\n",
+ "4 2 Personal Auto 1453.684441\n",
+ "5 2 Special Auto 95.226817"
+ ]
+ },
+ "execution_count": 113,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "# Your code goes here"
+ "# Rebuild the month x policy_type complaints pivot here so this cell runs on a fresh kernel;\n",
+ "# summary3 does not appear to be created by any saved cell (sum aggregation assumed -- TODO confirm).\n",
+ "summary3 = df.pivot_table(\n",
+ "    index=\"month\",\n",
+ "    columns=\"policy_type\",\n",
+ "    values=\"number_of_open_complaints\",\n",
+ "    aggfunc=\"sum\",\n",
+ ")\n",
+ "\n",
+ "# stack() moves the policy_type columns into the index, giving one row per (month, policy_type).\n",
+ "long_table = summary3.stack().reset_index()\n",
+ "long_table.columns = [\"month\", \"policy_type\", \"number_of_complaints\"]\n",
+ "long_table"
]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "f42d06af-1a7d-4ba2-9f92-faf12d5218dc",
+ "metadata": {},
+ "outputs": [],
+ "source": []
}
],
"metadata": {
@@ -146,9 +2992,9 @@
"provenance": []
},
"kernelspec": {
- "display_name": "Python 3 (ipykernel)",
+ "display_name": "Python [conda env:base] *",
"language": "python",
- "name": "python3"
+ "name": "conda-base-py"
},
"language_info": {
"codemirror_mode": {
@@ -160,7 +3006,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.9.13"
+ "version": "3.13.5"
}
},
"nbformat": 4,