From 63b7e4606bba825a0540982cabbf8d64d0dea398 Mon Sep 17 00:00:00 2001 From: Irma Fernandez Date: Wed, 10 Sep 2025 19:57:17 +0200 Subject: [PATCH] Lab data structuring and combining done --- lab-dw-data-structuring-and-combining.ipynb | 2000 ++++++++++++++++++- 1 file changed, 1982 insertions(+), 18 deletions(-) diff --git a/lab-dw-data-structuring-and-combining.ipynb b/lab-dw-data-structuring-and-combining.ipynb index ec4e3f9..79f6744 100644 --- a/lab-dw-data-structuring-and-combining.ipynb +++ b/lab-dw-data-structuring-and-combining.ipynb @@ -36,14 +36,853 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "id": "492d06e3-92c7-4105-ac72-536db98d3244", "metadata": { "id": "492d06e3-92c7-4105-ac72-536db98d3244" }, "outputs": [], "source": [ - "# Your code goes here" + "import pandas as pd\n", + "\n", + "file1 = pd.read_csv(\"https://raw.githubusercontent.com/data-bootcamp-v4/data/main/file1.csv\")\n", + "file2 = pd.read_csv(\"https://raw.githubusercontent.com/data-bootcamp-v4/data/main/file2.csv\")\n", + "file3 = pd.read_csv(\"https://raw.githubusercontent.com/data-bootcamp-v4/data/main/file3.csv\")" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "0d352e7e-e5c7-42df-83c4-09a6c46ec8a1", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(12074, 13)" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data = pd.concat([file1, file2, file3], axis=0)\n", + "data.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "dbbddae6-5041-479c-9af4-436aaa3eecd6", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Index: 12074 entries, 0 to 7069\n", + "Data columns (total 13 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 Customer 9137 non-null object \n", + " 1 ST 2067 non-null object \n", + " 2 GENDER 1945 non-null object \n", + " 3 Education 9137 non-null object \n", + " 4 Customer Lifetime Value 9130 non-null object \n", + " 5 Income 9137 non-null float64\n", + " 6 Monthly Premium Auto 9137 non-null float64\n", + " 7 Number of Open Complaints 9137 non-null object \n", + " 8 Policy Type 9137 non-null object \n", + " 9 Vehicle Class 9137 non-null object \n", + " 10 Total Claim Amount 9137 non-null float64\n", + " 11 State 7070 non-null object \n", + " 12 Gender 7070 non-null object \n", + "dtypes: float64(3), object(10)\n", + "memory usage: 1.3+ MB\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
CustomerSTGENDEREducationCustomer Lifetime ValueIncomeMonthly Premium AutoNumber of Open ComplaintsPolicy TypeVehicle ClassTotal Claim AmountStateGender
count91372067194591379130.000009137.0000009137.0000009137.0913791379137.00000070707070
unique90568568211.00000NaNNaN12.036NaN52
topEE99484OregonFBachelor16468.22079NaNNaN0.0Personal AutoFour-Door CarNaNCaliforniaF
freq262398427196.00000NaNNaN5629.067924641NaN25443576
meanNaNNaNNaNNaNNaN37828.820291110.391266NaNNaNNaN430.527140NaNNaN
stdNaNNaNNaNNaNNaN30358.716159581.376032NaNNaNNaN289.582968NaNNaN
minNaNNaNNaNNaNNaN0.00000061.000000NaNNaNNaN0.099007NaNNaN
25%NaNNaNNaNNaNNaN0.00000068.000000NaNNaNNaN266.996814NaNNaN
50%NaNNaNNaNNaNNaN34244.00000083.000000NaNNaNNaN377.561463NaNNaN
75%NaNNaNNaNNaNNaN62447.000000109.000000NaNNaNNaN546.420009NaNNaN
maxNaNNaNNaNNaNNaN99981.00000035354.000000NaNNaNNaN2893.239678NaNNaN
\n", + "
" + ], + "text/plain": [ + " Customer ST GENDER Education Customer Lifetime Value \\\n", + "count 9137 2067 1945 9137 9130.00000 \n", + "unique 9056 8 5 6 8211.00000 \n", + "top EE99484 Oregon F Bachelor 16468.22079 \n", + "freq 2 623 984 2719 6.00000 \n", + "mean NaN NaN NaN NaN NaN \n", + "std NaN NaN NaN NaN NaN \n", + "min NaN NaN NaN NaN NaN \n", + "25% NaN NaN NaN NaN NaN \n", + "50% NaN NaN NaN NaN NaN \n", + "75% NaN NaN NaN NaN NaN \n", + "max NaN NaN NaN NaN NaN \n", + "\n", + " Income Monthly Premium Auto Number of Open Complaints \\\n", + "count 9137.000000 9137.000000 9137.0 \n", + "unique NaN NaN 12.0 \n", + "top NaN NaN 0.0 \n", + "freq NaN NaN 5629.0 \n", + "mean 37828.820291 110.391266 NaN \n", + "std 30358.716159 581.376032 NaN \n", + "min 0.000000 61.000000 NaN \n", + "25% 0.000000 68.000000 NaN \n", + "50% 34244.000000 83.000000 NaN \n", + "75% 62447.000000 109.000000 NaN \n", + "max 99981.000000 35354.000000 NaN \n", + "\n", + " Policy Type Vehicle Class Total Claim Amount State Gender \n", + "count 9137 9137 9137.000000 7070 7070 \n", + "unique 3 6 NaN 5 2 \n", + "top Personal Auto Four-Door Car NaN California F \n", + "freq 6792 4641 NaN 2544 3576 \n", + "mean NaN NaN 430.527140 NaN NaN \n", + "std NaN NaN 289.582968 NaN NaN \n", + "min NaN NaN 0.099007 NaN NaN \n", + "25% NaN NaN 266.996814 NaN NaN \n", + "50% NaN NaN 377.561463 NaN NaN \n", + "75% NaN NaN 546.420009 NaN NaN \n", + "max NaN NaN 2893.239678 NaN NaN " + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data.head()\n", + "data.info()\n", + "data.describe(include=\"all\")" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "50635f47-d816-45b3-baa5-3917352c0d0b", + "metadata": {}, + "outputs": [], + "source": [ + "#remove duplicates\n", + "data = data.drop_duplicates()\n" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "8439d374-0ca1-4879-9e30-307b1905aafa", + "metadata": {}, + "outputs": [], + "source": [ + "#standardize column names (lowercase, no spaces)\n", + "data.columns = [col.lower().replace(\" \",\"_\") for col in data.columns]" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "846ff789-29db-49c3-b892-7cab63ab08d9", + "metadata": {}, + "outputs": [], + "source": [ + "#Remove duplicate column names\n", + "data = data.loc[:, ~data.columns.duplicated()]" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "d50687cd-2010-40f3-9ede-9de26bb0bbb1", + "metadata": {}, + "outputs": [], + "source": [ + "#Strip spaces from all string columns\n", + "for col in data.select_dtypes(include=\"object\").columns:\n", + " data[col] = data[col].str.strip()" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "dce91d34-c478-4309-81ed-6968a142f3b5", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "((9135, 12), np.int64(0))" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Quic overview\n", + "data.shape, data.duplicated().sum()" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "b90fadc2-d997-4586-9d00-fa0f67930aa0", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
customerstgendereducationcustomer_lifetime_valueincomemonthly_premium_autonumber_of_open_complaintspolicy_typevehicle_classtotal_claim_amountstate
0RB50392WashingtonNaNMasterNaN0.01000.01/0/00Personal AutoFour-Door Car2.704934NaN
1QZ44356ArizonaFBachelor697953.59%0.094.01/0/00Personal AutoFour-Door Car1131.464935NaN
2AI49188NevadaFBachelor1288743.17%48767.0108.01/0/00Personal AutoTwo-Door Car566.472247NaN
3WW63253CaliforniaMBachelor764586.18%0.0106.01/0/00Corporate AutoSUV529.881344NaN
4GA49547WashingtonMHigh School or Below536307.65%36357.068.01/0/00Personal AutoFour-Door Car17.269323NaN
\n", + "
" + ], + "text/plain": [ + " customer st gender education customer_lifetime_value \\\n", + "0 RB50392 Washington NaN Master NaN \n", + "1 QZ44356 Arizona F Bachelor 697953.59% \n", + "2 AI49188 Nevada F Bachelor 1288743.17% \n", + "3 WW63253 California M Bachelor 764586.18% \n", + "4 GA49547 Washington M High School or Below 536307.65% \n", + "\n", + " income monthly_premium_auto number_of_open_complaints policy_type \\\n", + "0 0.0 1000.0 1/0/00 Personal Auto \n", + "1 0.0 94.0 1/0/00 Personal Auto \n", + "2 48767.0 108.0 1/0/00 Personal Auto \n", + "3 0.0 106.0 1/0/00 Corporate Auto \n", + "4 36357.0 68.0 1/0/00 Personal Auto \n", + "\n", + " vehicle_class total_claim_amount state \n", + "0 Four-Door Car 2.704934 NaN \n", + "1 Four-Door Car 1131.464935 NaN \n", + "2 Two-Door Car 566.472247 NaN \n", + "3 SUV 529.881344 NaN \n", + "4 Four-Door Car 17.269323 NaN " + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "1e4900fa-66f7-4d88-aaae-9c3050d1ede0", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "customer 1\n", + "st 7071\n", + "gender 7193\n", + "education 1\n", + "customer_lifetime_value 7078\n", + "income 1\n", + "monthly_premium_auto 1\n", + "number_of_open_complaints 7071\n", + "policy_type 1\n", + "vehicle_class 1\n", + "total_claim_amount 1\n", + "state 2065\n", + "dtype: int64" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#check missing values\n", + "data.isnull().sum()" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "a9c8ea36-a89f-4abe-9bdb-ae63cccde4e4", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "customer object\n", + "st object\n", + "gender object\n", + "education object\n", + "customer_lifetime_value object\n", + "income float64\n", + "monthly_premium_auto float64\n", + "number_of_open_complaints object\n", + "policy_type object\n", + "vehicle_class object\n", + "total_claim_amount float64\n", + "state object\n", + "dtype: object" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#check data types\n", + "data.dtypes" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "ec8c6777-b218-47ed-a38f-83af21386745", + "metadata": {}, + "outputs": [], + "source": [ + "#Fix customer_lifetime_value (remove % and convert to float):\n", + "data[\"customer_lifetime_value\"] = (\n", + " data[\"customer_lifetime_value\"]\n", + " .str.replace(\"%\", \"\", regex=False) # remove % sign\n", + " .astype(float)\n", + ")\n" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "73798326-cefc-45d7-969b-9a3d6df34aeb", + "metadata": {}, + "outputs": [], + "source": [ + "cat_cols = [\"gender\", \"education\", \"policy_type\", \"vehicle_class\", \"state\"]\n", + "for col in cat_cols:\n", + " data[col] = data[col].str.lower()\n" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "05911ead-8c3d-4a0a-b106-159802324a12", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Index: 9135 entries, 0 to 7069\n", + "Data columns (total 12 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 customer 9134 non-null object \n", + " 1 st 2064 non-null object \n", + " 2 gender 1942 non-null object \n", + " 3 education 9134 non-null object \n", + " 4 customer_lifetime_value 2057 non-null float64\n", + " 5 income 9134 non-null float64\n", + " 6 monthly_premium_auto 9134 non-null float64\n", + " 7 number_of_open_complaints 2064 non-null object \n", + " 8 policy_type 9134 non-null object \n", + " 9 vehicle_class 9134 non-null object \n", + " 10 total_claim_amount 9134 non-null float64\n", + " 11 state 7070 non-null object \n", + "dtypes: float64(4), object(8)\n", + "memory usage: 927.8+ KB\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
customerstgendereducationcustomer_lifetime_valueincomemonthly_premium_autonumber_of_open_complaintspolicy_typevehicle_classtotal_claim_amountstate
0RB50392WashingtonNaNmasterNaN0.01000.01/0/00personal autofour-door car2.704934NaN
1QZ44356Arizonafbachelor697953.590.094.01/0/00personal autofour-door car1131.464935NaN
2AI49188Nevadafbachelor1288743.1748767.0108.01/0/00personal autotwo-door car566.472247NaN
3WW63253Californiambachelor764586.180.0106.01/0/00corporate autosuv529.881344NaN
4GA49547Washingtonmhigh school or below536307.6536357.068.01/0/00personal autofour-door car17.269323NaN
\n", + "
" + ], + "text/plain": [ + " customer st gender education customer_lifetime_value \\\n", + "0 RB50392 Washington NaN master NaN \n", + "1 QZ44356 Arizona f bachelor 697953.59 \n", + "2 AI49188 Nevada f bachelor 1288743.17 \n", + "3 WW63253 California m bachelor 764586.18 \n", + "4 GA49547 Washington m high school or below 536307.65 \n", + "\n", + " income monthly_premium_auto number_of_open_complaints policy_type \\\n", + "0 0.0 1000.0 1/0/00 personal auto \n", + "1 0.0 94.0 1/0/00 personal auto \n", + "2 48767.0 108.0 1/0/00 personal auto \n", + "3 0.0 106.0 1/0/00 corporate auto \n", + "4 36357.0 68.0 1/0/00 personal auto \n", + "\n", + " vehicle_class total_claim_amount state \n", + "0 four-door car 2.704934 NaN \n", + "1 four-door car 1131.464935 NaN \n", + "2 two-door car 566.472247 NaN \n", + "3 suv 529.881344 NaN \n", + "4 four-door car 17.269323 NaN " + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data.info()\n", + "data.head()\n" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "ca19ff11-8332-4fbe-a630-4448f755af47", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['customer', 'st', 'gender', 'education', 'customer_lifetime_value', 'income', 'monthly_premium_auto', 'number_of_open_complaints', 'policy_type', 'vehicle_class', 'total_claim_amount', 'state']\n" + ] + } + ], + "source": [ + "print(data.columns.tolist())" ] }, { @@ -70,18 +909,6 @@ "This dataset contains information such as customer demographics, policy details, vehicle information, and the customer's response to the last marketing campaign. Our goal is to explore and analyze this data by performing data cleaning, formatting, and structuring." ] }, - { - "cell_type": "code", - "execution_count": null, - "id": "aa10d9b0-1c27-4d3f-a8e4-db6ab73bfd26", - "metadata": { - "id": "aa10d9b0-1c27-4d3f-a8e4-db6ab73bfd26" - }, - "outputs": [], - "source": [ - "# Your code goes here" - ] - }, { "cell_type": "markdown", "id": "df35fd0d-513e-4e77-867e-429da10a9cc7", @@ -103,6 +930,1005 @@ "2. Create a pivot table that shows the average customer lifetime value per gender and education level. Analyze the resulting table to draw insights." ] }, + { + "cell_type": "code", + "execution_count": 17, + "id": "9bed770d-47c9-4ace-95ff-e5356c8da716", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
unnamed:_0customerstatecustomer_lifetime_valueresponsecoverageeducationeffective_to_dateemploymentstatusgender...number_of_policiespolicy_typepolicyrenew_offer_typesales_channeltotal_claim_amountvehicle_classvehicle_sizevehicle_typemonth
00DK49336Arizona4809.216960NoBasicCollege2011-02-18EmployedM...9Corporate AutoCorporate L3Offer3Agent292.800000Four-Door CarMedsizeA2
11KX64629California2228.525238NoBasicCollege2011-01-18UnemployedF...1Personal AutoPersonal L3Offer4Call Center744.924331Four-Door CarMedsizeA1
22LZ68649Washington14947.917300NoBasicBachelor2011-02-10EmployedM...2Personal AutoPersonal L3Offer3Call Center480.000000SUVMedsizeA2
33XL78013Oregon22332.439460YesExtendedCollege2011-01-11EmployedM...2Corporate AutoCorporate L3Offer2Branch484.013411Four-Door CarMedsizeA1
44QA50777Oregon9025.067525NoPremiumBachelor2011-01-17Medical LeaveF...7Personal AutoPersonal L2Offer1Branch707.925645Four-Door CarMedsizeA1
\n", + "

5 rows × 27 columns

\n", + "
" + ], + "text/plain": [ + " unnamed:_0 customer state customer_lifetime_value response \\\n", + "0 0 DK49336 Arizona 4809.216960 No \n", + "1 1 KX64629 California 2228.525238 No \n", + "2 2 LZ68649 Washington 14947.917300 No \n", + "3 3 XL78013 Oregon 22332.439460 Yes \n", + "4 4 QA50777 Oregon 9025.067525 No \n", + "\n", + " coverage education effective_to_date employmentstatus gender ... \\\n", + "0 Basic College 2011-02-18 Employed M ... \n", + "1 Basic College 2011-01-18 Unemployed F ... \n", + "2 Basic Bachelor 2011-02-10 Employed M ... \n", + "3 Extended College 2011-01-11 Employed M ... \n", + "4 Premium Bachelor 2011-01-17 Medical Leave F ... \n", + "\n", + " number_of_policies policy_type policy renew_offer_type \\\n", + "0 9 Corporate Auto Corporate L3 Offer3 \n", + "1 1 Personal Auto Personal L3 Offer4 \n", + "2 2 Personal Auto Personal L3 Offer3 \n", + "3 2 Corporate Auto Corporate L3 Offer2 \n", + "4 7 Personal Auto Personal L2 Offer1 \n", + "\n", + " sales_channel total_claim_amount vehicle_class vehicle_size \\\n", + "0 Agent 292.800000 Four-Door Car Medsize \n", + "1 Call Center 744.924331 Four-Door Car Medsize \n", + "2 Call Center 480.000000 SUV Medsize \n", + "3 Branch 484.013411 Four-Door Car Medsize \n", + "4 Branch 707.925645 Four-Door Car Medsize \n", + "\n", + " vehicle_type month \n", + "0 A 2 \n", + "1 A 1 \n", + "2 A 2 \n", + "3 A 1 \n", + "4 A 1 \n", + "\n", + "[5 rows x 27 columns]" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "import pandas as pd\n", + "\n", + "data2 = pd.read_csv(\"https://raw.githubusercontent.com/data-bootcamp-v4/data/main/marketing_customer_analysis_clean.csv\")\n", + "\n", + "display(data2.head())" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "bc754a1e-4b02-41a1-8cef-013ff22a00f8", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(10910, 27)\n" + ] + } + ], + "source": [ + "print(data2.shape)" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "6add55c2-2def-489d-8e53-310868042a28", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 10910 entries, 0 to 10909\n", + "Data columns (total 27 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 unnamed:_0 10910 non-null int64 \n", + " 1 customer 10910 non-null object \n", + " 2 state 10910 non-null object \n", + " 3 customer_lifetime_value 10910 non-null float64\n", + " 4 response 10910 non-null object \n", + " 5 coverage 10910 non-null object \n", + " 6 education 10910 non-null object \n", + " 7 effective_to_date 10910 non-null object \n", + " 8 employmentstatus 10910 non-null object \n", + " 9 gender 10910 non-null object \n", + " 10 income 10910 non-null int64 \n", + " 11 location_code 10910 non-null object \n", + " 12 marital_status 10910 non-null object \n", + " 13 monthly_premium_auto 10910 non-null int64 \n", + " 14 months_since_last_claim 10910 non-null float64\n", + " 15 months_since_policy_inception 10910 non-null int64 \n", + " 16 number_of_open_complaints 10910 non-null float64\n", + " 17 number_of_policies 10910 non-null int64 \n", + " 18 policy_type 10910 non-null object \n", + " 19 policy 10910 non-null object \n", + " 20 renew_offer_type 10910 non-null object \n", + " 21 sales_channel 10910 non-null object \n", + " 22 total_claim_amount 10910 non-null float64\n", + " 23 vehicle_class 10910 non-null object \n", + " 24 vehicle_size 10910 non-null object \n", + " 25 vehicle_type 10910 non-null object \n", + " 26 month 10910 non-null int64 \n", + "dtypes: float64(4), int64(6), object(17)\n", + "memory usage: 2.2+ MB\n" + ] + } + ], + "source": [ + "data2.info()" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "id": "e5c2eab2-93c7-4be4-9ad0-ee20bc31e8bd", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Index(['unnamed:_0', 'customer', 'state', 'customer_lifetime_value',\n", + " 'response', 'coverage', 'education', 'effective_to_date',\n", + " 'employmentstatus', 'gender', 'income', 'location_code',\n", + " 'marital_status', 'monthly_premium_auto', 'months_since_last_claim',\n", + " 'months_since_policy_inception', 'number_of_open_complaints',\n", + " 'number_of_policies', 'policy_type', 'policy', 'renew_offer_type',\n", + " 'sales_channel', 'total_claim_amount', 'vehicle_class', 'vehicle_size',\n", + " 'vehicle_type', 'month'],\n", + " dtype='object')" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "display(data2.columns)" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "id": "25bacdbe-f0a5-4dfb-978c-5e5dad441eca", + "metadata": {}, + "outputs": [], + "source": [ + "# Standardize column names\n", + "data2.columns = [col.lower().replace(\" \", \"_\") for col in data2.columns]\n" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "id": "aaa765c3-4f77-41f6-8ac5-0429b4a1d3b4", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "unnamed:_0 0\n", + "customer 0\n", + "state 0\n", + "customer_lifetime_value 0\n", + "response 0\n", + "coverage 0\n", + "education 0\n", + "effective_to_date 0\n", + "employmentstatus 0\n", + "gender 0\n", + "income 0\n", + "location_code 0\n", + "marital_status 0\n", + "monthly_premium_auto 0\n", + "months_since_last_claim 0\n", + "months_since_policy_inception 0\n", + "number_of_open_complaints 0\n", + "number_of_policies 0\n", + "policy_type 0\n", + "policy 0\n", + "renew_offer_type 0\n", + "sales_channel 0\n", + "total_claim_amount 0\n", + "vehicle_class 0\n", + "vehicle_size 0\n", + "vehicle_type 0\n", + "month 0\n", + "dtype: int64" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Check missing values\n", + "data2.isnull().sum()\n" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "id": "8a2b3ba6-9a00-446f-8493-dd2ea01f85a3", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "unnamed:_0 int64\n", + "customer object\n", + "state object\n", + "customer_lifetime_value float64\n", + "response object\n", + "coverage object\n", + "education object\n", + "effective_to_date object\n", + "employmentstatus object\n", + "gender object\n", + "income int64\n", + "location_code object\n", + "marital_status object\n", + "monthly_premium_auto int64\n", + "months_since_last_claim float64\n", + "months_since_policy_inception int64\n", + "number_of_open_complaints float64\n", + "number_of_policies int64\n", + "policy_type object\n", + "policy object\n", + "renew_offer_type object\n", + "sales_channel object\n", + "total_claim_amount float64\n", + "vehicle_class object\n", + "vehicle_size object\n", + "vehicle_type object\n", + "month int64\n", + "dtype: object" + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Check data types\n", + "data2.dtypes\n" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "id": "2a49bce6-0f5b-4236-9d56-3d62f2ae9f46", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 10910 entries, 0 to 10909\n", + "Data columns (total 26 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 customer 10910 non-null object \n", + " 1 state 10910 non-null object \n", + " 2 customer_lifetime_value 10910 non-null float64 \n", + " 3 response 10910 non-null object \n", + " 4 coverage 10910 non-null object \n", + " 5 education 10910 non-null object \n", + " 6 effective_to_date 10910 non-null datetime64[ns]\n", + " 7 employmentstatus 10910 non-null object \n", + " 8 gender 10910 non-null object \n", + " 9 income 10910 non-null int64 \n", + " 10 location_code 10910 non-null object \n", + " 11 marital_status 10910 non-null object \n", + " 12 monthly_premium_auto 10910 non-null int64 \n", + " 13 months_since_last_claim 10910 non-null float64 \n", + " 14 months_since_policy_inception 10910 non-null int64 \n", + " 15 number_of_open_complaints 10910 non-null float64 \n", + " 16 number_of_policies 10910 non-null int64 \n", + " 17 policy_type 10910 non-null object \n", + " 18 policy 10910 non-null object \n", + " 19 renew_offer_type 10910 non-null object \n", + " 20 sales_channel 10910 non-null object \n", + " 21 total_claim_amount 10910 non-null float64 \n", + " 22 vehicle_class 10910 non-null object \n", + " 23 vehicle_size 10910 non-null object \n", + " 24 vehicle_type 10910 non-null object \n", + " 25 month 10910 non-null int64 \n", + "dtypes: datetime64[ns](1), float64(4), int64(5), object(16)\n", + "memory usage: 2.2+ MB\n", + "None\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
customerstatecustomer_lifetime_valueresponsecoverageeducationeffective_to_dateemploymentstatusgenderincome...number_of_policiespolicy_typepolicyrenew_offer_typesales_channeltotal_claim_amountvehicle_classvehicle_sizevehicle_typemonth
0DK49336Arizona4809.216960NoBasicCollege2011-02-18EmployedM48029...9Corporate AutoCorporate L3Offer3Agent292.800000Four-Door CarMedsizeA2
1KX64629California2228.525238NoBasicCollege2011-01-18UnemployedF0...1Personal AutoPersonal L3Offer4Call Center744.924331Four-Door CarMedsizeA1
2LZ68649Washington14947.917300NoBasicBachelor2011-02-10EmployedM22139...2Personal AutoPersonal L3Offer3Call Center480.000000SUVMedsizeA2
3XL78013Oregon22332.439460YesExtendedCollege2011-01-11EmployedM49078...2Corporate AutoCorporate L3Offer2Branch484.013411Four-Door CarMedsizeA1
4QA50777Oregon9025.067525NoPremiumBachelor2011-01-17Medical LeaveF23675...7Personal AutoPersonal L2Offer1Branch707.925645Four-Door CarMedsizeA1
\n", + "

5 rows × 26 columns

\n", + "
" + ], + "text/plain": [ + " customer state customer_lifetime_value response coverage education \\\n", + "0 DK49336 Arizona 4809.216960 No Basic College \n", + "1 KX64629 California 2228.525238 No Basic College \n", + "2 LZ68649 Washington 14947.917300 No Basic Bachelor \n", + "3 XL78013 Oregon 22332.439460 Yes Extended College \n", + "4 QA50777 Oregon 9025.067525 No Premium Bachelor \n", + "\n", + " effective_to_date employmentstatus gender income ... number_of_policies \\\n", + "0 2011-02-18 Employed M 48029 ... 9 \n", + "1 2011-01-18 Unemployed F 0 ... 1 \n", + "2 2011-02-10 Employed M 22139 ... 2 \n", + "3 2011-01-11 Employed M 49078 ... 2 \n", + "4 2011-01-17 Medical Leave F 23675 ... 7 \n", + "\n", + " policy_type policy renew_offer_type sales_channel \\\n", + "0 Corporate Auto Corporate L3 Offer3 Agent \n", + "1 Personal Auto Personal L3 Offer4 Call Center \n", + "2 Personal Auto Personal L3 Offer3 Call Center \n", + "3 Corporate Auto Corporate L3 Offer2 Branch \n", + "4 Personal Auto Personal L2 Offer1 Branch \n", + "\n", + " total_claim_amount vehicle_class vehicle_size vehicle_type month \n", + "0 292.800000 Four-Door Car Medsize A 2 \n", + "1 744.924331 Four-Door Car Medsize A 1 \n", + "2 480.000000 SUV Medsize A 2 \n", + "3 484.013411 Four-Door Car Medsize A 1 \n", + "4 707.925645 Four-Door Car Medsize A 1 \n", + "\n", + "[5 rows x 26 columns]" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Drop useless index column\n", + "data2 = data2.drop(columns=[\"unnamed:_0\"])\n", + "\n", + "\n", + "# Convert effective_to_date to datetime\n", + "data2[\"effective_to_date\"] = pd.to_datetime(data2[\"effective_to_date\"])\n", + "\n", + "print(data2.info())\n", + "display(data2.head())\n" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "id": "7708ab97-b17b-4f4e-85e8-45151aff5f0e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
total_claim_amount
sales_channel
Agent1,810,226.82
Branch1,301,204.00
Call Center926,600.82
Web706,600.04
\n", + "
" + ], + "text/plain": [ + " total_claim_amount\n", + "sales_channel \n", + "Agent 1,810,226.82\n", + "Branch 1,301,204.00\n", + "Call Center 926,600.82\n", + "Web 706,600.04" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "#1 Total Revenue per Sales Channel\n", + "pivot1 = data2.pivot_table(\n", + " index=\"sales_channel\",\n", + " values=\"total_claim_amount\",\n", + " aggfunc=\"sum\").round(2)\n", + "\n", + "# Format numbers with comma separator\n", + "pivot1[\"total_claim_amount\"] = pivot1[\"total_claim_amount\"].map(\"{:,.2f}\".format)\n", + "\n", + "display(pivot1)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c21a995a-937f-45bc-b0a4-c9dc55d7cff6", + "metadata": {}, + "outputs": [], + "source": [ + "#Conclusion: the sales_channel \"Agent\" generated more revenue. " + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "id": "049cc966-ce8b-45f9-b222-a41dbda1e8ab", + "metadata": {}, + "outputs": [], + "source": [ + "# Task 2: Average customer lifetime value per gender and education level\n", + "\n", + "#We want a pivot table where:\n", + "\n", + "#Rows = gender\n", + "\n", + "#Columns = education\n", + "\n", + "#Values = customer_lifetime_value\n", + "\n", + "#Aggregation = average (mean)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "id": "cb4bd42b-d8ba-4c06-854a-b822317d32ff", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
educationBachelorCollegeDoctorHigh School or BelowMaster
gender
F7,874.277,748.827,328.518,675.228,157.05
M7,703.608,052.467,415.338,149.698,168.83
\n", + "
" + ], + "text/plain": [ + "education Bachelor College Doctor High School or Below Master\n", + "gender \n", + "F 7,874.27 7,748.82 7,328.51 8,675.22 8,157.05\n", + "M 7,703.60 8,052.46 7,415.33 8,149.69 8,168.83" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "pivot2 = data2.pivot_table(\n", + " index=\"gender\",\n", + " columns=\"education\",\n", + " values=\"customer_lifetime_value\",\n", + " aggfunc=\"mean\").round(2)\n", + "\n", + "# Format numbers with commas\n", + "pivot2 = pivot2.map(\"{:,.2f}\".format)\n", + "\n", + "display(pivot2)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "id": "d083732c-121e-4318-9ef3-d8f7f09c5a12", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
educationHigh School or BelowCollegeBachelorMasterDoctor
gender
F8,675.227,748.827,874.278,157.057,328.51
M8,149.698,052.467,703.608,168.837,415.33
\n", + "
" + ], + "text/plain": [ + "education High School or Below College Bachelor Master Doctor\n", + "gender \n", + "F 8,675.22 7,748.82 7,874.27 8,157.05 7,328.51\n", + "M 8,149.69 8,052.46 7,703.60 8,168.83 7,415.33" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Create pivot again\n", + "pivot2 = data2.pivot_table(\n", + " index=\"gender\",\n", + " columns=\"education\",\n", + " values=\"customer_lifetime_value\",\n", + " aggfunc=\"mean\"\n", + ").round(2)\n", + "\n", + "# Reorder the columns\n", + "order = [\"High School or Below\", \"College\", \"Bachelor\", \"Master\", \"Doctor\"]\n", + "pivot2 = pivot2[order]\n", + "\n", + "# Format with commas\n", + "pivot2 = pivot2.map(\"{:,.2f}\".format)\n", + "\n", + "display(pivot2)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6ec5cd14-f753-4191-98f7-7d07cf0a4f74", + "metadata": {}, + "outputs": [], + "source": [ + "# Conclusion:\n", + "#Customers with a Doctor degree have the lowest lifetime value, while the highest values come from females with High School or Below and males with a Master’s degree." + ] + }, { "cell_type": "markdown", "id": "32c7f2e5-3d90-43e5-be33-9781b6069198", @@ -130,15 +1956,153 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 46, "id": "3a069e0b-b400-470e-904d-d17582191be4", "metadata": { "id": "3a069e0b-b400-470e-904d-d17582191be4" }, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
policy_typeeffective_to_datecomplaints_count
0Corporate Auto2011-01-0119.384256
1Corporate Auto2011-01-0211.768512
2Corporate Auto2011-01-0320.537024
3Corporate Auto2011-01-0414.768512
4Corporate Auto2011-01-0514.000000
............
172Special Auto2011-02-2413.000000
173Special Auto2011-02-250.000000
174Special Auto2011-02-261.000000
175Special Auto2011-02-274.000000
176Special Auto2011-02-282.000000
\n", + "

177 rows × 3 columns

\n", + "
" + ], + "text/plain": [ + " policy_type effective_to_date complaints_count\n", + "0 Corporate Auto 2011-01-01 19.384256\n", + "1 Corporate Auto 2011-01-02 11.768512\n", + "2 Corporate Auto 2011-01-03 20.537024\n", + "3 Corporate Auto 2011-01-04 14.768512\n", + "4 Corporate Auto 2011-01-05 14.000000\n", + ".. ... ... ...\n", + "172 Special Auto 2011-02-24 13.000000\n", + "173 Special Auto 2011-02-25 0.000000\n", + "174 Special Auto 2011-02-26 1.000000\n", + "175 Special Auto 2011-02-27 4.000000\n", + "176 Special Auto 2011-02-28 2.000000\n", + "\n", + "[177 rows x 3 columns]" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ - "# Your code goes here" + "\n", + "# Create the summary table\n", + "complaints = (\n", + " data2.groupby([\"policy_type\", \"effective_to_date\"])[\"number_of_open_complaints\"]\n", + " .sum()\n", + " .reset_index()\n", + " .rename(columns={\"number_of_open_complaints\": \"complaints_count\"})\n", + " .sort_values([\"policy_type\", \"effective_to_date\"]))\n", + "\n", + "# Display result\n", + "display(complaints)\n", + "\n" ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c22034d3-0e05-4e23-a752-94659f7ce654", + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { @@ -160,7 +2124,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.13" + "version": "3.13.5" } }, "nbformat": 4,