diff --git a/lab-dw-aggregating.ipynb b/lab-dw-aggregating.ipynb index fadd718..977927a 100644 --- a/lab-dw-aggregating.ipynb +++ b/lab-dw-aggregating.ipynb @@ -127,14 +127,173 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "id": "449513f4-0459-46a0-a18d-9398d974c9ad", "metadata": { "id": "449513f4-0459-46a0-a18d-9398d974c9ad" }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1. Customers with low total claim amount and 'Yes' to response:\n", + " Unnamed: 0 Customer State Customer Lifetime Value Response \\\n", + "3 3 XL78013 Oregon 22332.439460 Yes \n", + "8 8 FM55990 California 5989.773931 Yes \n", + "15 15 CW49887 California 4626.801093 Yes \n", + "19 19 NJ54277 California 3746.751625 Yes \n", + "27 27 MQ68407 Oregon 4376.363592 Yes \n", + "\n", + " Coverage Education Effective To Date EmploymentStatus Gender ... \\\n", + "3 Extended College 1/11/11 Employed M ... \n", + "8 Premium College 1/19/11 Employed M ... \n", + "15 Basic Master 1/16/11 Employed F ... \n", + "19 Extended College 2/26/11 Employed F ... \n", + "27 Premium Bachelor 2/28/11 Employed F ... \n", + "\n", + " Number of Open Complaints Number of Policies Policy Type \\\n", + "3 0.0 2 Corporate Auto \n", + "8 0.0 1 Personal Auto \n", + "15 0.0 1 Special Auto \n", + "19 1.0 1 Personal Auto \n", + "27 0.0 1 Personal Auto \n", + "\n", + " Policy Renew Offer Type Sales Channel Total Claim Amount \\\n", + "3 Corporate L3 Offer2 Branch 484.013411 \n", + "8 Personal L1 Offer2 Branch 739.200000 \n", + "15 Special L1 Offer2 Branch 547.200000 \n", + "19 Personal L2 Offer2 Call Center 19.575683 \n", + "27 Personal L3 Offer2 Agent 60.036683 \n", + "\n", + " Vehicle Class Vehicle Size Vehicle Type \n", + "3 Four-Door Car Medsize A \n", + "8 Sports Car Medsize NaN \n", + "15 SUV Medsize NaN \n", + "19 Two-Door Car Large A \n", + "27 Four-Door Car Medsize NaN \n", + "\n", + "[5 rows x 26 columns]\n", + "\n", + "2. 
Average metrics by policy type and gender for 'Yes' responses:\n", + " Policy Type Gender Monthly Premium Auto Customer Lifetime Value \\\n", + "0 Corporate Auto F 94.301775 7712.628736 \n", + "1 Corporate Auto M 92.188312 7944.465414 \n", + "2 Personal Auto F 98.998148 8339.791842 \n", + "3 Personal Auto M 91.085821 7448.383281 \n", + "4 Special Auto F 92.314286 7691.584111 \n", + "5 Special Auto M 86.343750 8247.088702 \n", + "\n", + " Total Claim Amount \n", + "0 433.738499 \n", + "1 408.582459 \n", + "2 452.965929 \n", + "3 457.010178 \n", + "4 453.280164 \n", + "5 429.527942 \n", + "\n", + "3. States with more than 500 customers:\n", + " State Customer Count\n", + "0 California 3552\n", + "1 Oregon 2909\n", + "2 Arizona 1937\n", + "3 Nevada 993\n", + "4 Washington 888\n", + "\n", + "4. Customer lifetime value stats by education and gender:\n", + " max min median\n", + "Education Gender \n", + "Bachelor F 73225.95652 1904.000852 5640.505303\n", + " M 67907.27050 1898.007675 5548.031892\n", + "College F 61850.18803 1898.683686 5623.611187\n", + " M 61134.68307 1918.119700 6005.847375\n", + "Doctor F 44856.11397 2395.570000 5332.462694\n", + " M 32677.34284 2267.604038 5577.669457\n", + "High School or Below F 55277.44589 2144.921535 6039.553187\n", + " M 83325.38119 1940.981221 6286.731006\n", + "Master F 51016.06704 2417.777032 5729.855012\n", + " M 50568.25912 2272.307310 5579.099207\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\LAP-MPC\\AppData\\Local\\Temp\\ipykernel_11684\\4166574666.py:34: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n", + " df['Effective To Date'] = pd.to_datetime(df['Effective To Date'])\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "5. 
import calendar

import pandas as pd

# 'Effective To Date' values in this dataset look like "1/11/11" or "2/26/11",
# i.e. month/day/two-digit-year. Passing the format explicitly avoids the
# "Could not infer format" UserWarning and the slow per-element dateutil fallback.
EFFECTIVE_DATE_FORMAT = "%m/%d/%y"


def _month_rank():
    """Map each month name (as produced by ``strftime('%B')``) to its calendar position (1-12)."""
    return {name: position for position, name in enumerate(calendar.month_name) if name}


def filter_low_claim_responders(customers, max_claim=1000):
    """Return rows whose 'Total Claim Amount' is below ``max_claim`` and whose 'Response' is 'Yes'.

    Parameters:
        customers: DataFrame with 'Total Claim Amount' and 'Response' columns.
        max_claim: exclusive upper bound for the claim amount (default 1000).

    Returns:
        The matching rows of ``customers`` with their original index labels.
    """
    is_low_claim = customers['Total Claim Amount'] < max_claim
    said_yes = customers['Response'] == 'Yes'
    return customers[is_low_claim & said_yes]


def average_metrics_for_responders(customers):
    """Average premium, lifetime value and claim amount per policy type and gender.

    Only rows with 'Response' == 'Yes' are considered.

    Returns:
        DataFrame with one row per ('Policy Type', 'Gender') pair and the three mean columns.
    """
    responders = customers[customers['Response'] == 'Yes']
    return responders.groupby(['Policy Type', 'Gender']).agg({
        'Monthly Premium Auto': 'mean',
        'Customer Lifetime Value': 'mean',
        'Total Claim Amount': 'mean'
    }).reset_index()


def states_above_customer_count(customers, min_customers=500):
    """Count customers per state and keep states with strictly more than ``min_customers``.

    Returns:
        DataFrame with columns 'State' and 'Customer Count', ordered by descending count.
    """
    counts = customers['State'].value_counts().reset_index()
    # Rename positionally: the column names value_counts() yields differ across pandas versions.
    counts.columns = ['State', 'Customer Count']
    return counts[counts['Customer Count'] > min_customers]


def lifetime_value_stats(customers):
    """Max, min and median 'Customer Lifetime Value' per ('Education', 'Gender') group."""
    grouped = customers.groupby(['Education', 'Gender'])['Customer Lifetime Value']
    return grouped.agg(['max', 'min', 'median'])


def add_effective_month(customers, date_format=EFFECTIVE_DATE_FORMAT):
    """Return a copy of ``customers`` with parsed dates and a 'Month' name column.

    'Effective To Date' is converted to datetime using ``date_format`` and a new
    'Month' column holds the full month name. The input frame is left untouched.

    Raises:
        ValueError: if a date string does not match ``date_format``.
    """
    effective = pd.to_datetime(customers['Effective To Date'], format=date_format)
    return customers.assign(**{
        'Effective To Date': effective,
        'Month': effective.dt.strftime('%B'),
    })


def policies_by_state_and_month(customers):
    """Pivot policy counts into a State x Month table with months in calendar order.

    Expects the 'Month' column produced by ``add_effective_month``.
    """
    table = customers.groupby(['State', 'Month']).size().unstack(fill_value=0)
    rank = _month_rank()
    # Plain string grouping sorts alphabetically (February before January);
    # reorder the columns chronologically, keeping unrecognised labels at the end.
    ordered = sorted(table.columns, key=lambda name: rank.get(name, len(rank) + 1))
    return table.loc[:, ordered]


def monthly_policies_for_top_states(customers, top_n=3):
    """Monthly policy counts for the ``top_n`` states with the most policies overall.

    Returns:
        DataFrame with columns 'State', 'Month', 'Policy Count', sorted by state
        and then by calendar month.
    """
    counts = customers.groupby(['State', 'Month']).size().reset_index(name='Policy Count')
    top_states = counts.groupby('State')['Policy Count'].sum().nlargest(top_n).index
    selected = counts[counts['State'].isin(top_states)]
    rank = _month_rank()
    return selected.sort_values(
        ['State', 'Month'],
        # Only the month column needs remapping so it sorts chronologically.
        key=lambda column: column.map(rank) if column.name == 'Month' else column,
    )


# The guard is true inside a notebook kernel, so the cell still runs as before and
# leaves the same global names behind, while the functions stay importable/testable.
if __name__ == "__main__":
    # Load the dataset
    url = "https://raw.githubusercontent.com/data-bootcamp-v4/data/main/marketing_customer_analysis.csv"
    df = pd.read_csv(url)

    # 1. Filter for low total claim amount and 'Yes' responses
    low_claim_yes_response_df = filter_low_claim_responders(df)
    print("1. Customers with low total claim amount and 'Yes' to response:")
    print(low_claim_yes_response_df.head())

    # 2. Average metrics by policy type and gender for 'Yes' responses
    avg_metrics_df = average_metrics_for_responders(df)
    print("\n2. Average metrics by policy type and gender for 'Yes' responses:")
    print(avg_metrics_df)

    # 3. Count customers per state (only where more than 500 customers)
    filtered_states = states_above_customer_count(df)
    print("\n3. States with more than 500 customers:")
    print(filtered_states)

    # 4. Lifetime value stats by education and gender
    clv_stats = lifetime_value_stats(df)
    print("\n4. Customer lifetime value stats by education and gender:")
    print(clv_stats)

    # 5. Policies sold by state and month
    df = add_effective_month(df)
    policies_by_state_month = policies_by_state_and_month(df)
    print("\n5. Policies sold by state and month:")
    print(policies_by_state_month)

    # 6. Find the top 3 states with the most policies sold
    top_policies_by_state_month = monthly_policies_for_top_states(df)
    print("\n6. Number of policies sold by month for the top 3 states:")
    print(top_policies_by_state_month)