data-bootcamp-v4 · MBengochea · Sep 12, 2025
diff --git a/lab-dw-aggregating.ipynb b/lab-dw-aggregating.ipynb
@@ -127,14 +127,173 @@
     },
     {
       "cell_type": "code",
-      "execution_count": null,
+      "execution_count": 2,
       "id": "449513f4-0459-46a0-a18d-9398d974c9ad",
       "metadata": {
         "id": "449513f4-0459-46a0-a18d-9398d974c9ad"
       },
-      "outputs": [],
+      "outputs": [
+        {
+          "name": "stdout",
+          "output_type": "stream",
+          "text": [
+            "1. Customers with low total claim amount and 'Yes' to response:\n",
+            "    Unnamed: 0 Customer       State  Customer Lifetime Value Response  \\\n",
+            "3            3  XL78013      Oregon             22332.439460      Yes   \n",
+            "8            8  FM55990  California              5989.773931      Yes   \n",
+            "15          15  CW49887  California              4626.801093      Yes   \n",
+            "19          19  NJ54277  California              3746.751625      Yes   \n",
+            "27          27  MQ68407      Oregon              4376.363592      Yes   \n",
+            "\n",
+            "    Coverage Education Effective To Date EmploymentStatus Gender  ...  \\\n",
+            "3   Extended   College           1/11/11         Employed      M  ...   \n",
+            "8    Premium   College           1/19/11         Employed      M  ...   \n",
+            "15     Basic    Master           1/16/11         Employed      F  ...   \n",
+            "19  Extended   College           2/26/11         Employed      F  ...   \n",
+            "27   Premium  Bachelor           2/28/11         Employed      F  ...   \n",
+            "\n",
+            "    Number of Open Complaints Number of Policies     Policy Type  \\\n",
+            "3                         0.0                  2  Corporate Auto   \n",
+            "8                         0.0                  1   Personal Auto   \n",
+            "15                        0.0                  1    Special Auto   \n",
+            "19                        1.0                  1   Personal Auto   \n",
+            "27                        0.0                  1   Personal Auto   \n",
+            "\n",
+            "          Policy  Renew Offer Type  Sales Channel  Total Claim Amount  \\\n",
+            "3   Corporate L3            Offer2         Branch          484.013411   \n",
+            "8    Personal L1            Offer2         Branch          739.200000   \n",
+            "15    Special L1            Offer2         Branch          547.200000   \n",
+            "19   Personal L2            Offer2    Call Center           19.575683   \n",
+            "27   Personal L3            Offer2          Agent           60.036683   \n",
+            "\n",
+            "    Vehicle Class Vehicle Size Vehicle Type  \n",
+            "3   Four-Door Car      Medsize            A  \n",
+            "8      Sports Car      Medsize          NaN  \n",
+            "15            SUV      Medsize          NaN  \n",
+            "19   Two-Door Car        Large            A  \n",
+            "27  Four-Door Car      Medsize          NaN  \n",
+            "\n",
+            "[5 rows x 26 columns]\n",
+            "\n",
+            "2. Average metrics by policy type and gender for 'Yes' responses:\n",
+            "      Policy Type Gender  Monthly Premium Auto  Customer Lifetime Value  \\\n",
+            "0  Corporate Auto      F             94.301775              7712.628736   \n",
+            "1  Corporate Auto      M             92.188312              7944.465414   \n",
+            "2   Personal Auto      F             98.998148              8339.791842   \n",
+            "3   Personal Auto      M             91.085821              7448.383281   \n",
+            "4    Special Auto      F             92.314286              7691.584111   \n",
+            "5    Special Auto      M             86.343750              8247.088702   \n",
+            "\n",
+            "   Total Claim Amount  \n",
+            "0          433.738499  \n",
+            "1          408.582459  \n",
+            "2          452.965929  \n",
+            "3          457.010178  \n",
+            "4          453.280164  \n",
+            "5          429.527942  \n",
+            "\n",
+            "3. States with more than 500 customers:\n",
+            "        State  Customer Count\n",
+            "0  California            3552\n",
+            "1      Oregon            2909\n",
+            "2     Arizona            1937\n",
+            "3      Nevada             993\n",
+            "4  Washington             888\n",
+            "\n",
+            "4. Customer lifetime value stats by education and gender:\n",
+            "                                     max          min       median\n",
+            "Education            Gender                                       \n",
+            "Bachelor             F       73225.95652  1904.000852  5640.505303\n",
+            "                     M       67907.27050  1898.007675  5548.031892\n",
+            "College              F       61850.18803  1898.683686  5623.611187\n",
+            "                     M       61134.68307  1918.119700  6005.847375\n",
+            "Doctor               F       44856.11397  2395.570000  5332.462694\n",
+            "                     M       32677.34284  2267.604038  5577.669457\n",
+            "High School or Below F       55277.44589  2144.921535  6039.553187\n",
+            "                     M       83325.38119  1940.981221  6286.731006\n",
+            "Master               F       51016.06704  2417.777032  5729.855012\n",
+            "                     M       50568.25912  2272.307310  5579.099207\n"
+          ]
+        },
+        {
+          "name": "stderr",
+          "output_type": "stream",
+          "text": [
+            "C:\\Users\\LAP-MPC\\AppData\\Local\\Temp\\ipykernel_11684\\4166574666.py:34: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n",
+            "  df['Effective To Date'] = pd.to_datetime(df['Effective To Date'])\n"
+          ]
+        },
+        {
+          "name": "stdout",
+          "output_type": "stream",
+          "text": [
+            "\n",
+            "5. Policies sold by state and month:\n",
+            "Month       February  January\n",
+            "State                        \n",
+            "Arizona          929     1008\n",
+            "California      1634     1918\n",
+            "Nevada           442      551\n",
+            "Oregon          1344     1565\n",
+            "Washington       425      463\n",
+            "\n",
+            "6. Number of policies sold by month for the top 3 states:\n",
+            "        State     Month  Policy Count\n",
+            "0     Arizona  February           929\n",
+            "1     Arizona   January          1008\n",
+            "2  California  February          1634\n",
+            "3  California   January          1918\n",
+            "6      Oregon  February          1344\n",
+            "7      Oregon   January          1565\n"
+          ]
+        }
+      ],
       "source": [
-        "# your code goes here"
+        "import pandas as pd\n",
+        "\n",
+        "# Tunable parameters for the analysis steps below\n",
+        "LOW_CLAIM_THRESHOLD = 1000  # a 'low' Total Claim Amount is strictly below this value\n",
+        "MIN_CUSTOMERS_PER_STATE = 500  # step 3 keeps states with strictly more customers than this\n",
+        "TOP_N_STATES = 3  # step 6 keeps this many states, ranked by total policies\n",
+        "DATE_FORMAT = '%m/%d/%y'  # 'Effective To Date' values look like 1/11/11 (month/day/2-digit year)\n",
+        "\n",
+        "# Load the dataset\n",
+        "url = \"https://raw.githubusercontent.com/data-bootcamp-v4/data/main/marketing_customer_analysis.csv\"\n",
+        "df = pd.read_csv(url)\n",
+        "\n",
+        "# Boolean mask reused by steps 1 and 2\n",
+        "responded_yes = df['Response'] == 'Yes'\n",
+        "\n",
+        "# 1. Filter for low total claim amount and 'Yes' responses\n",
+        "low_claim_yes_response_df = df[(df['Total Claim Amount'] < LOW_CLAIM_THRESHOLD) & responded_yes]\n",
+        "print(\"1. Customers with low total claim amount and 'Yes' to response:\")\n",
+        "print(low_claim_yes_response_df.head())\n",
+        "\n",
+        "# 2. Average metrics by policy type and gender for 'Yes' responses\n",
+        "avg_metrics_df = df[responded_yes].groupby(['Policy Type', 'Gender']).agg({\n",
+        "    'Monthly Premium Auto': 'mean',\n",
+        "    'Customer Lifetime Value': 'mean',\n",
+        "    'Total Claim Amount': 'mean'\n",
+        "}).reset_index()\n",
+        "print(\"\\n2. Average metrics by policy type and gender for 'Yes' responses:\")\n",
+        "print(avg_metrics_df)\n",
+        "\n",
+        "# 3. Count customers per state (only where more than MIN_CUSTOMERS_PER_STATE customers)\n",
+        "customers_by_state = df['State'].value_counts().reset_index()\n",
+        "# Overwrite the column labels explicitly so the names do not depend on the pandas version\n",
+        "customers_by_state.columns = ['State', 'Customer Count']\n",
+        "filtered_states = customers_by_state[customers_by_state['Customer Count'] > MIN_CUSTOMERS_PER_STATE]\n",
+        "print(\"\\n3. States with more than 500 customers:\")\n",
+        "print(filtered_states)\n",
+        "\n",
+        "# 4. Lifetime value stats by education and gender\n",
+        "clv_stats = df.groupby(['Education', 'Gender'])['Customer Lifetime Value'].agg(['max', 'min', 'median'])\n",
+        "print(\"\\n4. Customer lifetime value stats by education and gender:\")\n",
+        "print(clv_stats)\n",
+        "\n",
+        "# 5. Policies sold by state and month\n",
+        "# Passing an explicit format avoids the per-element dateutil fallback (and its UserWarning)\n",
+        "# and raises on a value that does not match instead of silently guessing its layout.\n",
+        "df['Effective To Date'] = pd.to_datetime(df['Effective To Date'], format=DATE_FORMAT)\n",
+        "df['Month'] = df['Effective To Date'].dt.strftime('%B')\n",
+        "# One (State, Month) count, shared by steps 5 and 6\n",
+        "policy_counts = df.groupby(['State', 'Month']).size()\n",
+        "# Month columns come out sorted alphabetically by month name, not chronologically\n",
+        "policies_by_state_month = policy_counts.unstack(fill_value=0)\n",
+        "print(\"\\n5. Policies sold by state and month:\")\n",
+        "print(policies_by_state_month)\n",
+        "\n",
+        "# 6. Find the top states with the most policies sold and show their monthly counts\n",
+        "policies_by_state_month_count = policy_counts.reset_index(name='Policy Count')\n",
+        "top_states_by_policy = (\n",
+        "    policies_by_state_month_count.groupby('State')['Policy Count'].sum().nlargest(TOP_N_STATES).index\n",
+        ")\n",
+        "top_policies_by_state_month = policies_by_state_month_count[\n",
+        "    policies_by_state_month_count['State'].isin(top_states_by_policy)\n",
+        "]\n",
+        "print(\"\\n6. Number of policies sold by month for the top 3 states:\")\n",
+        "print(top_policies_by_state_month)"
       ]
     }
   ],
@@ -143,7 +302,7 @@
       "provenance": []
     },
     "kernelspec": {
-      "display_name": "Python 3 (ipykernel)",
+      "display_name": "base",
       "language": "python",
       "name": "python3"
     },
@@ -157,7 +316,7 @@
       "name": "python",
       "nbconvert_exporter": "python",
       "pygments_lexer": "ipython3",
-      "version": "3.9.13"
+      "version": "3.13.5"
     }
   },
   "nbformat": 4,