data-bootcamp-v4 · lucielopez · Aug 3, 2025
diff --git a/lab-dw-aggregating.ipynb b/lab-dw-aggregating.ipynb
@@ -36,6 +36,68 @@
         "   - have a response \"Yes\" to the last marketing campaign."
       ]
     },
+    {
+      "cell_type": "code",
+      "execution_count": 1,
+      "id": "f4cb4513",
+      "metadata": {},
+      "outputs": [
+        {
+          "name": "stdout",
+          "output_type": "stream",
+          "text": [
+            "    unnamed:_0 customer       state  customer_lifetime_value response  \\\n",
+            "3            3  XL78013      Oregon             22332.439460      Yes   \n",
+            "8            8  FM55990  California              5989.773931      Yes   \n",
+            "15          15  CW49887  California              4626.801093      Yes   \n",
+            "19          19  NJ54277  California              3746.751625      Yes   \n",
+            "27          27  MQ68407      Oregon              4376.363592      Yes   \n",
+            "\n",
+            "    coverage education effective_to_date employmentstatus gender  ...  \\\n",
+            "3   Extended   College           1/11/11         Employed      M  ...   \n",
+            "8    Premium   College           1/19/11         Employed      M  ...   \n",
+            "15     Basic    Master           1/16/11         Employed      F  ...   \n",
+            "19  Extended   College           2/26/11         Employed      F  ...   \n",
+            "27   Premium  Bachelor           2/28/11         Employed      F  ...   \n",
+            "\n",
+            "    number_of_open_complaints number_of_policies     policy_type  \\\n",
+            "3                         0.0                  2  Corporate Auto   \n",
+            "8                         0.0                  1   Personal Auto   \n",
+            "15                        0.0                  1    Special Auto   \n",
+            "19                        1.0                  1   Personal Auto   \n",
+            "27                        0.0                  1   Personal Auto   \n",
+            "\n",
+            "          policy  renew_offer_type  sales_channel  total_claim_amount  \\\n",
+            "3   Corporate L3            Offer2         Branch          484.013411   \n",
+            "8    Personal L1            Offer2         Branch          739.200000   \n",
+            "15    Special L1            Offer2         Branch          547.200000   \n",
+            "19   Personal L2            Offer2    Call Center           19.575683   \n",
+            "27   Personal L3            Offer2          Agent           60.036683   \n",
+            "\n",
+            "    vehicle_class vehicle_size vehicle_type  \n",
+            "3   Four-Door Car      Medsize            A  \n",
+            "8      Sports Car      Medsize          NaN  \n",
+            "15            SUV      Medsize          NaN  \n",
+            "19   Two-Door Car        Large            A  \n",
+            "27  Four-Door Car      Medsize          NaN  \n",
+            "\n",
+            "[5 rows x 26 columns]\n"
+          ]
+        }
+      ],
+      "source": [
+        "import pandas as pd\n",
+        "\n",
+        "# Read the marketing customer CSV straight from the bootcamp repository.\n",
+        "url = \"https://raw.githubusercontent.com/data-bootcamp-v4/data/main/marketing_customer_analysis.csv\"\n",
+        "df = pd.read_csv(url)\n",
+        "\n",
+        "# Normalise headers: trim whitespace, lowercase, replace spaces with underscores.\n",
+        "clean_columns = df.columns.str.strip().str.lower().str.replace(' ', '_')\n",
+        "df.columns = clean_columns\n",
+        "\n",
+        "# Rows whose total claim amount is below 1000 AND whose response is Yes.\n",
+        "low_claim = df['total_claim_amount'] < 1000\n",
+        "responded_yes = df['response'] == 'Yes'\n",
+        "filtered_df = df[low_claim & responded_yes]\n",
+        "\n",
+        "print(filtered_df.head())\n"
+      ]
+    },
     {
       "cell_type": "markdown",
       "id": "b9be383e-5165-436e-80c8-57d4c757c8c3",
@@ -48,6 +110,55 @@
         "   - compare these insights to `total_claim_amount` patterns, and discuss which segments appear most profitable or low-risk for the company."
       ]
     },
+    {
+      "cell_type": "code",
+      "execution_count": 2,
+      "id": "207fe97f",
+      "metadata": {},
+      "outputs": [
+        {
+          "name": "stdout",
+          "output_type": "stream",
+          "text": [
+            "                       customer_lifetime_value  monthly_premium_auto  \\\n",
+            "policy_type    gender                                                  \n",
+            "Corporate Auto F                       7712.63                 94.30   \n",
+            "               M                       7944.47                 92.19   \n",
+            "Personal Auto  F                       8339.79                 99.00   \n",
+            "               M                       7448.38                 91.09   \n",
+            "Special Auto   F                       7691.58                 92.31   \n",
+            "               M                       8247.09                 86.34   \n",
+            "\n",
+            "                       total_claim_amount  \n",
+            "policy_type    gender                      \n",
+            "Corporate Auto F                   433.74  \n",
+            "               M                   408.58  \n",
+            "Personal Auto  F                   452.97  \n",
+            "               M                   457.01  \n",
+            "Special Auto   F                   453.28  \n",
+            "               M                   429.53  \n"
+          ]
+        }
+      ],
+      "source": [
+        "# Reuse `df` (loaded and column-normalised in the first code cell)\n",
+        "# instead of importing pandas and downloading the same CSV a second time.\n",
+        "df_yes = df[df['response'] == 'Yes']\n",
+        "\n",
+        "# Mean of each metric per (policy_type, gender) among the Yes responders,\n",
+        "# rounded to 2 decimals.\n",
+        "avg_metrics = df_yes.pivot_table(\n",
+        "    index=['policy_type', 'gender'],\n",
+        "    values=['monthly_premium_auto', 'customer_lifetime_value', 'total_claim_amount'],\n",
+        "    aggfunc='mean'\n",
+        ").round(2)\n",
+        "\n",
+        "print(avg_metrics)\n"
+      ]
+    },
     {
       "cell_type": "markdown",
       "id": "7050f4ac-53c5-4193-a3c0-8699b87196f0",
@@ -58,6 +169,34 @@
         "3. Analyze the total number of customers who have policies in each state, and then filter the results to only include states where there are more than 500 customers."
       ]
     },
+    {
+      "cell_type": "code",
+      "execution_count": 3,
+      "id": "6a039528",
+      "metadata": {},
+      "outputs": [
+        {
+          "name": "stdout",
+          "output_type": "stream",
+          "text": [
+            "state\n",
+            "California    3552\n",
+            "Oregon        2909\n",
+            "Arizona       1937\n",
+            "Nevada         993\n",
+            "Washington     888\n",
+            "Name: count, dtype: int64\n"
+          ]
+        }
+      ],
+      "source": [
+        "# Row count for each distinct value of the state column.\n",
+        "state_counts = df['state'].value_counts()\n",
+        "\n",
+        "# Boolean mask picking out the states that appear more than 500 times.\n",
+        "has_over_500 = state_counts > 500\n",
+        "states_over_500 = state_counts[has_over_500]\n",
+        "\n",
+        "print(states_over_500)\n"
+      ]
+    },
     {
       "cell_type": "markdown",
       "id": "b60a4443-a1a7-4bbf-b78e-9ccdf9895e0d",
@@ -68,6 +207,37 @@
         "4. Find the maximum, minimum, and median customer lifetime value by education level and gender. Write your conclusions."
       ]
     },
+    {
+      "cell_type": "code",
+      "execution_count": 4,
+      "id": "cf21916b",
+      "metadata": {},
+      "outputs": [
+        {
+          "name": "stdout",
+          "output_type": "stream",
+          "text": [
+            "                                  max      min   median\n",
+            "education            gender                            \n",
+            "Bachelor             F       73225.96  1904.00  5640.51\n",
+            "                     M       67907.27  1898.01  5548.03\n",
+            "College              F       61850.19  1898.68  5623.61\n",
+            "                     M       61134.68  1918.12  6005.85\n",
+            "Doctor               F       44856.11  2395.57  5332.46\n",
+            "                     M       32677.34  2267.60  5577.67\n",
+            "High School or Below F       55277.45  2144.92  6039.55\n",
+            "                     M       83325.38  1940.98  6286.73\n",
+            "Master               F       51016.07  2417.78  5729.86\n",
+            "                     M       50568.26  2272.31  5579.10\n"
+          ]
+        }
+      ],
+      "source": [
+        "# Max, min and median customer lifetime value for every\n",
+        "# (education, gender) pair, rounded to 2 decimals.\n",
+        "clv_by_group = df.groupby(['education', 'gender'])['customer_lifetime_value']\n",
+        "clv_stats = clv_by_group.agg(['max', 'min', 'median']).round(2)\n",
+        "\n",
+        "print(clv_stats)\n"
+      ]
+    },
     {
       "cell_type": "markdown",
       "id": "b42999f9-311f-481e-ae63-40a5577072c5",
@@ -143,7 +313,7 @@
       "provenance": []
     },
     "kernelspec": {
-      "display_name": "Python 3 (ipykernel)",
+      "display_name": "base",
       "language": "python",
       "name": "python3"
     },
@@ -157,7 +327,7 @@
       "name": "python",
       "nbconvert_exporter": "python",
       "pygments_lexer": "ipython3",
-      "version": "3.9.13"
+      "version": "3.12.2"
     }
   },
   "nbformat": 4,