data-bootcamp-v4 · NodrrS · Aug 4, 2025
diff --git a/lab-dw-aggregating.ipynb b/lab-dw-aggregating.ipynb
@@ -115,6 +115,35 @@
         "Hint: You can use melt to unpivot the data and create a table that shows the customer response rate (those who responded \"Yes\") by marketing channel."
       ]
     },
+    {
+      "cell_type": "code",
+      "execution_count": 32,
+      "id": "509e3e33",
+      "metadata": {},
+      "outputs": [
+        {
+          "data": {
+            "text/plain": [
+              "Index(['Unnamed: 0', 'Customer', 'State', 'Customer Lifetime Value',\n",
+              "       'Response', 'Coverage', 'Education', 'Effective To Date',\n",
+              "       'EmploymentStatus', 'Gender', 'Income', 'Location Code',\n",
+              "       'Marital Status', 'Monthly Premium Auto', 'Months Since Last Claim',\n",
+              "       'Months Since Policy Inception', 'Number of Open Complaints',\n",
+              "       'Number of Policies', 'Policy Type', 'Policy', 'Renew Offer Type',\n",
+              "       'Sales Channel', 'Total Claim Amount', 'Vehicle Class', 'Vehicle Size',\n",
+              "       'Vehicle Type'],\n",
+              "      dtype='object')"
+            ]
+          },
+          "execution_count": 32,
+          "metadata": {},
+          "output_type": "execute_result"
+        }
+      ],
+      "source": [
+        "df.columns"
+      ]
+    },
     {
       "cell_type": "markdown",
       "id": "e4378d94-48fb-4850-a802-b1bc8f427b2d",
@@ -132,18 +161,243 @@
       "metadata": {
         "id": "449513f4-0459-46a0-a18d-9398d974c9ad"
       },
-      "outputs": [],
+      "outputs": [
+        {
+          "name": "stdout",
+          "output_type": "stream",
+          "text": [
+            "       Unnamed: 0 Customer       State  Customer Lifetime Value Response  \\\n",
+            "3               3  XL78013      Oregon             22332.439460      Yes   \n",
+            "8               8  FM55990  California              5989.773931      Yes   \n",
+            "15             15  CW49887  California              4626.801093      Yes   \n",
+            "19             19  NJ54277  California              3746.751625      Yes   \n",
+            "27             27  MQ68407      Oregon              4376.363592      Yes   \n",
+            "...           ...      ...         ...                      ...      ...   \n",
+            "10844       10844  FM31768     Arizona              5979.724161      Yes   \n",
+            "10852       10852  KZ80424  Washington              8382.478392      Yes   \n",
+            "10872       10872  XT67997  California              5979.724161      Yes   \n",
+            "10887       10887  BY78730      Oregon              8879.790017      Yes   \n",
+            "10897       10897  MM70762     Arizona              9075.768214      Yes   \n",
+            "\n",
+            "       Coverage             Education Effective To Date EmploymentStatus  \\\n",
+            "3      Extended               College           1/11/11         Employed   \n",
+            "8       Premium               College           1/19/11         Employed   \n",
+            "15        Basic                Master           1/16/11         Employed   \n",
+            "19     Extended               College           2/26/11         Employed   \n",
+            "27      Premium              Bachelor           2/28/11         Employed   \n",
+            "...         ...                   ...               ...              ...   \n",
+            "10844  Extended  High School or Below            2/7/11         Employed   \n",
+            "10852     Basic              Bachelor           1/27/11         Employed   \n",
+            "10872  Extended  High School or Below            2/7/11         Employed   \n",
+            "10887     Basic  High School or Below            2/3/11         Employed   \n",
+            "10897     Basic                Master           1/26/11         Employed   \n",
+            "\n",
+            "      Gender  ...  Number of Open Complaints Number of Policies  \\\n",
+            "3          M  ...                        0.0                  2   \n",
+            "8          M  ...                        0.0                  1   \n",
+            "15         F  ...                        0.0                  1   \n",
+            "19         F  ...                        1.0                  1   \n",
+            "27         F  ...                        0.0                  1   \n",
+            "...      ...  ...                        ...                ...   \n",
+            "10844      F  ...                        0.0                  3   \n",
+            "10852      M  ...                        0.0                  2   \n",
+            "10872      F  ...                        0.0                  3   \n",
+            "10887      F  ...                        0.0                  7   \n",
+            "10897      M  ...                        0.0                  8   \n",
+            "\n",
+            "          Policy Type        Policy  Renew Offer Type  Sales Channel  \\\n",
+            "3      Corporate Auto  Corporate L3            Offer2         Branch   \n",
+            "8       Personal Auto   Personal L1            Offer2         Branch   \n",
+            "15       Special Auto    Special L1            Offer2         Branch   \n",
+            "19      Personal Auto   Personal L2            Offer2    Call Center   \n",
+            "27      Personal Auto   Personal L3            Offer2          Agent   \n",
+            "...               ...           ...               ...            ...   \n",
+            "10844   Personal Auto   Personal L1            Offer2          Agent   \n",
+            "10852   Personal Auto   Personal L2            Offer2    Call Center   \n",
+            "10872   Personal Auto   Personal L3            Offer2          Agent   \n",
+            "10887    Special Auto    Special L2            Offer1          Agent   \n",
+            "10897   Personal Auto   Personal L1            Offer1          Agent   \n",
+            "\n",
+            "       Total Claim Amount  Vehicle Class Vehicle Size Vehicle Type  \n",
+            "3              484.013411  Four-Door Car      Medsize            A  \n",
+            "8              739.200000     Sports Car      Medsize          NaN  \n",
+            "15             547.200000            SUV      Medsize          NaN  \n",
+            "19              19.575683   Two-Door Car        Large            A  \n",
+            "27              60.036683  Four-Door Car      Medsize          NaN  \n",
+            "...                   ...            ...          ...          ...  \n",
+            "10844          547.200000  Four-Door Car      Medsize          NaN  \n",
+            "10852          791.878042            NaN          NaN            A  \n",
+            "10872          547.200000  Four-Door Car      Medsize          NaN  \n",
+            "10887          528.200860            SUV        Small            A  \n",
+            "10897          158.077504     Sports Car      Medsize            A  \n",
+            "\n",
+            "[1399 rows x 26 columns]\n"
+          ]
+        },
+        {
+          "data": {
+            "text/html": [
+              "<div>\n",
+              "<style scoped>\n",
+              "    .dataframe tbody tr th:only-of-type {\n",
+              "        vertical-align: middle;\n",
+              "    }\n",
+              "\n",
+              "    .dataframe tbody tr th {\n",
+              "        vertical-align: top;\n",
+              "    }\n",
+              "\n",
+              "    .dataframe thead th {\n",
+              "        text-align: right;\n",
+              "    }\n",
+              "</style>\n",
+              "<table border=\"1\" class=\"dataframe\">\n",
+              "  <thead>\n",
+              "    <tr style=\"text-align: right;\">\n",
+              "      <th></th>\n",
+              "      <th>Education</th>\n",
+              "      <th>Gender</th>\n",
+              "      <th>Max CLV</th>\n",
+              "      <th>Min CLV</th>\n",
+              "      <th>Median CLV</th>\n",
+              "    </tr>\n",
+              "  </thead>\n",
+              "  <tbody>\n",
+              "    <tr>\n",
+              "      <th>0</th>\n",
+              "      <td>Bachelor</td>\n",
+              "      <td>F</td>\n",
+              "      <td>73225.95652</td>\n",
+              "      <td>1904.000852</td>\n",
+              "      <td>5640.505303</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>1</th>\n",
+              "      <td>Bachelor</td>\n",
+              "      <td>M</td>\n",
+              "      <td>67907.27050</td>\n",
+              "      <td>1898.007675</td>\n",
+              "      <td>5548.031892</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>2</th>\n",
+              "      <td>College</td>\n",
+              "      <td>F</td>\n",
+              "      <td>61850.18803</td>\n",
+              "      <td>1898.683686</td>\n",
+              "      <td>5623.611187</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>3</th>\n",
+              "      <td>College</td>\n",
+              "      <td>M</td>\n",
+              "      <td>61134.68307</td>\n",
+              "      <td>1918.119700</td>\n",
+              "      <td>6005.847375</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>4</th>\n",
+              "      <td>Doctor</td>\n",
+              "      <td>F</td>\n",
+              "      <td>44856.11397</td>\n",
+              "      <td>2395.570000</td>\n",
+              "      <td>5332.462694</td>\n",
+              "    </tr>\n",
+              "  </tbody>\n",
+              "</table>\n",
+              "</div>"
+            ],
+            "text/plain": [
+              "  Education Gender      Max CLV      Min CLV   Median CLV\n",
+              "0  Bachelor      F  73225.95652  1904.000852  5640.505303\n",
+              "1  Bachelor      M  67907.27050  1898.007675  5548.031892\n",
+              "2   College      F  61850.18803  1898.683686  5623.611187\n",
+              "3   College      M  61134.68307  1918.119700  6005.847375\n",
+              "4    Doctor      F  44856.11397  2395.570000  5332.462694"
+            ]
+          },
+          "execution_count": 38,
+          "metadata": {},
+          "output_type": "execute_result"
+        }
+      ],
       "source": [
-        "# your code goes here"
+        "import pandas as pd\n",
+        "url = 'https://raw.githubusercontent.com/data-bootcamp-v4/data/main/marketing_customer_analysis.csv'\n",
+        "df = pd.read_csv(url)\n",
+        "\n",
+        "# Dollar threshold under which a Total Claim Amount counts as \"low\" (task 1).\n",
+        "LOW_CLAIM_THRESHOLD = 1000\n",
+        "# Minimum number of rows a state needs in order to be kept (task 3).\n",
+        "MIN_STATE_CUSTOMERS = 500\n",
+        "\n",
+        "# 1. Create a new DataFrame that only includes customers who:\n",
+        "#    - have a **low total_claim_amount** (e.g., below $1,000),\n",
+        "#    - have a response \"Yes\" to the last marketing campaign.\n",
+        "\n",
+        "# Coerce unparseable values to NaN; NaN never satisfies `<`, so those rows are excluded.\n",
+        "df['Total Claim Amount'] = pd.to_numeric(df['Total Claim Amount'], errors='coerce')\n",
+        "\n",
+        "# Vectorized boolean masks replace the row-wise df.apply(..., axis=1) filters.\n",
+        "responded_yes = df['Response'] == 'Yes'\n",
+        "low_claim = df['Total Claim Amount'] < LOW_CLAIM_THRESHOLD\n",
+        "filtered_df = df[responded_yes & low_claim]\n",
+        "\n",
+        "# Only a cell's last expression is rendered automatically, so intermediate results\n",
+        "# are shown with display(), which Jupyter kernels provide without an import.\n",
+        "display(filtered_df.head())\n",
+        "\n",
+        "# 2. Using the original Dataframe, analyze:\n",
+        "#    - the average `monthly_premium` and/or customer lifetime value by `policy_type` and `gender` for customers who responded \"Yes\", and\n",
+        "#    - compare these insights to `total_claim_amount` patterns, and discuss which segments appear most profitable or low-risk for the company.\n",
+        "\n",
+        "filtered_df2 = df.loc[responded_yes, [\n",
+        "    'Customer', 'Response', 'Monthly Premium Auto', 'Customer Lifetime Value',\n",
+        "    'Policy Type', 'Gender', 'Total Claim Amount',\n",
+        "]]\n",
+        "\n",
+        "# Named aggregation produces the labelled averages directly, with no duplicated columns.\n",
+        "grouped_stats = filtered_df2.groupby(['Policy Type', 'Gender'], as_index=False).agg(**{\n",
+        "    'Avg Monthly Premium': ('Monthly Premium Auto', 'mean'),\n",
+        "    'Avg CLV': ('Customer Lifetime Value', 'mean'),\n",
+        "    'Avg Total Claim Amount': ('Total Claim Amount', 'mean'),\n",
+        "})\n",
+        "display(grouped_stats)\n",
+        "\n",
+        "# 3. Analyze the total number of customers who have policies in each state, and then filter the results to only include states where there are more than 500 customers.\n",
+        "# NOTE(review): value_counts() counts rows per state, not distinct customers;\n",
+        "# switch to groupby('State')['Customer'].nunique() if customer IDs can repeat - TODO confirm.\n",
+        "state_counts = df['State'].value_counts().rename_axis('State').reset_index(name='Customer Count')\n",
+        "state_counts = state_counts[state_counts['Customer Count'] > MIN_STATE_CUSTOMERS]\n",
+        "display(state_counts)\n",
+        "\n",
+        "# 4. Find the maximum, minimum, and median customer lifetime value by education level and gender. Write your conclusions.\n",
+        "grouped_stats2 = df.groupby(['Education', 'Gender'], as_index=False).agg(**{\n",
+        "    'Max CLV': ('Customer Lifetime Value', 'max'),\n",
+        "    'Min CLV': ('Customer Lifetime Value', 'min'),\n",
+        "    'Median CLV': ('Customer Lifetime Value', 'median'),\n",
+        "})\n",
+        "\n",
+        "# TODO: add the written conclusions requested in task 4 after reviewing the full table.\n",
+        "# No .head() here: it would hide every Education/Gender group after the fifth row.\n",
+        "grouped_stats2"
       ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "id": "d75abbb1",
+      "metadata": {},
+      "outputs": [],
+      "source": []
     }
   ],
   "metadata": {
     "colab": {
       "provenance": []
     },
     "kernelspec": {
-      "display_name": "Python 3 (ipykernel)",
+      "display_name": "base",
       "language": "python",
       "name": "python3"
     },
@@ -157,7 +411,7 @@
       "name": "python",
       "nbconvert_exporter": "python",
       "pygments_lexer": "ipython3",
-      "version": "3.9.13"
+      "version": "3.12.2"
     }
   },
   "nbformat": 4,