From 0f92cb3cc84b8df2226f98611a69a5db7bb2dfd4 Mon Sep 17 00:00:00 2001 From: Lucie Lopez Date: Sun, 3 Aug 2025 19:45:55 +0200 Subject: [PATCH] Lab OK --- lab-dw-aggregating.ipynb | 174 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 172 insertions(+), 2 deletions(-) diff --git a/lab-dw-aggregating.ipynb b/lab-dw-aggregating.ipynb index fadd718..e98a8c5 100644 --- a/lab-dw-aggregating.ipynb +++ b/lab-dw-aggregating.ipynb @@ -36,6 +36,68 @@ " - have a response \"Yes\" to the last marketing campaign." ] }, + { + "cell_type": "code", + "execution_count": 1, + "id": "f4cb4513", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " unnamed:_0 customer state customer_lifetime_value response \\\n", + "3 3 XL78013 Oregon 22332.439460 Yes \n", + "8 8 FM55990 California 5989.773931 Yes \n", + "15 15 CW49887 California 4626.801093 Yes \n", + "19 19 NJ54277 California 3746.751625 Yes \n", + "27 27 MQ68407 Oregon 4376.363592 Yes \n", + "\n", + " coverage education effective_to_date employmentstatus gender ... \\\n", + "3 Extended College 1/11/11 Employed M ... \n", + "8 Premium College 1/19/11 Employed M ... \n", + "15 Basic Master 1/16/11 Employed F ... \n", + "19 Extended College 2/26/11 Employed F ... \n", + "27 Premium Bachelor 2/28/11 Employed F ... \n", + "\n", + " number_of_open_complaints number_of_policies policy_type \\\n", + "3 0.0 2 Corporate Auto \n", + "8 0.0 1 Personal Auto \n", + "15 0.0 1 Special Auto \n", + "19 1.0 1 Personal Auto \n", + "27 0.0 1 Personal Auto \n", + "\n", + " policy renew_offer_type sales_channel total_claim_amount \\\n", + "3 Corporate L3 Offer2 Branch 484.013411 \n", + "8 Personal L1 Offer2 Branch 739.200000 \n", + "15 Special L1 Offer2 Branch 547.200000 \n", + "19 Personal L2 Offer2 Call Center 19.575683 \n", + "27 Personal L3 Offer2 Agent 60.036683 \n", + "\n", + " vehicle_class vehicle_size vehicle_type \n", + "3 Four-Door Car Medsize A \n", + "8 Sports Car Medsize NaN \n", + "15 SUV Medsize NaN \n", + "19 Two-Door Car Large A \n", + "27 Four-Door Car Medsize NaN \n", + "\n", + "[5 rows x 26 columns]\n" + ] + } + ], + "source": [ + "import pandas as pd\n", + "\n", + "url = \"https://raw.githubusercontent.com/data-bootcamp-v4/data/main/marketing_customer_analysis.csv\"\n", + "df = pd.read_csv(url)\n", + "\n", + "df.columns = df.columns.str.strip().str.lower().str.replace(' ', '_')\n", + "\n", + "filtered_df = df[(df['total_claim_amount'] < 1000) & (df['response'] == 'Yes')]\n", + "\n", + "print(filtered_df.head())\n" + ] + }, { "cell_type": "markdown", "id": "b9be383e-5165-436e-80c8-57d4c757c8c3", @@ -48,6 +110,55 @@ " - compare these insights to `total_claim_amount` patterns, and discuss which segments appear most profitable or low-risk for the company." ] }, + { + "cell_type": "code", + "execution_count": 2, + "id": "207fe97f", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " customer_lifetime_value monthly_premium_auto \\\n", + "policy_type gender \n", + "Corporate Auto F 7712.63 94.30 \n", + " M 7944.47 92.19 \n", + "Personal Auto F 8339.79 99.00 \n", + " M 7448.38 91.09 \n", + "Special Auto F 7691.58 92.31 \n", + " M 8247.09 86.34 \n", + "\n", + " total_claim_amount \n", + "policy_type gender \n", + "Corporate Auto F 433.74 \n", + " M 408.58 \n", + "Personal Auto F 452.97 \n", + " M 457.01 \n", + "Special Auto F 453.28 \n", + " M 429.53 \n" + ] + } + ], + "source": [ + "import pandas as pd\n", + "\n", + "url = \"https://raw.githubusercontent.com/data-bootcamp-v4/data/main/marketing_customer_analysis.csv\"\n", + "df = pd.read_csv(url)\n", + "\n", + "df.columns = df.columns.str.strip().str.lower().str.replace(' ', '_')\n", + "\n", + "df_yes = df[df['response'] == 'Yes']\n", + "\n", + "avg_metrics = df_yes.pivot_table(\n", + " index=['policy_type', 'gender'],\n", + " values=['monthly_premium_auto', 'customer_lifetime_value', 'total_claim_amount'],\n", + " aggfunc='mean'\n", + ").round(2)\n", + "\n", + "print(avg_metrics)\n" + ] + }, { "cell_type": "markdown", "id": "7050f4ac-53c5-4193-a3c0-8699b87196f0", @@ -58,6 +169,34 @@ "3. Analyze the total number of customers who have policies in each state, and then filter the results to only include states where there are more than 500 customers." ] }, + { + "cell_type": "code", + "execution_count": 3, + "id": "6a039528", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "state\n", + "California 3552\n", + "Oregon 2909\n", + "Arizona 1937\n", + "Nevada 993\n", + "Washington 888\n", + "Name: count, dtype: int64\n" + ] + } + ], + "source": [ + "state_counts = df['state'].value_counts()\n", + "\n", + "states_over_500 = state_counts[state_counts > 500]\n", + "\n", + "print(states_over_500)\n" + ] + }, { "cell_type": "markdown", "id": "b60a4443-a1a7-4bbf-b78e-9ccdf9895e0d", @@ -68,6 +207,37 @@ "4. Find the maximum, minimum, and median customer lifetime value by education level and gender. Write your conclusions." ] }, + { + "cell_type": "code", + "execution_count": 4, + "id": "cf21916b", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " max min median\n", + "education gender \n", + "Bachelor F 73225.96 1904.00 5640.51\n", + " M 67907.27 1898.01 5548.03\n", + "College F 61850.19 1898.68 5623.61\n", + " M 61134.68 1918.12 6005.85\n", + "Doctor F 44856.11 2395.57 5332.46\n", + " M 32677.34 2267.60 5577.67\n", + "High School or Below F 55277.45 2144.92 6039.55\n", + " M 83325.38 1940.98 6286.73\n", + "Master F 51016.07 2417.78 5729.86\n", + " M 50568.26 2272.31 5579.10\n" + ] + } + ], + "source": [ + "clv_stats = df.groupby(['education', 'gender'])['customer_lifetime_value'].agg(['max', 'min', 'median']).round(2)\n", + "\n", + "print(clv_stats)\n" + ] + }, { "cell_type": "markdown", "id": "b42999f9-311f-481e-ae63-40a5577072c5", @@ -143,7 +313,7 @@ "provenance": [] }, "kernelspec": { - "display_name": "Python 3 (ipykernel)", + "display_name": "base", "language": "python", "name": "python3" }, @@ -157,7 +327,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.13" + "version": "3.12.2" } }, "nbformat": 4,