diff --git a/lab-dw-aggregating.ipynb b/lab-dw-aggregating.ipynb index fadd718..de0ed24 100644 --- a/lab-dw-aggregating.ipynb +++ b/lab-dw-aggregating.ipynb @@ -24,6 +24,243 @@ "This dataset contains information such as customer demographics, policy details, vehicle information, and the customer's response to the last marketing campaign. Our goal is to explore and analyze this data by first performing data cleaning, formatting, and structuring." ] }, + { + "cell_type": "code", + "execution_count": 3, + "id": "a34717ca", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
customerstatecustomer_lifetime_valueresponsecoverageeducationeffective_to_dateemploymentstatusgenderincome...number_of_open_complaintsnumber_of_policiespolicy_typepolicyrenew_offer_typesales_channeltotal_claim_amountvehicle_classvehicle_sizevehicle_type
0DK49336Arizona4809.216960NoBasicCollege2/18/11EmployedM48029...0.09Corporate AutoCorporate L3Offer3Agent292.800000Four-Door CarMedsizeNaN
1KX64629California2228.525238NoBasicCollege1/18/11UnemployedF0...0.01Personal AutoPersonal L3Offer4Call Center744.924331Four-Door CarMedsizeNaN
2LZ68649Washington14947.917300NoBasicBachelor2/10/11EmployedM22139...0.02Personal AutoPersonal L3Offer3Call Center480.000000SUVMedsizeA
3XL78013Oregon22332.439460YesExtendedCollege1/11/11EmployedM49078...0.02Corporate AutoCorporate L3Offer2Branch484.013411Four-Door CarMedsizeA
4QA50777Oregon9025.067525NoPremiumBachelor1/17/11Medical LeaveF23675...NaN7Personal AutoPersonal L2Offer1Branch707.925645Four-Door CarMedsizeNaN
\n", + "

5 rows × 25 columns

\n", + "
" + ], + "text/plain": [ + " customer state customer_lifetime_value response coverage education \\\n", + "0 DK49336 Arizona 4809.216960 No Basic College \n", + "1 KX64629 California 2228.525238 No Basic College \n", + "2 LZ68649 Washington 14947.917300 No Basic Bachelor \n", + "3 XL78013 Oregon 22332.439460 Yes Extended College \n", + "4 QA50777 Oregon 9025.067525 No Premium Bachelor \n", + "\n", + " effective_to_date employmentstatus gender income ... \\\n", + "0 2/18/11 Employed M 48029 ... \n", + "1 1/18/11 Unemployed F 0 ... \n", + "2 2/10/11 Employed M 22139 ... \n", + "3 1/11/11 Employed M 49078 ... \n", + "4 1/17/11 Medical Leave F 23675 ... \n", + "\n", + " number_of_open_complaints number_of_policies policy_type policy \\\n", + "0 0.0 9 Corporate Auto Corporate L3 \n", + "1 0.0 1 Personal Auto Personal L3 \n", + "2 0.0 2 Personal Auto Personal L3 \n", + "3 0.0 2 Corporate Auto Corporate L3 \n", + "4 NaN 7 Personal Auto Personal L2 \n", + "\n", + " renew_offer_type sales_channel total_claim_amount vehicle_class \\\n", + "0 Offer3 Agent 292.800000 Four-Door Car \n", + "1 Offer4 Call Center 744.924331 Four-Door Car \n", + "2 Offer3 Call Center 480.000000 SUV \n", + "3 Offer2 Branch 484.013411 Four-Door Car \n", + "4 Offer1 Branch 707.925645 Four-Door Car \n", + "\n", + " vehicle_size vehicle_type \n", + "0 Medsize NaN \n", + "1 Medsize NaN \n", + "2 Medsize A \n", + "3 Medsize A \n", + "4 Medsize NaN \n", + "\n", + "[5 rows x 25 columns]" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "import numpy as np\n", + "import pandas as pd\n", + "\n", + "df_marketing = pd.read_csv(\"https://raw.githubusercontent.com/data-bootcamp-v4/data/main/marketing_customer_analysis.csv\")\n", + "df_marketing.columns = (\n", + " df_marketing.columns\n", + " .str.strip() # elimina espacios alrededor\n", + " .str.lower() # convierte a minúsculas\n", + " .str.replace(\" \", \"_\")\n", + " \n", + ")\n", + "\n", + "df_marketing = df_marketing.drop(columns=[\"unnamed:_0\"])\n", + "\n", + "display(df_marketing.head())" + ] + }, { "cell_type": "markdown", "id": "9c98ddc5-b041-4c94-ada1-4dfee5c98e50", @@ -36,6 +273,405 @@ " - have a response \"Yes\" to the last marketing campaign." ] }, + { + "cell_type": "code", + "execution_count": 4, + "id": "144f1fb5", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
customerstatecustomer_lifetime_valueresponsecoverageeducationeffective_to_dateemploymentstatusgenderincome...number_of_open_complaintsnumber_of_policiespolicy_typepolicyrenew_offer_typesales_channeltotal_claim_amountvehicle_classvehicle_sizevehicle_type
3XL78013Oregon22332.439460YesExtendedCollege1/11/11EmployedM49078...0.02Corporate AutoCorporate L3Offer2Branch484.013411Four-Door CarMedsizeA
8FM55990California5989.773931YesPremiumCollege1/19/11EmployedM66839...0.01Personal AutoPersonal L1Offer2Branch739.200000Sports CarMedsizeNaN
15CW49887California4626.801093YesBasicMaster1/16/11EmployedF79487...0.01Special AutoSpecial L1Offer2Branch547.200000SUVMedsizeNaN
19NJ54277California3746.751625YesExtendedCollege2/26/11EmployedF41479...1.01Personal AutoPersonal L2Offer2Call Center19.575683Two-Door CarLargeA
27MQ68407Oregon4376.363592YesPremiumBachelor2/28/11EmployedF63774...0.01Personal AutoPersonal L3Offer2Agent60.036683Four-Door CarMedsizeNaN
..................................................................
10844FM31768Arizona5979.724161YesExtendedHigh School or Below2/7/11EmployedF57693...0.03Personal AutoPersonal L1Offer2Agent547.200000Four-Door CarMedsizeNaN
10852KZ80424Washington8382.478392YesBasicBachelor1/27/11EmployedM25998...0.02Personal AutoPersonal L2Offer2Call Center791.878042NaNNaNA
10872XT67997California5979.724161YesExtendedHigh School or Below2/7/11EmployedF57693...0.03Personal AutoPersonal L3Offer2Agent547.200000Four-Door CarMedsizeNaN
10887BY78730Oregon8879.790017YesBasicHigh School or Below2/3/11EmployedF52583...0.07Special AutoSpecial L2Offer1Agent528.200860SUVSmallA
10897MM70762Arizona9075.768214YesBasicMaster1/26/11EmployedM37722...0.08Personal AutoPersonal L1Offer1Agent158.077504Sports CarMedsizeA
\n", + "

1399 rows × 25 columns

\n", + "
" + ], + "text/plain": [ + " customer state customer_lifetime_value response coverage \\\n", + "3 XL78013 Oregon 22332.439460 Yes Extended \n", + "8 FM55990 California 5989.773931 Yes Premium \n", + "15 CW49887 California 4626.801093 Yes Basic \n", + "19 NJ54277 California 3746.751625 Yes Extended \n", + "27 MQ68407 Oregon 4376.363592 Yes Premium \n", + "... ... ... ... ... ... \n", + "10844 FM31768 Arizona 5979.724161 Yes Extended \n", + "10852 KZ80424 Washington 8382.478392 Yes Basic \n", + "10872 XT67997 California 5979.724161 Yes Extended \n", + "10887 BY78730 Oregon 8879.790017 Yes Basic \n", + "10897 MM70762 Arizona 9075.768214 Yes Basic \n", + "\n", + " education effective_to_date employmentstatus gender income \\\n", + "3 College 1/11/11 Employed M 49078 \n", + "8 College 1/19/11 Employed M 66839 \n", + "15 Master 1/16/11 Employed F 79487 \n", + "19 College 2/26/11 Employed F 41479 \n", + "27 Bachelor 2/28/11 Employed F 63774 \n", + "... ... ... ... ... ... \n", + "10844 High School or Below 2/7/11 Employed F 57693 \n", + "10852 Bachelor 1/27/11 Employed M 25998 \n", + "10872 High School or Below 2/7/11 Employed F 57693 \n", + "10887 High School or Below 2/3/11 Employed F 52583 \n", + "10897 Master 1/26/11 Employed M 37722 \n", + "\n", + " ... number_of_open_complaints number_of_policies policy_type \\\n", + "3 ... 0.0 2 Corporate Auto \n", + "8 ... 0.0 1 Personal Auto \n", + "15 ... 0.0 1 Special Auto \n", + "19 ... 1.0 1 Personal Auto \n", + "27 ... 0.0 1 Personal Auto \n", + "... ... ... ... ... \n", + "10844 ... 0.0 3 Personal Auto \n", + "10852 ... 0.0 2 Personal Auto \n", + "10872 ... 0.0 3 Personal Auto \n", + "10887 ... 0.0 7 Special Auto \n", + "10897 ... 0.0 8 Personal Auto \n", + "\n", + " policy renew_offer_type sales_channel total_claim_amount \\\n", + "3 Corporate L3 Offer2 Branch 484.013411 \n", + "8 Personal L1 Offer2 Branch 739.200000 \n", + "15 Special L1 Offer2 Branch 547.200000 \n", + "19 Personal L2 Offer2 Call Center 19.575683 \n", + "27 Personal L3 Offer2 Agent 60.036683 \n", + "... ... ... ... ... \n", + "10844 Personal L1 Offer2 Agent 547.200000 \n", + "10852 Personal L2 Offer2 Call Center 791.878042 \n", + "10872 Personal L3 Offer2 Agent 547.200000 \n", + "10887 Special L2 Offer1 Agent 528.200860 \n", + "10897 Personal L1 Offer1 Agent 158.077504 \n", + "\n", + " vehicle_class vehicle_size vehicle_type \n", + "3 Four-Door Car Medsize A \n", + "8 Sports Car Medsize NaN \n", + "15 SUV Medsize NaN \n", + "19 Two-Door Car Large A \n", + "27 Four-Door Car Medsize NaN \n", + "... ... ... ... \n", + "10844 Four-Door Car Medsize NaN \n", + "10852 NaN NaN A \n", + "10872 Four-Door Car Medsize NaN \n", + "10887 SUV Small A \n", + "10897 Sports Car Medsize A \n", + "\n", + "[1399 rows x 25 columns]" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_low = df_marketing [(df_marketing[\"total_claim_amount\"] < 1000) & (df_marketing[\"response\"] == \"Yes\")]\n", + "df_low" + ] + }, { "cell_type": "markdown", "id": "b9be383e-5165-436e-80c8-57d4c757c8c3", @@ -48,6 +684,115 @@ " - compare these insights to `total_claim_amount` patterns, and discuss which segments appear most profitable or low-risk for the company." ] }, + { + "cell_type": "code", + "execution_count": 8, + "id": "19783d79", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
customer_lifetime_valuemonthly_premium_auto
policy_typegender
Corporate AutoF7712.6394.30
M7944.4792.19
Personal AutoF8339.7999.00
M7448.3891.09
Special AutoF7691.5892.31
M8247.0986.34
\n", + "
" + ], + "text/plain": [ + " customer_lifetime_value monthly_premium_auto\n", + "policy_type gender \n", + "Corporate Auto F 7712.63 94.30\n", + " M 7944.47 92.19\n", + "Personal Auto F 8339.79 99.00\n", + " M 7448.38 91.09\n", + "Special Auto F 7691.58 92.31\n", + " M 8247.09 86.34" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "\n", + "\n", + "# Filtrar solo clientes que respondieron \"Yes\"\n", + "df_yes = df_marketing[df_marketing[\"response\"] == \"Yes\"]\n", + "\n", + "# Agrupar por policy_type y gender\n", + "pivot_result = (\n", + " df_yes\n", + " .groupby([\"policy_type\", \"gender\"])\n", + " [[\"customer_lifetime_value\", \"monthly_premium_auto\"]] # usa el nombre real de tu columna premium\n", + " .mean()\n", + " .round(2)\n", + ")\n", + "\n", + "display(pivot_result)\n" + ] + }, { "cell_type": "markdown", "id": "7050f4ac-53c5-4193-a3c0-8699b87196f0", @@ -58,6 +803,38 @@ "3. Analyze the total number of customers who have policies in each state, and then filter the results to only include states where there are more than 500 customers." ] }, + { + "cell_type": "code", + "execution_count": 12, + "id": "2f5f7b1b", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "state\n", + "California 3552\n", + "Oregon 2909\n", + "Arizona 1937\n", + "Nevada 993\n", + "Washington 888\n", + "Name: count, dtype: int64" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Contar clientes por estado\n", + "state_counts = df_marketing[\"state\"].value_counts()\n", + "\n", + "# Filtrar solo los que tienen más de 500\n", + "state_counts_filtered = state_counts[state_counts > 500]\n", + "\n", + "display(state_counts_filtered)\n" + ] + }, { "cell_type": "markdown", "id": "b60a4443-a1a7-4bbf-b78e-9ccdf9895e0d", @@ -68,6 +845,143 @@ "4. Find the maximum, minimum, and median customer lifetime value by education level and gender. Write your conclusions." ] }, + { + "cell_type": "code", + "execution_count": 15, + "id": "b6ee2716", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
maxminmedian
educationgender
BachelorF73225.961904.005640.51
M67907.271898.015548.03
CollegeF61850.191898.685623.61
M61134.681918.126005.85
DoctorF44856.112395.575332.46
M32677.342267.605577.67
High School or BelowF55277.452144.926039.55
M83325.381940.986286.73
MasterF51016.072417.785729.86
M50568.262272.315579.10
\n", + "
" + ], + "text/plain": [ + " max min median\n", + "education gender \n", + "Bachelor F 73225.96 1904.00 5640.51\n", + " M 67907.27 1898.01 5548.03\n", + "College F 61850.19 1898.68 5623.61\n", + " M 61134.68 1918.12 6005.85\n", + "Doctor F 44856.11 2395.57 5332.46\n", + " M 32677.34 2267.60 5577.67\n", + "High School or Below F 55277.45 2144.92 6039.55\n", + " M 83325.38 1940.98 6286.73\n", + "Master F 51016.07 2417.78 5729.86\n", + " M 50568.26 2272.31 5579.10" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "clv_stats = (\n", + " df_marketing.groupby([\"education\", \"gender\"])[\"customer_lifetime_value\"].agg([\"max\", \"min\", \"median\"]).round(2)\n", + ")\n", + "\n", + "display(clv_stats)\n" + ] + }, { "cell_type": "markdown", "id": "b42999f9-311f-481e-ae63-40a5577072c5", @@ -143,7 +1057,7 @@ "provenance": [] }, "kernelspec": { - "display_name": "Python 3 (ipykernel)", + "display_name": "Python 3", "language": "python", "name": "python3" }, @@ -157,7 +1071,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.13" + "version": "3.13.7" } }, "nbformat": 4,