diff --git a/lab-dw-aggregating.ipynb b/lab-dw-aggregating.ipynb index fadd718..d8a0196 100644 --- a/lab-dw-aggregating.ipynb +++ b/lab-dw-aggregating.ipynb @@ -36,6 +36,535 @@ " - have a response \"Yes\" to the last marketing campaign." ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "3793817f", + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.microsoft.datawrangler.viewer.v0+json": { + "columns": [ + { + "name": "index", + "rawType": "int64", + "type": "integer" + }, + { + "name": "Unnamed: 0", + "rawType": "int64", + "type": "integer" + }, + { + "name": "Customer", + "rawType": "object", + "type": "string" + }, + { + "name": "State", + "rawType": "object", + "type": "string" + }, + { + "name": "Customer Lifetime Value", + "rawType": "float64", + "type": "float" + }, + { + "name": "Response", + "rawType": "object", + "type": "string" + }, + { + "name": "Coverage", + "rawType": "object", + "type": "string" + }, + { + "name": "Education", + "rawType": "object", + "type": "string" + }, + { + "name": "Effective To Date", + "rawType": "object", + "type": "string" + }, + { + "name": "EmploymentStatus", + "rawType": "object", + "type": "string" + }, + { + "name": "Gender", + "rawType": "object", + "type": "string" + }, + { + "name": "Income", + "rawType": "int64", + "type": "integer" + }, + { + "name": "Location Code", + "rawType": "object", + "type": "string" + }, + { + "name": "Marital Status", + "rawType": "object", + "type": "string" + }, + { + "name": "Monthly Premium Auto", + "rawType": "int64", + "type": "integer" + }, + { + "name": "Months Since Last Claim", + "rawType": "float64", + "type": "float" + }, + { + "name": "Months Since Policy Inception", + "rawType": "int64", + "type": "integer" + }, + { + "name": "Number of Open Complaints", + "rawType": "float64", + "type": "float" + }, + { + "name": "Number of Policies", + "rawType": "int64", + "type": "integer" + }, + { + "name": "Policy Type", + "rawType": "object", + 
"type": "string" + }, + { + "name": "Policy", + "rawType": "object", + "type": "string" + }, + { + "name": "Renew Offer Type", + "rawType": "object", + "type": "string" + }, + { + "name": "Sales Channel", + "rawType": "object", + "type": "string" + }, + { + "name": "Total Claim Amount", + "rawType": "float64", + "type": "float" + }, + { + "name": "Vehicle Class", + "rawType": "object", + "type": "string" + }, + { + "name": "Vehicle Size", + "rawType": "object", + "type": "string" + }, + { + "name": "Vehicle Type", + "rawType": "object", + "type": "unknown" + } + ], + "ref": "42836973-6d1d-4643-a590-df0cc2df8735", + "rows": [ + [ + "3", + "3", + "XL78013", + "Oregon", + "22332.43946", + "Yes", + "Extended", + "College", + "1/11/11", + "Employed", + "M", + "49078", + "Suburban", + "Single", + "97", + "10.0", + "3", + "0.0", + "2", + "Corporate Auto", + "Corporate L3", + "Offer2", + "Branch", + "484.013411", + "Four-Door Car", + "Medsize", + "A" + ], + [ + "8", + "8", + "FM55990", + "California", + "5989.773931", + "Yes", + "Premium", + "College", + "1/19/11", + "Employed", + "M", + "66839", + "Suburban", + "Single", + "154", + "33.0", + "24", + "0.0", + "1", + "Personal Auto", + "Personal L1", + "Offer2", + "Branch", + "739.2", + "Sports Car", + "Medsize", + null + ], + [ + "15", + "15", + "CW49887", + "California", + "4626.801093", + "Yes", + "Basic", + "Master", + "1/16/11", + "Employed", + "F", + "79487", + "Suburban", + "Divorced", + "114", + "20.0", + "87", + "0.0", + "1", + "Special Auto", + "Special L1", + "Offer2", + "Branch", + "547.2", + "SUV", + "Medsize", + null + ], + [ + "19", + "19", + "NJ54277", + "California", + "3746.751625", + "Yes", + "Extended", + "College", + "2/26/11", + "Employed", + "F", + "41479", + "Rural", + "Married", + "94", + "14.0", + "38", + "1.0", + "1", + "Personal Auto", + "Personal L2", + "Offer2", + "Call Center", + "19.575683", + "Two-Door Car", + "Large", + "A" + ], + [ + "27", + "27", + "MQ68407", + "Oregon", + "4376.363592", 
+ "Yes", + "Premium", + "Bachelor", + "2/28/11", + "Employed", + "F", + "63774", + "Rural", + "Divorced", + "111", + "18.0", + "63", + "0.0", + "1", + "Personal Auto", + "Personal L3", + "Offer2", + "Agent", + "60.036683", + "Four-Door Car", + "Medsize", + null + ] + ], + "shape": { + "columns": 26, + "rows": 5 + } + }, + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Unnamed: 0CustomerStateCustomer Lifetime ValueResponseCoverageEducationEffective To DateEmploymentStatusGender...Number of Open ComplaintsNumber of PoliciesPolicy TypePolicyRenew Offer TypeSales ChannelTotal Claim AmountVehicle ClassVehicle SizeVehicle Type
33XL78013Oregon22332.439460YesExtendedCollege1/11/11EmployedM...0.02Corporate AutoCorporate L3Offer2Branch484.013411Four-Door CarMedsizeA
88FM55990California5989.773931YesPremiumCollege1/19/11EmployedM...0.01Personal AutoPersonal L1Offer2Branch739.200000Sports CarMedsizeNaN
1515CW49887California4626.801093YesBasicMaster1/16/11EmployedF...0.01Special AutoSpecial L1Offer2Branch547.200000SUVMedsizeNaN
1919NJ54277California3746.751625YesExtendedCollege2/26/11EmployedF...1.01Personal AutoPersonal L2Offer2Call Center19.575683Two-Door CarLargeA
2727MQ68407Oregon4376.363592YesPremiumBachelor2/28/11EmployedF...0.01Personal AutoPersonal L3Offer2Agent60.036683Four-Door CarMedsizeNaN
\n", + "

5 rows × 26 columns

\n", + "
" + ], + "text/plain": [ + " Unnamed: 0 Customer State Customer Lifetime Value Response \\\n", + "3 3 XL78013 Oregon 22332.439460 Yes \n", + "8 8 FM55990 California 5989.773931 Yes \n", + "15 15 CW49887 California 4626.801093 Yes \n", + "19 19 NJ54277 California 3746.751625 Yes \n", + "27 27 MQ68407 Oregon 4376.363592 Yes \n", + "\n", + " Coverage Education Effective To Date EmploymentStatus Gender ... \\\n", + "3 Extended College 1/11/11 Employed M ... \n", + "8 Premium College 1/19/11 Employed M ... \n", + "15 Basic Master 1/16/11 Employed F ... \n", + "19 Extended College 2/26/11 Employed F ... \n", + "27 Premium Bachelor 2/28/11 Employed F ... \n", + "\n", + " Number of Open Complaints Number of Policies Policy Type \\\n", + "3 0.0 2 Corporate Auto \n", + "8 0.0 1 Personal Auto \n", + "15 0.0 1 Special Auto \n", + "19 1.0 1 Personal Auto \n", + "27 0.0 1 Personal Auto \n", + "\n", + " Policy Renew Offer Type Sales Channel Total Claim Amount \\\n", + "3 Corporate L3 Offer2 Branch 484.013411 \n", + "8 Personal L1 Offer2 Branch 739.200000 \n", + "15 Special L1 Offer2 Branch 547.200000 \n", + "19 Personal L2 Offer2 Call Center 19.575683 \n", + "27 Personal L3 Offer2 Agent 60.036683 \n", + "\n", + " Vehicle Class Vehicle Size Vehicle Type \n", + "3 Four-Door Car Medsize A \n", + "8 Sports Car Medsize NaN \n", + "15 SUV Medsize NaN \n", + "19 Two-Door Car Large A \n", + "27 Four-Door Car Medsize NaN \n", + "\n", + "[5 rows x 26 columns]" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import pandas as pd\n", + "\n", + "\n", + "url = \"https://raw.githubusercontent.com/data-bootcamp-v4/data/main/marketing_customer_analysis.csv\"\n", + "df = pd.read_csv(url)\n", + "\n", + "df[\"Response\"] = df[\"Response\"].str.strip().str.title()\n", + "df[\"Total Claim Amount\"] = pd.to_numeric(df[\"Total Claim Amount\"], errors=\"coerce\")\n", + "\n", + "df_filtered = df[\n", + " (df[\"Total Claim Amount\"] < 1000) 
&\n", + " (df[\"Response\"] == \"Yes\")\n", + "].copy()\n", + "\n", + "df_filtered.head()\n" + ] + }, { "cell_type": "markdown", "id": "b9be383e-5165-436e-80c8-57d4c757c8c3", @@ -48,6 +577,200 @@ " - compare these insights to `total_claim_amount` patterns, and discuss which segments appear most profitable or low-risk for the company." ] }, + { + "cell_type": "code", + "execution_count": 4, + "id": "756b6d55", + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.microsoft.datawrangler.viewer.v0+json": { + "columns": [ + { + "name": "('Policy Type', 'Gender')", + "rawType": "object", + "type": "unknown" + }, + { + "name": "Customer Lifetime Value", + "rawType": "float64", + "type": "float" + }, + { + "name": "Monthly Premium Auto", + "rawType": "float64", + "type": "float" + }, + { + "name": "Total Claim Amount", + "rawType": "float64", + "type": "float" + } + ], + "ref": "fe4da37a-c66c-44f1-8a29-fc340af13d96", + "rows": [ + [ + "('Corporate Auto', 'F')", + "7712.63", + "94.3", + "433.74" + ], + [ + "('Corporate Auto', 'M')", + "7944.47", + "92.19", + "408.58" + ], + [ + "('Personal Auto', 'F')", + "8339.79", + "99.0", + "452.97" + ], + [ + "('Personal Auto', 'M')", + "7448.38", + "91.09", + "457.01" + ], + [ + "('Special Auto', 'F')", + "7691.58", + "92.31", + "453.28" + ], + [ + "('Special Auto', 'M')", + "8247.09", + "86.34", + "429.53" + ] + ], + "shape": { + "columns": 3, + "rows": 6 + } + }, + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Customer Lifetime ValueMonthly Premium AutoTotal Claim Amount
Policy TypeGender
Corporate AutoF7712.6394.30433.74
M7944.4792.19408.58
Personal AutoF8339.7999.00452.97
M7448.3891.09457.01
Special AutoF7691.5892.31453.28
M8247.0986.34429.53
\n", + "
" + ], + "text/plain": [ + " Customer Lifetime Value Monthly Premium Auto \\\n", + "Policy Type Gender \n", + "Corporate Auto F 7712.63 94.30 \n", + " M 7944.47 92.19 \n", + "Personal Auto F 8339.79 99.00 \n", + " M 7448.38 91.09 \n", + "Special Auto F 7691.58 92.31 \n", + " M 8247.09 86.34 \n", + "\n", + " Total Claim Amount \n", + "Policy Type Gender \n", + "Corporate Auto F 433.74 \n", + " M 408.58 \n", + "Personal Auto F 452.97 \n", + " M 457.01 \n", + "Special Auto F 453.28 \n", + " M 429.53 " + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pivot = df[df[\"Response\"]==\"Yes\"].pivot_table(\n", + " values=[\"Monthly Premium Auto\",\"Customer Lifetime Value\",\"Total Claim Amount\"],\n", + " index=[\"Policy Type\",\"Gender\"],\n", + " aggfunc=\"mean\"\n", + ").round(2)\n", + "\n", + "pivot" + ] + }, + { + "cell_type": "markdown", + "id": "b3529add", + "metadata": {}, + "source": [ + "Entre los clientes que respondieron “Yes”, las diferencias entre segmentos son moderadas y ningún tipo de póliza domina en las tres métricas. Corporate Auto es el segmento de menor riesgo: para ambos géneros presenta los importes medios de siniestros más bajos (408.58 en hombres, el mínimo de la tabla, y 433.74 en mujeres), con primas medias de 92.19 y 94.30. Las mujeres de Personal Auto tienen la prima media más alta (99.00) y el CLV medio más alto (8339.79), aunque con siniestros de 452.97. Los hombres de Personal Auto son el grupo menos atractivo: tienen el CLV medio más bajo (7448.38) y los siniestros medios más altos (457.01). En Special Auto, los hombres combinan un CLV alto (8247.09) con la prima más baja (86.34) y siniestros de 429.53, mientras que las mujeres muestran un CLV de 7691.58 con siniestros de 453.28." + ] + }, { "cell_type": "markdown", "id": "7050f4ac-53c5-4193-a3c0-8699b87196f0", @@ -58,6 +781,46 @@ "3. Analyze the total number of customers who have policies in each state, and then filter the results to only include states where there are more than 500 customers." 
] }, + { + "cell_type": "code", + "execution_count": 5, + "id": "ed6ac4fe", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(State\n", + " California 3552\n", + " Oregon 2909\n", + " Arizona 1937\n", + " Nevada 993\n", + " Washington 888\n", + " Name: count, dtype: int64,\n", + " State\n", + " California 3552\n", + " Oregon 2909\n", + " Arizona 1937\n", + " Nevada 993\n", + " Washington 888\n", + " Name: count, dtype: int64)" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Número de clientes por estado\n", + "state_counts = df[\"State\"].value_counts()\n", + "\n", + "# Filtrar solo estados con más de 500 clientes\n", + "states_over_500 = state_counts[state_counts > 500]\n", + "\n", + "state_counts, states_over_500" + ] + }, { "cell_type": "markdown", "id": "b60a4443-a1a7-4bbf-b78e-9ccdf9895e0d", @@ -68,6 +831,271 @@ "4. Find the maximum, minimum, and median customer lifetime value by education level and gender. Write your conclusions." 
] }, + { + "cell_type": "code", + "execution_count": 6, + "id": "3cf23d0c", + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.microsoft.datawrangler.viewer.v0+json": { + "columns": [ + { + "name": "('Education', 'Gender')", + "rawType": "object", + "type": "unknown" + }, + { + "name": "('max', 'Customer Lifetime Value')", + "rawType": "float64", + "type": "float" + }, + { + "name": "('min', 'Customer Lifetime Value')", + "rawType": "float64", + "type": "float" + }, + { + "name": "('median', 'Customer Lifetime Value')", + "rawType": "float64", + "type": "float" + } + ], + "ref": "8644bce4-aa0e-4816-b434-48372e811055", + "rows": [ + [ + "('Bachelor', 'F')", + "73225.96", + "1904.0", + "5640.51" + ], + [ + "('Bachelor', 'M')", + "67907.27", + "1898.01", + "5548.03" + ], + [ + "('College', 'F')", + "61850.19", + "1898.68", + "5623.61" + ], + [ + "('College', 'M')", + "61134.68", + "1918.12", + "6005.85" + ], + [ + "('Doctor', 'F')", + "44856.11", + "2395.57", + "5332.46" + ], + [ + "('Doctor', 'M')", + "32677.34", + "2267.6", + "5577.67" + ], + [ + "('High School or Below', 'F')", + "55277.45", + "2144.92", + "6039.55" + ], + [ + "('High School or Below', 'M')", + "83325.38", + "1940.98", + "6286.73" + ], + [ + "('Master', 'F')", + "51016.07", + "2417.78", + "5729.86" + ], + [ + "('Master', 'M')", + "50568.26", + "2272.31", + "5579.1" + ] + ], + "shape": { + "columns": 3, + "rows": 10 + } + }, + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
maxminmedian
Customer Lifetime ValueCustomer Lifetime ValueCustomer Lifetime Value
EducationGender
BachelorF73225.961904.005640.51
M67907.271898.015548.03
CollegeF61850.191898.685623.61
M61134.681918.126005.85
DoctorF44856.112395.575332.46
M32677.342267.605577.67
High School or BelowF55277.452144.926039.55
M83325.381940.986286.73
MasterF51016.072417.785729.86
M50568.262272.315579.10
\n", + "
" + ], + "text/plain": [ + " max min \\\n", + " Customer Lifetime Value Customer Lifetime Value \n", + "Education Gender \n", + "Bachelor F 73225.96 1904.00 \n", + " M 67907.27 1898.01 \n", + "College F 61850.19 1898.68 \n", + " M 61134.68 1918.12 \n", + "Doctor F 44856.11 2395.57 \n", + " M 32677.34 2267.60 \n", + "High School or Below F 55277.45 2144.92 \n", + " M 83325.38 1940.98 \n", + "Master F 51016.07 2417.78 \n", + " M 50568.26 2272.31 \n", + "\n", + " median \n", + " Customer Lifetime Value \n", + "Education Gender \n", + "Bachelor F 5640.51 \n", + " M 5548.03 \n", + "College F 5623.61 \n", + " M 6005.85 \n", + "Doctor F 5332.46 \n", + " M 5577.67 \n", + "High School or Below F 6039.55 \n", + " M 6286.73 \n", + "Master F 5729.86 \n", + " M 5579.10 " + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "clv_stats = df.pivot_table(\n", + " values=\"Customer Lifetime Value\",\n", + " index=[\"Education\", \"Gender\"],\n", + " aggfunc=[\"max\", \"min\", \"median\"]\n", + ").round(2)\n", + "\n", + "clv_stats" + ] + }, + { + "cell_type": "markdown", + "id": "fdde83a1", + "metadata": {}, + "source": [ + "Un mayor nivel educativo no se traduce en un CLV más alto. Las medianas más altas corresponden a High School or Below (6039.55 en mujeres y 6286.73 en hombres), y el máximo absoluto también pertenece a ese nivel (83325.38, hombres), mientras que Doctor presenta los máximos más bajos (44856.11 en mujeres y 32677.34 en hombres). Los mínimos, en cambio, son algo más altos en Master y Doctor (entre 2267.60 y 2417.78) que en Bachelor y College (en torno a 1900). Las diferencias por género son pequeñas y no siguen un patrón único: la mediana femenina es mayor en Bachelor y Master, y la masculina lo es en College, Doctor y High School or Below. Dado que las medianas de todos los grupos se sitúan entre 5332.46 y 6286.73, ni la educación ni el género parecen por sí solos buenos predictores del valor a largo plazo del cliente." 
+ ] + }, { "cell_type": "markdown", "id": "b42999f9-311f-481e-ae63-40a5577072c5", @@ -143,7 +1171,7 @@ "provenance": [] }, "kernelspec": { - "display_name": "Python 3 (ipykernel)", + "display_name": "Python 3", "language": "python", "name": "python3" }, @@ -157,7 +1185,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.13" + "version": "3.13.7" } }, "nbformat": 4,