From 86dc973c76b60f5175facfa202f2a740df9fe984 Mon Sep 17 00:00:00 2001 From: martin-paz-y Date: Sat, 20 Sep 2025 16:59:29 +0200 Subject: [PATCH 1/2] Solved Lab --- lab-dw-data-structuring-and-combining.ipynb | 751 +++++++++++++++++++- 1 file changed, 742 insertions(+), 9 deletions(-) diff --git a/lab-dw-data-structuring-and-combining.ipynb b/lab-dw-data-structuring-and-combining.ipynb index ec4e3f9..d8619b8 100644 --- a/lab-dw-data-structuring-and-combining.ipynb +++ b/lab-dw-data-structuring-and-combining.ipynb @@ -36,14 +36,305 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 40, "id": "492d06e3-92c7-4105-ac72-536db98d3244", "metadata": { "id": "492d06e3-92c7-4105-ac72-536db98d3244" }, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
customerstategendereducationcustomer_lifetime_valueincomemonthly_premium_autonumber_of_open_complaintspolicy_typevehicle_classtotal_claim_amountst
0QZ44356ArizonaFBachelor697953.590.094.00.0Personal AutoFour-Door Car1131.464935NaN
1AI49188NevadaFBachelor1288743.1748767.0108.00.0Personal AutoTwo-Door Car566.472247NaN
2WW63253CaliforniaMBachelor764586.180.0106.00.0Corporate AutoSUV529.881344NaN
3GA49547WashingtonMHigh School or Below536307.6536357.068.00.0Personal AutoFour-Door Car17.269323NaN
4OC83172OregonFBachelor825629.7862902.069.00.0Personal AutoTwo-Door Car159.383042NaN
.......................................
9015LA72316CaliforniaMBachelor23405.9879871941.073.00Personal AutoFour-Door Car198.234764NaN
9016PK87824CaliforniaFCollege3096.51121721604.079.00Corporate AutoFour-Door Car379.200000NaN
9017TD14365CaliforniaMBachelor8163.8904280.085.03Corporate AutoFour-Door Car790.784983NaN
9018UP19263CaliforniaMCollege7524.44243621941.096.00Personal AutoFour-Door Car691.200000NaN
9019Y167826CaliforniaMCollege2611.8368660.077.00Corporate AutoTwo-Door Car369.600000NaN
\n", + "

9020 rows × 12 columns

\n", + "
" + ], + "text/plain": [ + " customer state gender education \\\n", + "0 QZ44356 Arizona F Bachelor \n", + "1 AI49188 Nevada F Bachelor \n", + "2 WW63253 California M Bachelor \n", + "3 GA49547 Washington M High School or Below \n", + "4 OC83172 Oregon F Bachelor \n", + "... ... ... ... ... \n", + "9015 LA72316 California M Bachelor \n", + "9016 PK87824 California F College \n", + "9017 TD14365 California M Bachelor \n", + "9018 UP19263 California M College \n", + "9019 Y167826 California M College \n", + "\n", + " customer_lifetime_value income monthly_premium_auto \\\n", + "0 697953.59 0.0 94.0 \n", + "1 1288743.17 48767.0 108.0 \n", + "2 764586.18 0.0 106.0 \n", + "3 536307.65 36357.0 68.0 \n", + "4 825629.78 62902.0 69.0 \n", + "... ... ... ... \n", + "9015 23405.98798 71941.0 73.0 \n", + "9016 3096.511217 21604.0 79.0 \n", + "9017 8163.890428 0.0 85.0 \n", + "9018 7524.442436 21941.0 96.0 \n", + "9019 2611.836866 0.0 77.0 \n", + "\n", + " number_of_open_complaints policy_type vehicle_class \\\n", + "0 0.0 Personal Auto Four-Door Car \n", + "1 0.0 Personal Auto Two-Door Car \n", + "2 0.0 Corporate Auto SUV \n", + "3 0.0 Personal Auto Four-Door Car \n", + "4 0.0 Personal Auto Two-Door Car \n", + "... ... ... ... \n", + "9015 0 Personal Auto Four-Door Car \n", + "9016 0 Corporate Auto Four-Door Car \n", + "9017 3 Corporate Auto Four-Door Car \n", + "9018 0 Personal Auto Four-Door Car \n", + "9019 0 Corporate Auto Two-Door Car \n", + "\n", + " total_claim_amount st \n", + "0 1131.464935 NaN \n", + "1 566.472247 NaN \n", + "2 529.881344 NaN \n", + "3 17.269323 NaN \n", + "4 159.383042 NaN \n", + "... ... ... \n", + "9015 198.234764 NaN \n", + "9016 379.200000 NaN \n", + "9017 790.784983 NaN \n", + "9018 691.200000 NaN \n", + "9019 369.600000 NaN \n", + "\n", + "[9020 rows x 12 columns]" + ] + }, + "execution_count": 40, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# Your code goes here" + "import pandas as pd\n", + "\n", + "# Crear 3 DataFrames a partir de las URLs\n", + "df_sales = pd.read_csv(\"https://drive.google.com/uc?export=download&id=1I56oecN_u7G09geriYO02EL4GNuLJGsi\")\n", + "df_sales_2 = pd.read_csv(\"https://raw.githubusercontent.com/data-bootcamp-v4/data/main/file2.csv\")\n", + "df_sales_3 = pd.read_csv(\"https://raw.githubusercontent.com/data-bootcamp-v4/data/main/file3.csv\")\n", + "\n", + "df_sales_2.columns = (\n", + " df_sales_2.columns\n", + " .str.strip() # elimina espacios alrededor\n", + " .str.lower() # convierte a minúsculas\n", + " .str.replace(\" \", \"_\")\n", + ")\n", + "\n", + "df_sales_3.columns = (\n", + " df_sales_3.columns\n", + " .str.strip() # elimina espacios alrededor\n", + " .str.lower() # convierte a minúsculas\n", + " .str.replace(\" \", \"_\")\n", + ")\n", + "\n", + "# Concatenar los tres en un solo DataFrame\n", + "data_df = pd.concat([df_sales, df_sales_2, df_sales_3], axis=0, ignore_index=True)\n", + "\n", + "data_df\n" ] }, { @@ -72,14 +363,456 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 41, "id": "aa10d9b0-1c27-4d3f-a8e4-db6ab73bfd26", "metadata": { "id": "aa10d9b0-1c27-4d3f-a8e4-db6ab73bfd26" }, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
customerstatecustomer_lifetime_valueresponsecoverageeducationeffective_to_dateemploymentstatusgenderincome...number_of_policiespolicy_typepolicyrenew_offer_typesales_channeltotal_claim_amountvehicle_classvehicle_sizevehicle_typemonth
0DK49336Arizona4809.216960NoBasicCollege2011-02-18EmployedM48029...9Corporate AutoCorporate L3Offer3Agent292.800000Four-Door CarMedsizeA2
1KX64629California2228.525238NoBasicCollege2011-01-18UnemployedF0...1Personal AutoPersonal L3Offer4Call Center744.924331Four-Door CarMedsizeA1
2LZ68649Washington14947.917300NoBasicBachelor2011-02-10EmployedM22139...2Personal AutoPersonal L3Offer3Call Center480.000000SUVMedsizeA2
3XL78013Oregon22332.439460YesExtendedCollege2011-01-11EmployedM49078...2Corporate AutoCorporate L3Offer2Branch484.013411Four-Door CarMedsizeA1
4QA50777Oregon9025.067525NoPremiumBachelor2011-01-17Medical LeaveF23675...7Personal AutoPersonal L2Offer1Branch707.925645Four-Door CarMedsizeA1
\n", + "

5 rows × 26 columns

\n", + "
" + ], + "text/plain": [ + " customer state customer_lifetime_value response coverage education \\\n", + "0 DK49336 Arizona 4809.216960 No Basic College \n", + "1 KX64629 California 2228.525238 No Basic College \n", + "2 LZ68649 Washington 14947.917300 No Basic Bachelor \n", + "3 XL78013 Oregon 22332.439460 Yes Extended College \n", + "4 QA50777 Oregon 9025.067525 No Premium Bachelor \n", + "\n", + " effective_to_date employmentstatus gender income ... number_of_policies \\\n", + "0 2011-02-18 Employed M 48029 ... 9 \n", + "1 2011-01-18 Unemployed F 0 ... 1 \n", + "2 2011-02-10 Employed M 22139 ... 2 \n", + "3 2011-01-11 Employed M 49078 ... 2 \n", + "4 2011-01-17 Medical Leave F 23675 ... 7 \n", + "\n", + " policy_type policy renew_offer_type sales_channel \\\n", + "0 Corporate Auto Corporate L3 Offer3 Agent \n", + "1 Personal Auto Personal L3 Offer4 Call Center \n", + "2 Personal Auto Personal L3 Offer3 Call Center \n", + "3 Corporate Auto Corporate L3 Offer2 Branch \n", + "4 Personal Auto Personal L2 Offer1 Branch \n", + "\n", + " total_claim_amount vehicle_class vehicle_size vehicle_type month \n", + "0 292.800000 Four-Door Car Medsize A 2 \n", + "1 744.924331 Four-Door Car Medsize A 1 \n", + "2 480.000000 SUV Medsize A 2 \n", + "3 484.013411 Four-Door Car Medsize A 1 \n", + "4 707.925645 Four-Door Car Medsize A 1 \n", + "\n", + "[5 rows x 26 columns]" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ - "# Your code goes here" + "import pandas as pd\n", + "\n", + "df_sales_4 = pd.read_csv(\"https://raw.githubusercontent.com/data-bootcamp-v4/data/main/marketing_customer_analysis_clean.csv\")\n", + "df_sales_4.columns = (\n", + " df_sales_4.columns\n", + " .str.strip() # elimina espacios alrededor\n", + " .str.lower() # convierte a minúsculas\n", + " .str.replace(\" \", \"_\")\n", + ")\n", + "df_sales_4 = df_sales_4.drop(columns=[\"unnamed:_0\"])\n", + "\n", + "display(df_sales_4.head())" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "id": "4f498756", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
customerstate_xcustomer_lifetime_value_xresponsecoverageeducation_xeffective_to_dateemploymentstatusgender_xincome_x...gender_yeducation_ycustomer_lifetime_value_yincome_ymonthly_premium_auto_ynumber_of_open_complaints_ypolicy_type_yvehicle_class_ytotal_claim_amount_yst
0DK49336Arizona4809.216960NoBasicCollege2011-02-18EmployedM48029...MCollege4809.2169648029.061.00Corporate AutoFour-Door Car292.800000NaN
1KX64629California2228.525238NoBasicCollege2011-01-18UnemployedF0...FCollege2228.5252380.064.00Personal AutoFour-Door Car744.924331NaN
2LZ68649Washington14947.917300NoBasicBachelor2011-02-10EmployedM22139...MBachelor14947.917322139.0100.00Personal AutoSUV480.000000NaN
3XL78013Oregon22332.439460YesExtendedCollege2011-01-11EmployedM49078...MCollege22332.4394649078.097.00Corporate AutoFour-Door Car484.013411NaN
4QA50777Oregon9025.067525NoPremiumBachelor2011-01-17Medical LeaveF23675...FBachelor9025.06752523675.0117.00Personal AutoFour-Door Car707.925645NaN
\n", + "

5 rows × 37 columns

\n", + "
" + ], + "text/plain": [ + " customer state_x customer_lifetime_value_x response coverage \\\n", + "0 DK49336 Arizona 4809.216960 No Basic \n", + "1 KX64629 California 2228.525238 No Basic \n", + "2 LZ68649 Washington 14947.917300 No Basic \n", + "3 XL78013 Oregon 22332.439460 Yes Extended \n", + "4 QA50777 Oregon 9025.067525 No Premium \n", + "\n", + " education_x effective_to_date employmentstatus gender_x income_x ... \\\n", + "0 College 2011-02-18 Employed M 48029 ... \n", + "1 College 2011-01-18 Unemployed F 0 ... \n", + "2 Bachelor 2011-02-10 Employed M 22139 ... \n", + "3 College 2011-01-11 Employed M 49078 ... \n", + "4 Bachelor 2011-01-17 Medical Leave F 23675 ... \n", + "\n", + " gender_y education_y customer_lifetime_value_y income_y \\\n", + "0 M College 4809.21696 48029.0 \n", + "1 F College 2228.525238 0.0 \n", + "2 M Bachelor 14947.9173 22139.0 \n", + "3 M College 22332.43946 49078.0 \n", + "4 F Bachelor 9025.067525 23675.0 \n", + "\n", + " monthly_premium_auto_y number_of_open_complaints_y policy_type_y \\\n", + "0 61.0 0 Corporate Auto \n", + "1 64.0 0 Personal Auto \n", + "2 100.0 0 Personal Auto \n", + "3 97.0 0 Corporate Auto \n", + "4 117.0 0 Personal Auto \n", + "\n", + " vehicle_class_y total_claim_amount_y st \n", + "0 Four-Door Car 292.800000 NaN \n", + "1 Four-Door Car 744.924331 NaN \n", + "2 SUV 480.000000 NaN \n", + "3 Four-Door Car 484.013411 NaN \n", + "4 Four-Door Car 707.925645 NaN \n", + "\n", + "[5 rows x 37 columns]" + ] + }, + "execution_count": 42, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "full_set = pd.merge(df_sales_4, data_df, on='customer',how='left')\n", + "full_set.head()" ] }, { @@ -130,7 +863,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 33, "id": "3a069e0b-b400-470e-904d-d17582191be4", "metadata": { "id": "3a069e0b-b400-470e-904d-d17582191be4" @@ -146,7 +879,7 @@ "provenance": [] }, "kernelspec": { - "display_name": "Python 3 (ipykernel)", + "display_name": "Python 3", "language": "python", "name": "python3" }, @@ -160,7 +893,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.13" + "version": "3.13.7" } }, "nbformat": 4, From 6d8b62d7c5c46a1cb954549bdf19d159bba39c38 Mon Sep 17 00:00:00 2001 From: martin-paz-y Date: Sat, 20 Sep 2025 17:21:16 +0200 Subject: [PATCH 2/2] Solved Lab --- lab-dw-data-structuring-and-combining.ipynb | 105 ++++++++++++++++++++ 1 file changed, 105 insertions(+) diff --git a/lab-dw-data-structuring-and-combining.ipynb b/lab-dw-data-structuring-and-combining.ipynb index d8619b8..3d335bf 100644 --- a/lab-dw-data-structuring-and-combining.ipynb +++ b/lab-dw-data-structuring-and-combining.ipynb @@ -826,6 +826,39 @@ "Round the total revenue to 2 decimal points. Analyze the resulting table to draw insights." ] }, + { + "cell_type": "code", + "execution_count": 51, + "id": "1e24baa0", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " total_claim_amount_y\n", + "sales_channel \n", + "Agent 1777960.36\n", + "Branch 1270482.03\n", + "Call Center 905278.74\n", + "Web 695431.06\n" + ] + } + ], + "source": [ + "pivot_table = pd.pivot_table(\n", + " full_set ,\n", + " values=\"total_claim_amount_y\", # métrica de ingresos\n", + " index=\"sales_channel\", # canal de ventas\n", + " aggfunc=\"sum\" # sumamos el revenue\n", + ")\n", + "\n", + "# Redondeamos a 2 decimales\n", + "pivot_table = pivot_table.round(2)\n", + "\n", + "print(pivot_table)" + ] + }, { "cell_type": "markdown", "id": "640993b2-a291-436c-a34d-a551144f8196", @@ -836,6 +869,78 @@ "2. Create a pivot table that shows the average customer lifetime value per gender and education level. Analyze the resulting table to draw insights." ] }, + { + "cell_type": "code", + "execution_count": 56, + "id": "17368867", + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "# 1) Limpiar símbolos y espacios\n", + "s = (\n", + " full_set[\"customer_lifetime_value_y\"]\n", + " .astype(str)\n", + " .str.replace(\"%\", \"\", regex=False)\n", + " .str.replace(\",\", \"\", regex=False)\n", + " .str.strip()\n", + ")\n", + "\n", + "# 2) Convertir strings que representan vacío a NaN reales\n", + "s = s.replace(\n", + " {\n", + " \"\": np.nan,\n", + " \"nan\": np.nan,\n", + " \"NaN\": np.nan,\n", + " \"None\": np.nan,\n", + " \"NULL\": np.nan,\n", + " \"null\": np.nan,\n", + " }\n", + ")\n", + "\n", + "full_set[\"customer_lifetime_value_y\"] = pd.to_numeric(s, errors=\"raise\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": 57, + "id": "0d635189", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "education_y Bachelor Bachelors College Doctor High School or Below \\\n", + "gender_y \n", + "F 168087.18 475344.97 201455.39 189443.23 183235.94 \n", + "M 147379.51 487943.85 176922.75 154671.27 182380.65 \n", + "Male 528511.87 NaN NaN NaN NaN \n", + "female NaN NaN 248772.97 NaN 253774.45 \n", + "\n", + "education_y Master \n", + "gender_y \n", + "F 191811.85 \n", + "M 151575.56 \n", + "Male NaN \n", + "female NaN \n" + ] + } + ], + "source": [ + "\n", + "# Pivot table: promedio de CLV por género y nivel educativo\n", + "pivot_clv = pd.pivot_table(\n", + " full_set,\n", + " values=\"customer_lifetime_value_y\", # columna CLV (ajusta si el nombre varía)\n", + " index=\"gender_y\", # filas = género\n", + " columns=\"education_y\", # columnas = educación\n", + " aggfunc=\"mean\" # promedio\n", + ").round(2)\n", + "\n", + "print(pivot_clv)\n" + ] + }, { "cell_type": "markdown", "id": "32c7f2e5-3d90-43e5-be33-9781b6069198",