import pandas as pd
import numpy as np

urls = [
    "https://raw.githubusercontent.com/data-bootcamp-v4/data/main/file1.csv",
    "https://raw.githubusercontent.com/data-bootcamp-v4/data/main/file2.csv",
    "https://raw.githubusercontent.com/data-bootcamp-v4/data/main/file3.csv"
]

# Header spellings that still differ once names are lower-cased. The combined
# output showed both an "ST" and a "State" column, i.e. the files disagree.
COLUMN_ALIASES = {"st": "state"}


def standardize_columns(frame):
    """Return a copy of ``frame`` with snake_case, alias-resolved column names.

    Without this step ``pd.concat`` aligns on the raw header text, so
    "GENDER"/"Gender" and "ST"/"State" end up as separate, half-empty columns.
    """
    snake_cased = frame.rename(
        columns=lambda name: str(name).strip().lower().replace(" ", "_")
    )
    return snake_cased.rename(columns=COLUMN_ALIASES)


# Read each file and normalize its headers before stacking them.
dataframes = [standardize_columns(pd.read_csv(url)) for url in urls]

combined_df = pd.concat(dataframes, ignore_index=True)

# Rows that are empty in every column carry no information; drop them together
# with exact duplicates, then rebuild a contiguous index.
# The previous fillna on 'column_a' / 'column_b' was removed: those columns do
# not exist in this data, so the call never changed anything.
combined_df = (
    combined_df
    .dropna(how="all")
    .drop_duplicates()
    .reset_index(drop=True)
)

print(combined_df.head())
print(f"Total cleaned rows: {len(combined_df)}")
\n", + "\n", + " number_of_policies policy_type policy renew_offer_type \\\n", + "0 9 Corporate Auto Corporate L3 Offer3 \n", + "1 1 Personal Auto Personal L3 Offer4 \n", + "2 2 Personal Auto Personal L3 Offer3 \n", + "3 2 Corporate Auto Corporate L3 Offer2 \n", + "4 7 Personal Auto Personal L2 Offer1 \n", + "\n", + " sales_channel total_claim_amount vehicle_class vehicle_size \\\n", + "0 Agent 292.800000 Four-Door Car Medsize \n", + "1 Call Center 744.924331 Four-Door Car Medsize \n", + "2 Call Center 480.000000 SUV Medsize \n", + "3 Branch 484.013411 Four-Door Car Medsize \n", + "4 Branch 707.925645 Four-Door Car Medsize \n", + "\n", + " vehicle_type month \n", + "0 A 2 \n", + "1 A 1 \n", + "2 A 2 \n", + "3 A 1 \n", + "4 A 1 \n", + "\n", + "[5 rows x 27 columns]\n", + "\n", + "Dataset info:\n", + "\n", + "RangeIndex: 10910 entries, 0 to 10909\n", + "Data columns (total 27 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 unnamed:_0 10910 non-null int64 \n", + " 1 customer 10910 non-null object \n", + " 2 state 10910 non-null object \n", + " 3 customer_lifetime_value 10910 non-null float64\n", + " 4 response 10910 non-null object \n", + " 5 coverage 10910 non-null object \n", + " 6 education 10910 non-null object \n", + " 7 effective_to_date 10910 non-null object \n", + " 8 employmentstatus 10910 non-null object \n", + " 9 gender 10910 non-null object \n", + " 10 income 10910 non-null int64 \n", + " 11 location_code 10910 non-null object \n", + " 12 marital_status 10910 non-null object \n", + " 13 monthly_premium_auto 10910 non-null int64 \n", + " 14 months_since_last_claim 10910 non-null float64\n", + " 15 months_since_policy_inception 10910 non-null int64 \n", + " 16 number_of_open_complaints 10910 non-null float64\n", + " 17 number_of_policies 10910 non-null int64 \n", + " 18 policy_type 10910 non-null object \n", + " 19 policy 10910 non-null object \n", + " 20 renew_offer_type 10910 non-null object \n", + " 21 
import pandas as pd
import numpy as np

# Share of missing values above which a column is dropped instead of imputed.
MAX_MISSING_SHARE = 0.8

# Load the dataset directly from the URL
url = "https://raw.githubusercontent.com/data-bootcamp-v4/data/main/marketing_customer_analysis_clean.csv"
df = pd.read_csv(url)

# Display basic info
print("First few rows:")
print(df.head())

print("\nDataset info:")
# DataFrame.info() prints its report itself and returns None, so wrapping it in
# print() only appended a stray "None" line to the output.
df.info()

print("\nSummary statistics:")
print(df.describe())

print(f"Shape: {df.shape}")
print(f"Columns: {list(df.columns)}")

# Check for missing values
print("\nMissing values per column:")
print(df.isnull().sum())

# Check duplicates
print(f"\nDuplicate rows: {df.duplicated().sum()}")

# Normalize headers to lower snake_case so later cells can rely on one spelling.
df.columns = df.columns.str.lower().str.replace(' ', '_')

# Re-check missing values under the normalized column names.
print(df.isnull().sum())

# Drop 'vehicle_type' only when it is almost entirely empty; otherwise it is
# kept and imputed like any other column.
if 'vehicle_type' in df.columns:
    if df['vehicle_type'].isnull().mean() > MAX_MISSING_SHARE:
        df = df.drop(columns='vehicle_type')
        print("Dropped 'vehicle_type' due to high missingness.")
# Fill missing numeric columns with median.
# Assign the result back instead of calling fillna(..., inplace=True) on
# df[col]: the chained in-place form operates on a temporary object, raises a
# FutureWarning today and stops having any effect in pandas 3.0.
numeric_cols = df.select_dtypes(include=[np.number]).columns
for col in numeric_cols:
    df[col] = df[col].fillna(df[col].median())

# Tidy text columns and fill their gaps with the most frequent value.
# Only surrounding whitespace is removed. Title-casing is deliberately not
# applied: it would rewrite values such as 'SUV' and 'High School or Below',
# which later cells look up verbatim.
categorical_cols = df.select_dtypes(include=['object']).columns
for col in categorical_cols:
    df[col] = df[col].map(lambda v: v.strip() if isinstance(v, str) else v)
    if df[col].notnull().any():  # avoid all-NaN case (mode() would be empty)
        df[col] = df[col].fillna(df[col].mode()[0])

# Convert 'effective_to_date' to datetime
df['effective_to_date'] = pd.to_datetime(df['effective_to_date'], errors='coerce')

# Optional: Extract month for later analysis
df['effective_month'] = df['effective_to_date'].dt.month

# Re-select the text columns AFTER the date has been parsed. Reusing the list
# built above would cast the freshly parsed datetime column to 'category'.
categorical_cols = df.select_dtypes(include=['object']).columns
for col in categorical_cols:
    df[col] = df[col].astype('category')
print("Cleaned Data Info:")
# DataFrame.info() writes the summary itself and returns None; printing its
# return value only added a stray "None" line after the report.
df.info()

print("\nCleaned Data Sample:")
print(df.head())
{}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Index(['unnamed:_0', 'customer', 'state', 'customer_lifetime_value',\n", + " 'response', 'coverage', 'education', 'effective_to_date',\n", + " 'employmentstatus', 'gender', 'income', 'location_code',\n", + " 'marital_status', 'monthly_premium_auto', 'months_since_last_claim',\n", + " 'months_since_policy_inception', 'number_of_open_complaints',\n", + " 'number_of_policies', 'policy_type', 'policy', 'renew_offer_type',\n", + " 'sales_channel', 'total_claim_amount', 'vehicle_class', 'vehicle_size',\n", + " 'vehicle_type', 'month', 'effective_month'],\n", + " dtype='object')\n", + " total_revenue\n", + "sales_channel \n", + "Agent 1810226.82\n", + "Branch 1301204.00\n", + "Call Center 926600.82\n", + "Web 706600.04\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\migue\\AppData\\Local\\Temp\\ipykernel_26136\\3335164488.py:6: FutureWarning: The default value of observed=False is deprecated and will change to observed=True in a future version of pandas. 
# Yearly premium derived from the monthly premium. It is not used by the pivot
# below; kept so the column stays available on df.
df['estimated_yearly_premium'] = df['monthly_premium_auto'] * 12

# Total revenue per sales channel, highest first, rounded to 2 decimals.
# NOTE(review): revenue is taken to be the sum of total_claim_amount - confirm
# this is the intended definition.
pivot_revenue = (
    pd.pivot_table(
        df,
        values='total_claim_amount',
        index='sales_channel',
        aggfunc='sum',
        # sales_channel may be categorical; state the grouping behaviour
        # explicitly to keep the current result and silence the FutureWarning
        # about the changing default.
        observed=False,
    )
    .round(2)
    .sort_values(by='total_claim_amount', ascending=False)
    .rename(columns={'total_claim_amount': 'total_revenue'})
)

print(pivot_revenue)
# Average customer lifetime value for every education level (rows) and
# gender (columns), rounded to 2 decimal places.
pivot_clv = pd.pivot_table(
    df,
    values='customer_lifetime_value',  # Metric to aggregate
    index='education',                 # Rows: Education level
    columns='gender',                  # Columns: Gender
    aggfunc='mean',                    # Summary function
    # Explicit value keeps the current result for categorical keys and
    # silences the FutureWarning about the changing default.
    observed=False,
).round(2)

# Preferred display order of the education levels.
# NOTE(review): 'Bachelor' is listed before 'College' - confirm this is the
# intended ranking.
edu_order = [
    'High School or Below',
    'Bachelor',
    'College',
    'Master',
    'Doctor'
]

# Order the levels that are present; any level missing from edu_order is
# appended at the end instead of being silently dropped from the table.
existing_edu = [edu for edu in edu_order if edu in pivot_clv.index]
unlisted_edu = [edu for edu in pivot_clv.index if edu not in edu_order]
pivot_clv = pivot_clv.loc[existing_edu + unlisted_edu]

print("Average Customer Lifetime Value by Education and Gender:")
print(pivot_clv)