diff --git a/lab-dw-aggregating.ipynb b/lab-dw-aggregating.ipynb
index fadd718..23f1464 100644
--- a/lab-dw-aggregating.ipynb
+++ b/lab-dw-aggregating.ipynb
@@ -134,7 +134,1085 @@
},
"outputs": [],
"source": [
- "# your code goes here"
+ "import pandas as pd\n",
+ "\n",
+ "# Location of the marketing customer analysis CSV that the exercises below work on.\n",
+ "url = \"https://raw.githubusercontent.com/data-bootcamp-v4/data/main/marketing_customer_analysis.csv\"\n",
+ "\n",
+ "# Fetch the CSV and parse it into `df`, the DataFrame every later cell queries.\n",
+ "df = pd.read_csv(filepath_or_buffer=url)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "id": "cf984ed4",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# EXERCISE 1\n",
+ "# Customers with a low Total Claim Amount (below 1000) whose Response is 'Yes'.\n",
+ "\n",
+ "# Drop rows with a missing Total Claim Amount before comparing (none were found in this dataset).\n",
+ "# The amounts are deliberately left as floats: casting them to int would truncate the decimals\n",
+ "# that the averages in Exercise 2 are computed from, and the cast raises if a value is missing.\n",
+ "df = df.dropna(subset=['Total Claim Amount'])\n",
+ "\n",
+ "# Both conditions must hold: claim amount under 1000 AND Response equal to 'Yes'.\n",
+ "low_claim = df['Total Claim Amount'] < 1000\n",
+ "said_yes = df['Response'] == 'Yes'\n",
+ "filtered_df = df[low_claim & said_yes]\n",
+ "\n",
+ "# Show a preview so the result of the exercise is visible in the notebook.\n",
+ "filtered_df.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "6f0c2c82",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "application/vnd.microsoft.datawrangler.viewer.v0+json": {
+ "columns": [
+ {
+ "name": "index",
+ "rawType": "int64",
+ "type": "integer"
+ },
+ {
+ "name": "Policy Type",
+ "rawType": "object",
+ "type": "string"
+ },
+ {
+ "name": "Gender",
+ "rawType": "object",
+ "type": "string"
+ },
+ {
+ "name": "Monthly Premium Auto",
+ "rawType": "float64",
+ "type": "float"
+ },
+ {
+ "name": "Customer Lifetime Value",
+ "rawType": "float64",
+ "type": "float"
+ },
+ {
+ "name": "Total Claim Amount",
+ "rawType": "float64",
+ "type": "float"
+ }
+ ],
+ "ref": "ef8616ac-f16f-4872-bfde-74f8940f2b01",
+ "rows": [
+ [
+ "0",
+ "Corporate Auto",
+ "F",
+ "94.30177514792899",
+ "7712.62873610651",
+ "433.29585798816566"
+ ],
+ [
+ "1",
+ "Corporate Auto",
+ "M",
+ "92.18831168831169",
+ "7944.465413844156",
+ "408.1233766233766"
+ ],
+ [
+ "2",
+ "Personal Auto",
+ "F",
+ "98.99814814814815",
+ "8339.791842237037",
+ "452.4981481481482"
+ ],
+ [
+ "3",
+ "Personal Auto",
+ "M",
+ "91.08582089552239",
+ "7448.383280707089",
+ "456.5764925373134"
+ ],
+ [
+ "4",
+ "Special Auto",
+ "F",
+ "92.31428571428572",
+ "7691.584111285713",
+ "452.85714285714283"
+ ],
+ [
+ "5",
+ "Special Auto",
+ "M",
+ "86.34375",
+ "8247.08870234375",
+ "429.125"
+ ]
+ ],
+ "shape": {
+ "columns": 5,
+ "rows": 6
+ }
+ },
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Policy Type | \n",
+ " Gender | \n",
+ " Monthly Premium Auto | \n",
+ " Customer Lifetime Value | \n",
+ " Total Claim Amount | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " Corporate Auto | \n",
+ " F | \n",
+ " 94.301775 | \n",
+ " 7712.628736 | \n",
+ " 433.295858 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " Corporate Auto | \n",
+ " M | \n",
+ " 92.188312 | \n",
+ " 7944.465414 | \n",
+ " 408.123377 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " Personal Auto | \n",
+ " F | \n",
+ " 98.998148 | \n",
+ " 8339.791842 | \n",
+ " 452.498148 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " Personal Auto | \n",
+ " M | \n",
+ " 91.085821 | \n",
+ " 7448.383281 | \n",
+ " 456.576493 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " Special Auto | \n",
+ " F | \n",
+ " 92.314286 | \n",
+ " 7691.584111 | \n",
+ " 452.857143 | \n",
+ "
\n",
+ " \n",
+ " | 5 | \n",
+ " Special Auto | \n",
+ " M | \n",
+ " 86.343750 | \n",
+ " 8247.088702 | \n",
+ " 429.125000 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Policy Type Gender Monthly Premium Auto Customer Lifetime Value \\\n",
+ "0 Corporate Auto F 94.301775 7712.628736 \n",
+ "1 Corporate Auto M 92.188312 7944.465414 \n",
+ "2 Personal Auto F 98.998148 8339.791842 \n",
+ "3 Personal Auto M 91.085821 7448.383281 \n",
+ "4 Special Auto F 92.314286 7691.584111 \n",
+ "5 Special Auto M 86.343750 8247.088702 \n",
+ "\n",
+ " Total Claim Amount \n",
+ "0 433.295858 \n",
+ "1 408.123377 \n",
+ "2 452.498148 \n",
+ "3 456.576493 \n",
+ "4 452.857143 \n",
+ "5 429.125000 "
+ ]
+ },
+ "execution_count": 19,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Exercise 2\n",
+ "# Average Monthly Premium Auto, Customer Lifetime Value and Total Claim Amount\n",
+ "# for each Policy Type / Gender pair, using only rows whose Response is 'Yes'.\n",
+ "metric_cols = ['Monthly Premium Auto', 'Customer Lifetime Value', 'Total Claim Amount']\n",
+ "yes_responders = df[df['Response'] == 'Yes']\n",
+ "group_means = yes_responders.groupby(['Policy Type', 'Gender'])[metric_cols].mean()\n",
+ "# reset_index turns the (Policy Type, Gender) group keys back into regular columns.\n",
+ "grouped_df = group_means.reset_index()\n",
+ "grouped_df\n",
+ "\n",
+ "# Insights for Total Claim Amount:\n",
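+ "# 1. Within each gender, Corporate Auto customers have the lowest average Total Claim Amount (about 433 for F, 408 for M); Personal Auto (about 452 / 457) and Special Auto (about 453 / 429) are higher.\n",
+ "# 2. There is no consistent gender pattern: female customers average higher claims than male customers in Corporate Auto and Special Auto, but slightly lower in Personal Auto.\n",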
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "184a9748",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "application/vnd.microsoft.datawrangler.viewer.v0+json": {
+ "columns": [
+ {
+ "name": "State",
+ "rawType": "object",
+ "type": "string"
+ },
+ {
+ "name": "count",
+ "rawType": "int64",
+ "type": "integer"
+ }
+ ],
+ "ref": "fce5c3e9-0ee0-4c5d-ad57-a65e12574740",
+ "rows": [
+ [
+ "California",
+ "3552"
+ ],
+ [
+ "Oregon",
+ "2909"
+ ],
+ [
+ "Arizona",
+ "1937"
+ ],
+ [
+ "Nevada",
+ "993"
+ ],
+ [
+ "Washington",
+ "888"
+ ]
+ ],
+ "shape": {
+ "columns": 1,
+ "rows": 5
+ }
+ },
+ "text/plain": [
+ "State\n",
+ "California 3552\n",
+ "Oregon 2909\n",
+ "Arizona 1937\n",
+ "Nevada 993\n",
+ "Washington 888\n",
+ "Name: count, dtype: int64"
+ ]
+ },
+ "execution_count": 20,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# EXERCISE 3\n",
+ "\n",
+ "# Count customers (rows) per state, then keep only the states with more than 500 of them.\n",
+ "\n",
+ "state_customer_counts = df['State'].value_counts()\n",
+ "filtered_states = state_customer_counts.loc[lambda counts: counts > 500]\n",
+ "filtered_states\n",
+ "\n",
+ "# Insights:\n",
+ "# 1. The states with more than 500 customers are California, Oregon, Arizona, Nevada, and Washington.\n",
+ "# 2. California (CA) has the highest number of customers, significantly more than any other state.\n",
+ "# 3. The distribution of customers across these states indicates a strong presence in the western region of the United States."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "6f9ee962",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "application/vnd.microsoft.datawrangler.viewer.v0+json": {
+ "columns": [
+ {
+ "name": "index",
+ "rawType": "int64",
+ "type": "integer"
+ },
+ {
+ "name": "Education",
+ "rawType": "object",
+ "type": "string"
+ },
+ {
+ "name": "Gender",
+ "rawType": "object",
+ "type": "string"
+ },
+ {
+ "name": "max",
+ "rawType": "float64",
+ "type": "float"
+ },
+ {
+ "name": "min",
+ "rawType": "float64",
+ "type": "float"
+ },
+ {
+ "name": "median",
+ "rawType": "float64",
+ "type": "float"
+ }
+ ],
+ "ref": "650d0b8d-0c8e-4657-9cee-c11b01a93d9c",
+ "rows": [
+ [
+ "0",
+ "Bachelor",
+ "F",
+ "73225.95652",
+ "1904.000852",
+ "5640.505303"
+ ],
+ [
+ "1",
+ "Bachelor",
+ "M",
+ "67907.2705",
+ "1898.007675",
+ "5548.031892"
+ ],
+ [
+ "2",
+ "College",
+ "F",
+ "61850.18803",
+ "1898.683686",
+ "5623.611187"
+ ],
+ [
+ "3",
+ "College",
+ "M",
+ "61134.68307",
+ "1918.1197",
+ "6005.847375"
+ ],
+ [
+ "4",
+ "Doctor",
+ "F",
+ "44856.11397",
+ "2395.57",
+ "5332.462694"
+ ],
+ [
+ "5",
+ "Doctor",
+ "M",
+ "32677.34284",
+ "2267.604038",
+ "5577.669457"
+ ],
+ [
+ "6",
+ "High School or Below",
+ "F",
+ "55277.44589",
+ "2144.921535",
+ "6039.5531869999995"
+ ],
+ [
+ "7",
+ "High School or Below",
+ "M",
+ "83325.38119",
+ "1940.981221",
+ "6286.731006"
+ ],
+ [
+ "8",
+ "Master",
+ "F",
+ "51016.06704",
+ "2417.777032",
+ "5729.855012"
+ ],
+ [
+ "9",
+ "Master",
+ "M",
+ "50568.25912",
+ "2272.30731",
+ "5579.0992074999995"
+ ]
+ ],
+ "shape": {
+ "columns": 5,
+ "rows": 10
+ }
+ },
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Education | \n",
+ " Gender | \n",
+ " max | \n",
+ " min | \n",
+ " median | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " Bachelor | \n",
+ " F | \n",
+ " 73225.95652 | \n",
+ " 1904.000852 | \n",
+ " 5640.505303 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " Bachelor | \n",
+ " M | \n",
+ " 67907.27050 | \n",
+ " 1898.007675 | \n",
+ " 5548.031892 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " College | \n",
+ " F | \n",
+ " 61850.18803 | \n",
+ " 1898.683686 | \n",
+ " 5623.611187 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " College | \n",
+ " M | \n",
+ " 61134.68307 | \n",
+ " 1918.119700 | \n",
+ " 6005.847375 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " Doctor | \n",
+ " F | \n",
+ " 44856.11397 | \n",
+ " 2395.570000 | \n",
+ " 5332.462694 | \n",
+ "
\n",
+ " \n",
+ " | 5 | \n",
+ " Doctor | \n",
+ " M | \n",
+ " 32677.34284 | \n",
+ " 2267.604038 | \n",
+ " 5577.669457 | \n",
+ "
\n",
+ " \n",
+ " | 6 | \n",
+ " High School or Below | \n",
+ " F | \n",
+ " 55277.44589 | \n",
+ " 2144.921535 | \n",
+ " 6039.553187 | \n",
+ "
\n",
+ " \n",
+ " | 7 | \n",
+ " High School or Below | \n",
+ " M | \n",
+ " 83325.38119 | \n",
+ " 1940.981221 | \n",
+ " 6286.731006 | \n",
+ "
\n",
+ " \n",
+ " | 8 | \n",
+ " Master | \n",
+ " F | \n",
+ " 51016.06704 | \n",
+ " 2417.777032 | \n",
+ " 5729.855012 | \n",
+ "
\n",
+ " \n",
+ " | 9 | \n",
+ " Master | \n",
+ " M | \n",
+ " 50568.25912 | \n",
+ " 2272.307310 | \n",
+ " 5579.099207 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Education Gender max min median\n",
+ "0 Bachelor F 73225.95652 1904.000852 5640.505303\n",
+ "1 Bachelor M 67907.27050 1898.007675 5548.031892\n",
+ "2 College F 61850.18803 1898.683686 5623.611187\n",
+ "3 College M 61134.68307 1918.119700 6005.847375\n",
+ "4 Doctor F 44856.11397 2395.570000 5332.462694\n",
+ "5 Doctor M 32677.34284 2267.604038 5577.669457\n",
+ "6 High School or Below F 55277.44589 2144.921535 6039.553187\n",
+ "7 High School or Below M 83325.38119 1940.981221 6286.731006\n",
+ "8 Master F 51016.06704 2417.777032 5729.855012\n",
+ "9 Master M 50568.25912 2272.307310 5579.099207"
+ ]
+ },
+ "execution_count": 25,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "#Exercise 4\n",
+ "# Maximum, minimum and median Customer Lifetime Value for each Education / Gender pair.\n",
+ "clv_by_group = df.groupby(['Education', 'Gender'])['Customer Lifetime Value']\n",
+ "# One column per statistic, one row per (Education, Gender) group.\n",
+ "clv_summary = clv_by_group.agg(['max', 'min', 'median'])\n",
+ "# Move the Education and Gender group keys out of the index into ordinary columns.\n",
+ "lifetime_value_stats = clv_summary.reset_index()\n",
+ "lifetime_value_stats\n",
+ "\n",
+ "#Conclusions:\n",
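+ "# 1. The highest maximum Customer Lifetime Value belongs to 'High School or Below' males (about 83,325), followed by Bachelor females (about 73,226); the Doctor groups have the lowest maxima (about 44,856 for F and 32,677 for M).\n",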
+ "# 2. The minimum Customer Lifetime Value is relatively consistent across different education levels, indicating a baseline value.\n",
+ "# 3. Median Customer Lifetime Value does not increase with education level: the 'High School or Below' groups have the highest medians (about 6,040 for F and 6,287 for M), while Doctor females have the lowest (about 5,332).\n",
+ "# 4. Gender differences are mixed across education levels — some groups show higher median CLV for females (e.g., Bachelor, Master) while others show higher median CLV for males (e.g., College, Doctor, High School); there is no consistent gender advantage overall.\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 32,
+ "id": "f70ca8a7",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "application/vnd.microsoft.datawrangler.viewer.v0+json": {
+ "columns": [
+ {
+ "name": "State",
+ "rawType": "object",
+ "type": "string"
+ },
+ {
+ "name": "January",
+ "rawType": "int64",
+ "type": "integer"
+ },
+ {
+ "name": "February",
+ "rawType": "int64",
+ "type": "integer"
+ }
+ ],
+ "ref": "35455a08-7cb5-403e-9e6e-e51c4c34e77c",
+ "rows": [
+ [
+ "Arizona",
+ "3052",
+ "2864"
+ ],
+ [
+ "California",
+ "5673",
+ "4929"
+ ],
+ [
+ "Nevada",
+ "1493",
+ "1278"
+ ],
+ [
+ "Oregon",
+ "4697",
+ "3969"
+ ],
+ [
+ "Washington",
+ "1358",
+ "1225"
+ ]
+ ],
+ "shape": {
+ "columns": 2,
+ "rows": 5
+ }
+ },
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | Month | \n",
+ " January | \n",
+ " February | \n",
+ "
\n",
+ " \n",
+ " | State | \n",
+ " | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | Arizona | \n",
+ " 3052 | \n",
+ " 2864 | \n",
+ "
\n",
+ " \n",
+ " | California | \n",
+ " 5673 | \n",
+ " 4929 | \n",
+ "
\n",
+ " \n",
+ " | Nevada | \n",
+ " 1493 | \n",
+ " 1278 | \n",
+ "
\n",
+ " \n",
+ " | Oregon | \n",
+ " 4697 | \n",
+ " 3969 | \n",
+ "
\n",
+ " \n",
+ " | Washington | \n",
+ " 1358 | \n",
+ " 1225 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ "Month January February\n",
+ "State \n",
+ "Arizona 3052 2864\n",
+ "California 5673 4929\n",
+ "Nevada 1493 1278\n",
+ "Oregon 4697 3969\n",
+ "Washington 1358 1225"
+ ]
+ },
+ "execution_count": 32,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "#Exercise 5\n",
+ "# Number of policies sold by state and month: states as rows, months as columns.\n",
+ "\n",
+ "# Parse 'Effective To Date' as datetimes so the month can be extracted.\n",
+ "df['Effective To Date'] = pd.to_datetime(df['Effective To Date'])\n",
+ "# Month name of each row; Exercise 6 below also groups by this column.\n",
+ "df['Month'] = df['Effective To Date'].dt.month_name()\n",
+ "\n",
+ "# Sum 'Number of Policies' for every state/month pair; pairs with no rows show 0.\n",
+ "policy_pivot = pd.pivot_table(\n",
+ "    df,\n",
+ "    index='State',\n",
+ "    columns='Month',\n",
+ "    values='Number of Policies',\n",
+ "    aggfunc='sum',\n",
+ "    fill_value=0\n",
+ ")\n",
+ "\n",
+ "# pivot_table sorts the month-name columns alphabetically, so put them in calendar order.\n",
+ "# The order is taken from the data (month names sorted by month number) rather than from a\n",
+ "# hard-coded ['January', 'February'] list, which would silently drop any other month.\n",
+ "month_order = df.groupby(df['Effective To Date'].dt.month)['Month'].first().tolist()\n",
+ "policy_pivot = policy_pivot.reindex(columns=month_order)\n",
+ "policy_pivot"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 39,
+ "id": "aafe9740",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "application/vnd.microsoft.datawrangler.viewer.v0+json": {
+ "columns": [
+ {
+ "name": "index",
+ "rawType": "int64",
+ "type": "integer"
+ },
+ {
+ "name": "State",
+ "rawType": "object",
+ "type": "string"
+ },
+ {
+ "name": "Month",
+ "rawType": "object",
+ "type": "string"
+ },
+ {
+ "name": "Number of Policies",
+ "rawType": "int64",
+ "type": "integer"
+ }
+ ],
+ "ref": "d6a2c1b9-5d40-4ab0-a10f-71a1bdbc6c4d",
+ "rows": [
+ [
+ "3",
+ "California",
+ "January",
+ "5673"
+ ],
+ [
+ "2",
+ "California",
+ "February",
+ "4929"
+ ],
+ [
+ "5",
+ "Oregon",
+ "January",
+ "4697"
+ ],
+ [
+ "4",
+ "Oregon",
+ "February",
+ "3969"
+ ],
+ [
+ "1",
+ "Arizona",
+ "January",
+ "3052"
+ ],
+ [
+ "0",
+ "Arizona",
+ "February",
+ "2864"
+ ]
+ ],
+ "shape": {
+ "columns": 3,
+ "rows": 6
+ }
+ },
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " State | \n",
+ " Month | \n",
+ " Number of Policies | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 3 | \n",
+ " California | \n",
+ " January | \n",
+ " 5673 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " California | \n",
+ " February | \n",
+ " 4929 | \n",
+ "
\n",
+ " \n",
+ " | 5 | \n",
+ " Oregon | \n",
+ " January | \n",
+ " 4697 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " Oregon | \n",
+ " February | \n",
+ " 3969 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " Arizona | \n",
+ " January | \n",
+ " 3052 | \n",
+ "
\n",
+ " \n",
+ " | 0 | \n",
+ " Arizona | \n",
+ " February | \n",
+ " 2864 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " State Month Number of Policies\n",
+ "3 California January 5673\n",
+ "2 California February 4929\n",
+ "5 Oregon January 4697\n",
+ "4 Oregon February 3969\n",
+ "1 Arizona January 3052\n",
+ "0 Arizona February 2864"
+ ]
+ },
+ "execution_count": 39,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "#EXERCISE 6\n",
+ "# Policies sold per state and month for the 3 states with the most policies sold,\n",
+ "# built with groupby (no pivot table) and sorted from highest to lowest.\n",
+ "\n",
+ "# Rank states by the total of 'Number of Policies' - the same measure reported in the result -\n",
+ "# rather than by how many customer rows each state has (which is what value_counts() gives).\n",
+ "policies_per_state = df.groupby('State')['Number of Policies'].sum()\n",
+ "top_states = policies_per_state.nlargest(3).index.tolist()\n",
+ "# Keep only the rows that belong to those states.\n",
+ "top_states_df = df[df['State'].isin(top_states)]\n",
+ "# Sum the policies for each State/Month pair ('Month' is the column created in Exercise 5),\n",
+ "# then order the rows by that total, largest first.\n",
+ "policies_by_state_month = (\n",
+ "    top_states_df.groupby(['State', 'Month'])['Number of Policies']\n",
+ "    .sum()\n",
+ "    .reset_index()\n",
+ "    .sort_values(by='Number of Policies', ascending=False)\n",
+ ")\n",
+ "policies_by_state_month\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 42,
+ "id": "2817a6f0",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "application/vnd.microsoft.datawrangler.viewer.v0+json": {
+ "columns": [
+ {
+ "name": "index",
+ "rawType": "int64",
+ "type": "integer"
+ },
+ {
+ "name": "Marketing Channel",
+ "rawType": "object",
+ "type": "string"
+ },
+ {
+ "name": "Response Rate",
+ "rawType": "float64",
+ "type": "float"
+ }
+ ],
+ "ref": "486d7b1e-bafd-4ca0-b645-9698eeb1de75",
+ "rows": [
+ [
+ "0",
+ "Agent",
+ "0.1800533851007037"
+ ],
+ [
+ "1",
+ "Branch",
+ "0.10787557908669755"
+ ],
+ [
+ "2",
+ "Call Center",
+ "0.10322279308734236"
+ ],
+ [
+ "3",
+ "Web",
+ "0.1088560885608856"
+ ]
+ ],
+ "shape": {
+ "columns": 2,
+ "rows": 4
+ }
+ },
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Marketing Channel | \n",
+ " Response Rate | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " Agent | \n",
+ " 0.180053 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " Branch | \n",
+ " 0.107876 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " Call Center | \n",
+ " 0.103223 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " Web | \n",
+ " 0.108856 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Marketing Channel Response Rate\n",
+ "0 Agent 0.180053\n",
+ "1 Branch 0.107876\n",
+ "2 Call Center 0.103223\n",
+ "3 Web 0.108856"
+ ]
+ },
+ "execution_count": 42,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "#EXERCISE 7\n",
+ "# Effect of the marketing channel on the customer response rate.\n",
+ "\n",
+ "# If the data has binary contact columns ('Email', 'Phone', 'SMS', 'Mail'), they are unpivoted\n",
+ "# with melt and only contacted customers are kept. This dataset has none of them, so the\n",
+ "# existing 'Sales Channel' column is used as the channel instead.\n",
+ "\n",
+ "contact_cols = [c for c in ['Email', 'Phone', 'SMS', 'Mail'] if c in df.columns]\n",
+ "\n",
+ "if contact_cols:\n",
+ "    # One row per (customer, channel) pair, with the 0/1 contact flag in 'Contacted'.\n",
+ "    melted_df = df.melt(\n",
+ "        id_vars=['Customer', 'Response'],\n",
+ "        value_vars=contact_cols,\n",
+ "        var_name='Marketing Channel',\n",
+ "        value_name='Contacted'\n",
+ "    )\n",
+ "    # Only customers who were actually contacted through the channel count towards its rate.\n",
+ "    contacted_df = melted_df[melted_df['Contacted'] == 1]\n",
+ "    responses = contacted_df['Response']\n",
+ "    channels = contacted_df['Marketing Channel']\n",
+ "else:\n",
+ "    # Fallback: every row counts, and its 'Sales Channel' value is treated as the channel.\n",
+ "    responses = df['Response']\n",
+ "    channels = df['Sales Channel'].rename('Marketing Channel')\n",
+ "\n",
+ "# Response rate = share of rows per channel whose Response is exactly 'Yes'.\n",
+ "# Any other value, including a missing Response, counts as not responding.\n",
+ "response_rate = (\n",
+ "    responses.eq('Yes')\n",
+ "    .groupby(channels)\n",
+ "    .mean()\n",
+ "    .reset_index(name='Response Rate')\n",
+ ")\n",
+ "\n",
+ "response_rate"
]
}
],
@@ -143,7 +1221,7 @@
"provenance": []
},
"kernelspec": {
- "display_name": "Python 3 (ipykernel)",
+ "display_name": "base",
"language": "python",
"name": "python3"
},
@@ -157,7 +1235,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.9.13"
+ "version": "3.13.5"
}
},
"nbformat": 4,