diff --git a/lab-dw-data-structuring-and-combining.ipynb b/lab-dw-data-structuring-and-combining.ipynb
index ec4e3f9..63400ed 100644
--- a/lab-dw-data-structuring-and-combining.ipynb
+++ b/lab-dw-data-structuring-and-combining.ipynb
@@ -36,14 +36,794 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 1,
"id": "492d06e3-92c7-4105-ac72-536db98d3244",
"metadata": {
"id": "492d06e3-92c7-4105-ac72-536db98d3244"
},
"outputs": [],
"source": [
- "# Your code goes here"
+ "# Your code goes here\n",
+ "import pandas as pd\n",
+ "# Load the three raw customer files in one pass (same URLs, same order)\n",
+ "df1, df2, df3 = map(pd.read_csv, [\n",
+ "    \"https://raw.githubusercontent.com/data-bootcamp-v4/data/main/file1.csv\",\n",
+ "    \"https://raw.githubusercontent.com/data-bootcamp-v4/data/main/file2.csv\",\n",
+ "    \"https://raw.githubusercontent.com/data-bootcamp-v4/data/main/file3.csv\",\n",
+ "])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "id": "96b7adbf-a082-4b9d-b978-204596ca4444",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "(4008, 11), (996, 11), (7070, 11)\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Report the (rows, columns) shape of each raw frame on a single line\n",
+ "print(\", \".join(str(frame.shape) for frame in (df1, df2, df3)))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 30,
+ "id": "215f6eaa-2b65-45e2-a985-065484eaa066",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "(4008, 11)\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " customer | \n",
+ " state | \n",
+ " gender | \n",
+ " education | \n",
+ " customer_lifetime_value | \n",
+ " income | \n",
+ " monthly_premium_auto | \n",
+ " number_of_open_complaints | \n",
+ " policy_type | \n",
+ " vehicle_class | \n",
+ " total_claim_amount | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " RB50392 | \n",
+ " Washington | \n",
+ " NaN | \n",
+ " Master | \n",
+ " NaN | \n",
+ " 0.0 | \n",
+ " 1000.0 | \n",
+ " 1/0/00 | \n",
+ " Personal Auto | \n",
+ " Four-Door Car | \n",
+ " 2.704934 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " QZ44356 | \n",
+ " Arizona | \n",
+ " F | \n",
+ " Bachelor | \n",
+ " 697953.59% | \n",
+ " 0.0 | \n",
+ " 94.0 | \n",
+ " 1/0/00 | \n",
+ " Personal Auto | \n",
+ " Four-Door Car | \n",
+ " 1131.464935 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " AI49188 | \n",
+ " Nevada | \n",
+ " F | \n",
+ " Bachelor | \n",
+ " 1288743.17% | \n",
+ " 48767.0 | \n",
+ " 108.0 | \n",
+ " 1/0/00 | \n",
+ " Personal Auto | \n",
+ " Two-Door Car | \n",
+ " 566.472247 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " WW63253 | \n",
+ " California | \n",
+ " M | \n",
+ " Bachelor | \n",
+ " 764586.18% | \n",
+ " 0.0 | \n",
+ " 106.0 | \n",
+ " 1/0/00 | \n",
+ " Corporate Auto | \n",
+ " SUV | \n",
+ " 529.881344 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " GA49547 | \n",
+ " Washington | \n",
+ " M | \n",
+ " High School or Below | \n",
+ " 536307.65% | \n",
+ " 36357.0 | \n",
+ " 68.0 | \n",
+ " 1/0/00 | \n",
+ " Personal Auto | \n",
+ " Four-Door Car | \n",
+ " 17.269323 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " customer state gender education customer_lifetime_value \\\n",
+ "0 RB50392 Washington NaN Master NaN \n",
+ "1 QZ44356 Arizona F Bachelor 697953.59% \n",
+ "2 AI49188 Nevada F Bachelor 1288743.17% \n",
+ "3 WW63253 California M Bachelor 764586.18% \n",
+ "4 GA49547 Washington M High School or Below 536307.65% \n",
+ "\n",
+ " income monthly_premium_auto number_of_open_complaints policy_type \\\n",
+ "0 0.0 1000.0 1/0/00 Personal Auto \n",
+ "1 0.0 94.0 1/0/00 Personal Auto \n",
+ "2 48767.0 108.0 1/0/00 Personal Auto \n",
+ "3 0.0 106.0 1/0/00 Corporate Auto \n",
+ "4 36357.0 68.0 1/0/00 Personal Auto \n",
+ "\n",
+ " vehicle_class total_claim_amount \n",
+ "0 Four-Door Car 2.704934 \n",
+ "1 Four-Door Car 1131.464935 \n",
+ "2 Two-Door Car 566.472247 \n",
+ "3 SUV 529.881344 \n",
+ "4 Four-Door Car 17.269323 "
+ ]
+ },
+ "execution_count": 30,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Clean dataset 1\n",
+ "# Normalise headers to lower snake_case, then rename the abbreviated state header\n",
+ "# by name instead of overwriting position 1, so a file with a different column\n",
+ "# order cannot get the wrong column relabelled.\n",
+ "# NOTE(review): assumes the abbreviated raw header is \"ST\" (-> \"st\"); the assert\n",
+ "# below fails loudly if that assumption is wrong.\n",
+ "df1 = (\n",
+ "    df1.rename(columns=lambda col: col.replace(' ', '_').lower())\n",
+ "    .rename(columns={\"st\": \"state\"})\n",
+ ")\n",
+ "assert \"state\" in df1.columns, \"expected a 'state' column after renaming\"\n",
+ "print(df1.shape)\n",
+ "df1.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 31,
+ "id": "f230c026-7557-4097-9136-ba7d2bfc8be3",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "(996, 11)\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " customer | \n",
+ " state | \n",
+ " gender | \n",
+ " education | \n",
+ " customer_lifetime_value | \n",
+ " income | \n",
+ " monthly_premium_auto | \n",
+ " number_of_open_complaints | \n",
+ " total_claim_amount | \n",
+ " policy_type | \n",
+ " vehicle_class | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " GS98873 | \n",
+ " Arizona | \n",
+ " F | \n",
+ " Bachelor | \n",
+ " 323912.47% | \n",
+ " 16061 | \n",
+ " 88 | \n",
+ " 1/0/00 | \n",
+ " 633.6 | \n",
+ " Personal Auto | \n",
+ " Four-Door Car | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " CW49887 | \n",
+ " California | \n",
+ " F | \n",
+ " Master | \n",
+ " 462680.11% | \n",
+ " 79487 | \n",
+ " 114 | \n",
+ " 1/0/00 | \n",
+ " 547.2 | \n",
+ " Special Auto | \n",
+ " SUV | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " MY31220 | \n",
+ " California | \n",
+ " F | \n",
+ " College | \n",
+ " 899704.02% | \n",
+ " 54230 | \n",
+ " 112 | \n",
+ " 1/0/00 | \n",
+ " 537.6 | \n",
+ " Personal Auto | \n",
+ " Two-Door Car | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " UH35128 | \n",
+ " Oregon | \n",
+ " F | \n",
+ " College | \n",
+ " 2580706.30% | \n",
+ " 71210 | \n",
+ " 214 | \n",
+ " 1/1/00 | \n",
+ " 1027.2 | \n",
+ " Personal Auto | \n",
+ " Luxury Car | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " WH52799 | \n",
+ " Arizona | \n",
+ " F | \n",
+ " College | \n",
+ " 380812.21% | \n",
+ " 94903 | \n",
+ " 94 | \n",
+ " 1/0/00 | \n",
+ " 451.2 | \n",
+ " Corporate Auto | \n",
+ " Two-Door Car | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " customer state gender education customer_lifetime_value income \\\n",
+ "0 GS98873 Arizona F Bachelor 323912.47% 16061 \n",
+ "1 CW49887 California F Master 462680.11% 79487 \n",
+ "2 MY31220 California F College 899704.02% 54230 \n",
+ "3 UH35128 Oregon F College 2580706.30% 71210 \n",
+ "4 WH52799 Arizona F College 380812.21% 94903 \n",
+ "\n",
+ " monthly_premium_auto number_of_open_complaints total_claim_amount \\\n",
+ "0 88 1/0/00 633.6 \n",
+ "1 114 1/0/00 547.2 \n",
+ "2 112 1/0/00 537.6 \n",
+ "3 214 1/1/00 1027.2 \n",
+ "4 94 1/0/00 451.2 \n",
+ "\n",
+ " policy_type vehicle_class \n",
+ "0 Personal Auto Four-Door Car \n",
+ "1 Special Auto SUV \n",
+ "2 Personal Auto Two-Door Car \n",
+ "3 Personal Auto Luxury Car \n",
+ "4 Corporate Auto Two-Door Car "
+ ]
+ },
+ "execution_count": 31,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Clean dataset 2\n",
+ "# Normalise headers to lower snake_case, then rename the abbreviated state header\n",
+ "# by name instead of overwriting position 1, so a file with a different column\n",
+ "# order cannot get the wrong column relabelled.\n",
+ "# NOTE(review): assumes the abbreviated raw header is \"ST\" (-> \"st\"); the assert\n",
+ "# below fails loudly if that assumption is wrong.\n",
+ "df2 = (\n",
+ "    df2.rename(columns=lambda col: col.replace(' ', '_').lower())\n",
+ "    .rename(columns={\"st\": \"state\"})\n",
+ ")\n",
+ "assert \"state\" in df2.columns, \"expected a 'state' column after renaming\"\n",
+ "print(df2.shape)\n",
+ "df2.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 32,
+ "id": "162db87f-cfd2-474c-af97-3e5bf5731421",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "(7070, 11)\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " customer | \n",
+ " state | \n",
+ " customer_lifetime_value | \n",
+ " education | \n",
+ " gender | \n",
+ " income | \n",
+ " monthly_premium_auto | \n",
+ " number_of_open_complaints | \n",
+ " policy_type | \n",
+ " total_claim_amount | \n",
+ " vehicle_class | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " SA25987 | \n",
+ " Washington | \n",
+ " 3479.137523 | \n",
+ " High School or Below | \n",
+ " M | \n",
+ " 0 | \n",
+ " 104 | \n",
+ " 0 | \n",
+ " Personal Auto | \n",
+ " 499.200000 | \n",
+ " Two-Door Car | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " TB86706 | \n",
+ " Arizona | \n",
+ " 2502.637401 | \n",
+ " Master | \n",
+ " M | \n",
+ " 0 | \n",
+ " 66 | \n",
+ " 0 | \n",
+ " Personal Auto | \n",
+ " 3.468912 | \n",
+ " Two-Door Car | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " ZL73902 | \n",
+ " Nevada | \n",
+ " 3265.156348 | \n",
+ " Bachelor | \n",
+ " F | \n",
+ " 25820 | \n",
+ " 82 | \n",
+ " 0 | \n",
+ " Personal Auto | \n",
+ " 393.600000 | \n",
+ " Four-Door Car | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " KX23516 | \n",
+ " California | \n",
+ " 4455.843406 | \n",
+ " High School or Below | \n",
+ " F | \n",
+ " 0 | \n",
+ " 121 | \n",
+ " 0 | \n",
+ " Personal Auto | \n",
+ " 699.615192 | \n",
+ " SUV | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " FN77294 | \n",
+ " California | \n",
+ " 7704.958480 | \n",
+ " High School or Below | \n",
+ " M | \n",
+ " 30366 | \n",
+ " 101 | \n",
+ " 2 | \n",
+ " Personal Auto | \n",
+ " 484.800000 | \n",
+ " SUV | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " customer state customer_lifetime_value education gender \\\n",
+ "0 SA25987 Washington 3479.137523 High School or Below M \n",
+ "1 TB86706 Arizona 2502.637401 Master M \n",
+ "2 ZL73902 Nevada 3265.156348 Bachelor F \n",
+ "3 KX23516 California 4455.843406 High School or Below F \n",
+ "4 FN77294 California 7704.958480 High School or Below M \n",
+ "\n",
+ " income monthly_premium_auto number_of_open_complaints policy_type \\\n",
+ "0 0 104 0 Personal Auto \n",
+ "1 0 66 0 Personal Auto \n",
+ "2 25820 82 0 Personal Auto \n",
+ "3 0 121 0 Personal Auto \n",
+ "4 30366 101 2 Personal Auto \n",
+ "\n",
+ " total_claim_amount vehicle_class \n",
+ "0 499.200000 Two-Door Car \n",
+ "1 3.468912 Two-Door Car \n",
+ "2 393.600000 Four-Door Car \n",
+ "3 699.615192 SUV \n",
+ "4 484.800000 SUV "
+ ]
+ },
+ "execution_count": 32,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Clean dataset 3\n",
+ "# Normalise headers to lower snake_case, then rename an abbreviated state header\n",
+ "# by name instead of overwriting position 1, so a file with a different column\n",
+ "# order cannot get the wrong column relabelled. If the header already normalises\n",
+ "# to \"state\", the second rename is a no-op.\n",
+ "# NOTE(review): assumes any abbreviated raw header is \"ST\" (-> \"st\"); the assert\n",
+ "# below fails loudly if no \"state\" column results.\n",
+ "df3 = (\n",
+ "    df3.rename(columns=lambda col: col.replace(' ', '_').lower())\n",
+ "    .rename(columns={\"st\": \"state\"})\n",
+ ")\n",
+ "assert \"state\" in df3.columns, \"expected a 'state' column after renaming\"\n",
+ "print(df3.shape)\n",
+ "df3.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 38,
+ "id": "7e12ae21-59ed-4a91-aaf6-b639cb258a1e",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "(12074, 11)\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " customer | \n",
+ " state | \n",
+ " gender | \n",
+ " education | \n",
+ " customer_lifetime_value | \n",
+ " income | \n",
+ " monthly_premium_auto | \n",
+ " number_of_open_complaints | \n",
+ " policy_type | \n",
+ " vehicle_class | \n",
+ " total_claim_amount | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " RB50392 | \n",
+ " Washington | \n",
+ " NaN | \n",
+ " Master | \n",
+ " NaN | \n",
+ " 0.0 | \n",
+ " 1000.0 | \n",
+ " 1/0/00 | \n",
+ " Personal Auto | \n",
+ " Four-Door Car | \n",
+ " 2.704934 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " QZ44356 | \n",
+ " Arizona | \n",
+ " F | \n",
+ " Bachelor | \n",
+ " 697953.59% | \n",
+ " 0.0 | \n",
+ " 94.0 | \n",
+ " 1/0/00 | \n",
+ " Personal Auto | \n",
+ " Four-Door Car | \n",
+ " 1131.464935 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " AI49188 | \n",
+ " Nevada | \n",
+ " F | \n",
+ " Bachelor | \n",
+ " 1288743.17% | \n",
+ " 48767.0 | \n",
+ " 108.0 | \n",
+ " 1/0/00 | \n",
+ " Personal Auto | \n",
+ " Two-Door Car | \n",
+ " 566.472247 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " WW63253 | \n",
+ " California | \n",
+ " M | \n",
+ " Bachelor | \n",
+ " 764586.18% | \n",
+ " 0.0 | \n",
+ " 106.0 | \n",
+ " 1/0/00 | \n",
+ " Corporate Auto | \n",
+ " SUV | \n",
+ " 529.881344 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " GA49547 | \n",
+ " Washington | \n",
+ " M | \n",
+ " High School or Below | \n",
+ " 536307.65% | \n",
+ " 36357.0 | \n",
+ " 68.0 | \n",
+ " 1/0/00 | \n",
+ " Personal Auto | \n",
+ " Four-Door Car | \n",
+ " 17.269323 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " customer state gender education customer_lifetime_value \\\n",
+ "0 RB50392 Washington NaN Master NaN \n",
+ "1 QZ44356 Arizona F Bachelor 697953.59% \n",
+ "2 AI49188 Nevada F Bachelor 1288743.17% \n",
+ "3 WW63253 California M Bachelor 764586.18% \n",
+ "4 GA49547 Washington M High School or Below 536307.65% \n",
+ "\n",
+ " income monthly_premium_auto number_of_open_complaints policy_type \\\n",
+ "0 0.0 1000.0 1/0/00 Personal Auto \n",
+ "1 0.0 94.0 1/0/00 Personal Auto \n",
+ "2 48767.0 108.0 1/0/00 Personal Auto \n",
+ "3 0.0 106.0 1/0/00 Corporate Auto \n",
+ "4 36357.0 68.0 1/0/00 Personal Auto \n",
+ "\n",
+ " vehicle_class total_claim_amount \n",
+ "0 Four-Door Car 2.704934 \n",
+ "1 Four-Door Car 1131.464935 \n",
+ "2 Two-Door Car 566.472247 \n",
+ "3 SUV 529.881344 \n",
+ "4 Four-Door Car 17.269323 "
+ ]
+ },
+ "execution_count": 38,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Stack the three cleaned frames; pd.concat aligns the columns by name.\n",
+ "# The raw concatenation appears to contain rows that are NaN in every column\n",
+ "# (each column reports at least 2937 missing values); such rows carry no\n",
+ "# information, so drop them before renumbering the index.\n",
+ "df = pd.concat([df1, df2, df3], axis=0).dropna(how=\"all\").reset_index(drop=True)\n",
+ "print(df.shape)\n",
+ "df.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 39,
+ "id": "c34bdf5b-78b2-4c6f-8f87-f0d4431123a6",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "RangeIndex: 12074 entries, 0 to 12073\n",
+ "Data columns (total 11 columns):\n",
+ " # Column Non-Null Count Dtype \n",
+ "--- ------ -------------- ----- \n",
+ " 0 customer 9137 non-null object \n",
+ " 1 state 9137 non-null object \n",
+ " 2 gender 9015 non-null object \n",
+ " 3 education 9137 non-null object \n",
+ " 4 customer_lifetime_value 9130 non-null object \n",
+ " 5 income 9137 non-null float64\n",
+ " 6 monthly_premium_auto 9137 non-null float64\n",
+ " 7 number_of_open_complaints 9137 non-null object \n",
+ " 8 policy_type 9137 non-null object \n",
+ " 9 vehicle_class 9137 non-null object \n",
+ " 10 total_claim_amount 9137 non-null float64\n",
+ "dtypes: float64(3), object(8)\n",
+ "memory usage: 1.0+ MB\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Summarise the dtype and non-null count of each column in the combined frame\n",
+ "df.info()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 40,
+ "id": "6b756dd4-31a6-4a2d-b007-d1815238aa1c",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "customer 2937\n",
+ "state 2937\n",
+ "gender 3059\n",
+ "education 2937\n",
+ "customer_lifetime_value 2944\n",
+ "income 2937\n",
+ "monthly_premium_auto 2937\n",
+ "number_of_open_complaints 2937\n",
+ "policy_type 2937\n",
+ "vehicle_class 2937\n",
+ "total_claim_amount 2937\n",
+ "dtype: int64"
+ ]
+ },
+ "execution_count": 40,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Count the missing values in each column\n",
+ "df.isna().sum()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 44,
+ "id": "73f819ea-5ae9-480b-9eb0-979d1f1145ee",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "education\n",
+ "Bachelor 2743\n",
+ "College 2682\n",
+ "High School or Below 2616\n",
+ "Master 752\n",
+ "Doctor 344\n",
+ "Name: count, dtype: int64"
+ ]
+ },
+ "execution_count": 44,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Clean column \"customer_lifetime_value\"\n",
+ "# After the concat this column mixes strings such as \"697953.59%\" with plain\n",
+ "# floats. `.str.replace` returns NaN for every non-string element, which would\n",
+ "# silently wipe the float rows, so cast to str first, strip the trailing \"%\",\n",
+ "# and convert the whole column to a numeric dtype (unparseable values -> NaN).\n",
+ "# NOTE(review): the \"%\" values look roughly 100x larger than the plain floats;\n",
+ "# confirm whether they should also be divided by 100.\n",
+ "df['customer_lifetime_value'] = pd.to_numeric(\n",
+ "    df['customer_lifetime_value'].astype(str).str.rstrip(\"%\"),\n",
+ "    errors=\"coerce\",\n",
+ ")\n",
+ "# Show the cleaned column itself so the result of this step can be checked\n",
+ "df['customer_lifetime_value'].describe()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 42,
+ "id": "3cc32489-138f-4577-99b5-1a22887b6e75",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "education\n",
+ "Bachelor 2743\n",
+ "College 2682\n",
+ "High School or Below 2616\n",
+ "Master 752\n",
+ "Doctor 344\n",
+ "Name: count, dtype: int64"
+ ]
+ },
+ "execution_count": 42,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Clean column \"education\": collapse the plural spelling into \"Bachelor\"\n",
+ "df = df.assign(\n",
+ "    education=lambda frame: frame['education'].str.replace(\"Bachelors\", \"Bachelor\", regex=False)\n",
+ ")\n",
+ "df['education'].value_counts()"
]
},
{
@@ -72,14 +852,231 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 46,
"id": "aa10d9b0-1c27-4d3f-a8e4-db6ab73bfd26",
"metadata": {
"id": "aa10d9b0-1c27-4d3f-a8e4-db6ab73bfd26"
},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " unnamed:_0 | \n",
+ " customer | \n",
+ " state | \n",
+ " customer_lifetime_value | \n",
+ " response | \n",
+ " coverage | \n",
+ " education | \n",
+ " effective_to_date | \n",
+ " employmentstatus | \n",
+ " gender | \n",
+ " ... | \n",
+ " number_of_policies | \n",
+ " policy_type | \n",
+ " policy | \n",
+ " renew_offer_type | \n",
+ " sales_channel | \n",
+ " total_claim_amount | \n",
+ " vehicle_class | \n",
+ " vehicle_size | \n",
+ " vehicle_type | \n",
+ " month | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 0 | \n",
+ " DK49336 | \n",
+ " Arizona | \n",
+ " 4809.216960 | \n",
+ " No | \n",
+ " Basic | \n",
+ " College | \n",
+ " 2011-02-18 | \n",
+ " Employed | \n",
+ " M | \n",
+ " ... | \n",
+ " 9 | \n",
+ " Corporate Auto | \n",
+ " Corporate L3 | \n",
+ " Offer3 | \n",
+ " Agent | \n",
+ " 292.800000 | \n",
+ " Four-Door Car | \n",
+ " Medsize | \n",
+ " A | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 1 | \n",
+ " KX64629 | \n",
+ " California | \n",
+ " 2228.525238 | \n",
+ " No | \n",
+ " Basic | \n",
+ " College | \n",
+ " 2011-01-18 | \n",
+ " Unemployed | \n",
+ " F | \n",
+ " ... | \n",
+ " 1 | \n",
+ " Personal Auto | \n",
+ " Personal L3 | \n",
+ " Offer4 | \n",
+ " Call Center | \n",
+ " 744.924331 | \n",
+ " Four-Door Car | \n",
+ " Medsize | \n",
+ " A | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 2 | \n",
+ " LZ68649 | \n",
+ " Washington | \n",
+ " 14947.917300 | \n",
+ " No | \n",
+ " Basic | \n",
+ " Bachelor | \n",
+ " 2011-02-10 | \n",
+ " Employed | \n",
+ " M | \n",
+ " ... | \n",
+ " 2 | \n",
+ " Personal Auto | \n",
+ " Personal L3 | \n",
+ " Offer3 | \n",
+ " Call Center | \n",
+ " 480.000000 | \n",
+ " SUV | \n",
+ " Medsize | \n",
+ " A | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 3 | \n",
+ " XL78013 | \n",
+ " Oregon | \n",
+ " 22332.439460 | \n",
+ " Yes | \n",
+ " Extended | \n",
+ " College | \n",
+ " 2011-01-11 | \n",
+ " Employed | \n",
+ " M | \n",
+ " ... | \n",
+ " 2 | \n",
+ " Corporate Auto | \n",
+ " Corporate L3 | \n",
+ " Offer2 | \n",
+ " Branch | \n",
+ " 484.013411 | \n",
+ " Four-Door Car | \n",
+ " Medsize | \n",
+ " A | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 4 | \n",
+ " QA50777 | \n",
+ " Oregon | \n",
+ " 9025.067525 | \n",
+ " No | \n",
+ " Premium | \n",
+ " Bachelor | \n",
+ " 2011-01-17 | \n",
+ " Medical Leave | \n",
+ " F | \n",
+ " ... | \n",
+ " 7 | \n",
+ " Personal Auto | \n",
+ " Personal L2 | \n",
+ " Offer1 | \n",
+ " Branch | \n",
+ " 707.925645 | \n",
+ " Four-Door Car | \n",
+ " Medsize | \n",
+ " A | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
5 rows × 27 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " unnamed:_0 customer state customer_lifetime_value response \\\n",
+ "0 0 DK49336 Arizona 4809.216960 No \n",
+ "1 1 KX64629 California 2228.525238 No \n",
+ "2 2 LZ68649 Washington 14947.917300 No \n",
+ "3 3 XL78013 Oregon 22332.439460 Yes \n",
+ "4 4 QA50777 Oregon 9025.067525 No \n",
+ "\n",
+ " coverage education effective_to_date employmentstatus gender ... \\\n",
+ "0 Basic College 2011-02-18 Employed M ... \n",
+ "1 Basic College 2011-01-18 Unemployed F ... \n",
+ "2 Basic Bachelor 2011-02-10 Employed M ... \n",
+ "3 Extended College 2011-01-11 Employed M ... \n",
+ "4 Premium Bachelor 2011-01-17 Medical Leave F ... \n",
+ "\n",
+ " number_of_policies policy_type policy renew_offer_type \\\n",
+ "0 9 Corporate Auto Corporate L3 Offer3 \n",
+ "1 1 Personal Auto Personal L3 Offer4 \n",
+ "2 2 Personal Auto Personal L3 Offer3 \n",
+ "3 2 Corporate Auto Corporate L3 Offer2 \n",
+ "4 7 Personal Auto Personal L2 Offer1 \n",
+ "\n",
+ " sales_channel total_claim_amount vehicle_class vehicle_size \\\n",
+ "0 Agent 292.800000 Four-Door Car Medsize \n",
+ "1 Call Center 744.924331 Four-Door Car Medsize \n",
+ "2 Call Center 480.000000 SUV Medsize \n",
+ "3 Branch 484.013411 Four-Door Car Medsize \n",
+ "4 Branch 707.925645 Four-Door Car Medsize \n",
+ "\n",
+ " vehicle_type month \n",
+ "0 A 2 \n",
+ "1 A 1 \n",
+ "2 A 2 \n",
+ "3 A 1 \n",
+ "4 A 1 \n",
+ "\n",
+ "[5 rows x 27 columns]"
+ ]
+ },
+ "execution_count": 46,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "# Your code goes here"
+ "# Your code goes here\n",
+ "import pandas as pd\n",
+ "# Load marketing_customer_analysis_clean.csv. Note: this rebinds `df`, replacing\n",
+ "# the combined frame built earlier in the notebook.\n",
+ "df = pd.read_csv(\"https://raw.githubusercontent.com/data-bootcamp-v4/data/main/marketing_customer_analysis_clean.csv\")\n",
+ "df.head()"
]
},
{
@@ -93,6 +1090,135 @@
"Round the total revenue to 2 decimal points. Analyze the resulting table to draw insights."
]
},
+ {
+ "cell_type": "code",
+ "execution_count": 53,
+ "id": "7e88be17-e77f-4d5d-9e2b-4a25bd41d6a5",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "sales_channel\n",
+ "Agent 4121\n",
+ "Branch 3022\n",
+ "Call Center 2141\n",
+ "Web 1626\n",
+ "Name: count, dtype: int64"
+ ]
+ },
+ "execution_count": 53,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Number of rows recorded for each sales channel\n",
+ "df['sales_channel'].value_counts()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 49,
+ "id": "2cf8745e-e9f1-4d82-bf95-825e4dfd514f",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Total revenue (summed customer_lifetime_value) for each sales channel,\n",
+ "# rounded to 2 decimal points\n",
+ "pivot_sales = df.pivot_table(\n",
+ "    values='customer_lifetime_value',\n",
+ "    index='sales_channel',\n",
+ "    aggfunc='sum',\n",
+ ").round(2)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 51,
+ "id": "e7a14ba4-cb83-4e8a-bd5e-6dc46140e27e",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " customer_lifetime_value | \n",
+ "
\n",
+ " \n",
+ " | sales_channel | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | Agent | \n",
+ " 33057887.85 | \n",
+ "
\n",
+ " \n",
+ " | Branch | \n",
+ " 24359201.21 | \n",
+ "
\n",
+ " \n",
+ " | Call Center | \n",
+ " 17364288.37 | \n",
+ "
\n",
+ " \n",
+ " | Web | \n",
+ " 12697632.90 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " customer_lifetime_value\n",
+ "sales_channel \n",
+ "Agent 33057887.85\n",
+ "Branch 24359201.21\n",
+ "Call Center 17364288.37\n",
+ "Web 12697632.90"
+ ]
+ },
+ "execution_count": 51,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Sort descending to see top channel\n",
+ "# (largest summed customer_lifetime_value first; pivot_sales is rebound to the sorted table)\n",
+ "pivot_sales = pivot_sales.sort_values(by='customer_lifetime_value', ascending=False)\n",
+ "pivot_sales"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "505806ba-7ebb-422b-9370-bef2eec51443",
+ "metadata": {},
+ "source": [
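+ "Some insights:\n",
+ "- The Agent channel generated the most revenue (about 33.06M), followed by Branch (about 24.36M) and Call Center (about 17.36M).\n",
+ "- Web (online) sales contributed the least revenue (about 12.70M).\n",
+ "- The ranking mirrors the number of customers per channel (Agent 4,121 vs. Web 1,626), so the totals mainly reflect channel volume: the average value per customer is roughly 7,800-8,100 in every channel."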
+ ]
+ },
{
"cell_type": "markdown",
"id": "640993b2-a291-436c-a34d-a551144f8196",
@@ -103,6 +1229,110 @@
"2. Create a pivot table that shows the average customer lifetime value per gender and education level. Analyze the resulting table to draw insights."
]
},
+ {
+ "cell_type": "code",
+ "execution_count": 58,
+ "id": "3a069e0b-b400-470e-904d-d17582191be4",
+ "metadata": {
+ "id": "3a069e0b-b400-470e-904d-d17582191be4"
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | education | \n",
+ " Bachelor | \n",
+ " College | \n",
+ " Doctor | \n",
+ " High School or Below | \n",
+ " Master | \n",
+ "
\n",
+ " \n",
+ " | gender | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | F | \n",
+ " 7874.269478 | \n",
+ " 7748.823325 | \n",
+ " 7328.508916 | \n",
+ " 8675.220201 | \n",
+ " 8157.053154 | \n",
+ "
\n",
+ " \n",
+ " | M | \n",
+ " 7703.601675 | \n",
+ " 8052.459288 | \n",
+ " 7415.333638 | \n",
+ " 8149.687783 | \n",
+ " 8168.832659 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ "education Bachelor College Doctor High School or Below \\\n",
+ "gender \n",
+ "F 7874.269478 7748.823325 7328.508916 8675.220201 \n",
+ "M 7703.601675 8052.459288 7415.333638 8149.687783 \n",
+ "\n",
+ "education Master \n",
+ "gender \n",
+ "F 8157.053154 \n",
+ "M 8168.832659 "
+ ]
+ },
+ "execution_count": 58,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Mean customer lifetime value: one row per gender, one column per education level\n",
+ "pivot_gender_edu = df.pivot_table(\n",
+ "    values='customer_lifetime_value',\n",
+ "    index='gender',\n",
+ "    columns='education',\n",
+ "    aggfunc='mean',\n",
+ ")\n",
+ "pivot_gender_edu"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "511a4777-07f6-4ad4-8419-1a576e215d53",
+ "metadata": {},
+ "source": [
+ "Some insights:\n",
+ "- For Bachelor and High School or Below, females have a slightly higher average customer lifetime value than males.\n",
+ "- For College, Doctor, and Master, males have a slightly higher average customer lifetime value than females.\n",
+ "- However, the differences between genders are small within every education level."
+ ]
+ },
{
"cell_type": "markdown",
"id": "32c7f2e5-3d90-43e5-be33-9781b6069198",
@@ -127,18 +1357,6 @@
"\n",
"*More information about long and wide format tables here: https://www.statology.org/long-vs-wide-data/*"
]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "3a069e0b-b400-470e-904d-d17582191be4",
- "metadata": {
- "id": "3a069e0b-b400-470e-904d-d17582191be4"
- },
- "outputs": [],
- "source": [
- "# Your code goes here"
- ]
}
],
"metadata": {
@@ -160,7 +1378,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.9.13"
+ "version": "3.13.9"
}
},
"nbformat": 4,