diff --git a/lab-dw-data-structuring-and-combining.ipynb b/lab-dw-data-structuring-and-combining.ipynb index ec4e3f9..63400ed 100644 --- a/lab-dw-data-structuring-and-combining.ipynb +++ b/lab-dw-data-structuring-and-combining.ipynb @@ -36,14 +36,794 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "id": "492d06e3-92c7-4105-ac72-536db98d3244", "metadata": { "id": "492d06e3-92c7-4105-ac72-536db98d3244" }, "outputs": [], "source": [ - "# Your code goes here" + "# Your code goes here\n", + "import pandas as pd\n", + "df1 = pd.read_csv(\"https://raw.githubusercontent.com/data-bootcamp-v4/data/main/file1.csv\")\n", + "df2 = pd.read_csv(\"https://raw.githubusercontent.com/data-bootcamp-v4/data/main/file2.csv\")\n", + "df3 = pd.read_csv(\"https://raw.githubusercontent.com/data-bootcamp-v4/data/main/file3.csv\")" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "96b7adbf-a082-4b9d-b978-204596ca4444", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(4008, 11), (996, 11), (7070, 11)\n" + ] + } + ], + "source": [ + "print(f\"{df1.shape}, {df2.shape}, {df3.shape}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "id": "215f6eaa-2b65-45e2-a985-065484eaa066", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(4008, 11)\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
customerstategendereducationcustomer_lifetime_valueincomemonthly_premium_autonumber_of_open_complaintspolicy_typevehicle_classtotal_claim_amount
0RB50392WashingtonNaNMasterNaN0.01000.01/0/00Personal AutoFour-Door Car2.704934
1QZ44356ArizonaFBachelor697953.59%0.094.01/0/00Personal AutoFour-Door Car1131.464935
2AI49188NevadaFBachelor1288743.17%48767.0108.01/0/00Personal AutoTwo-Door Car566.472247
3WW63253CaliforniaMBachelor764586.18%0.0106.01/0/00Corporate AutoSUV529.881344
4GA49547WashingtonMHigh School or Below536307.65%36357.068.01/0/00Personal AutoFour-Door Car17.269323
\n", + "
" + ], + "text/plain": [ + " customer state gender education customer_lifetime_value \\\n", + "0 RB50392 Washington NaN Master NaN \n", + "1 QZ44356 Arizona F Bachelor 697953.59% \n", + "2 AI49188 Nevada F Bachelor 1288743.17% \n", + "3 WW63253 California M Bachelor 764586.18% \n", + "4 GA49547 Washington M High School or Below 536307.65% \n", + "\n", + " income monthly_premium_auto number_of_open_complaints policy_type \\\n", + "0 0.0 1000.0 1/0/00 Personal Auto \n", + "1 0.0 94.0 1/0/00 Personal Auto \n", + "2 48767.0 108.0 1/0/00 Personal Auto \n", + "3 0.0 106.0 1/0/00 Corporate Auto \n", + "4 36357.0 68.0 1/0/00 Personal Auto \n", + "\n", + " vehicle_class total_claim_amount \n", + "0 Four-Door Car 2.704934 \n", + "1 Four-Door Car 1131.464935 \n", + "2 Two-Door Car 566.472247 \n", + "3 SUV 529.881344 \n", + "4 Four-Door Car 17.269323 " + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Clean dataset 1\n", + "clean_columns = [col.replace(' ', '_').lower() for col in list(df1.columns)]\n", + "clean_columns[1] = \"state\"\n", + "df1.columns = clean_columns\n", + "print(df1.shape)\n", + "df1.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "id": "f230c026-7557-4097-9136-ba7d2bfc8be3", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(996, 11)\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
customerstategendereducationcustomer_lifetime_valueincomemonthly_premium_autonumber_of_open_complaintstotal_claim_amountpolicy_typevehicle_class
0GS98873ArizonaFBachelor323912.47%16061881/0/00633.6Personal AutoFour-Door Car
1CW49887CaliforniaFMaster462680.11%794871141/0/00547.2Special AutoSUV
2MY31220CaliforniaFCollege899704.02%542301121/0/00537.6Personal AutoTwo-Door Car
3UH35128OregonFCollege2580706.30%712102141/1/001027.2Personal AutoLuxury Car
4WH52799ArizonaFCollege380812.21%94903941/0/00451.2Corporate AutoTwo-Door Car
\n", + "
" + ], + "text/plain": [ + " customer state gender education customer_lifetime_value income \\\n", + "0 GS98873 Arizona F Bachelor 323912.47% 16061 \n", + "1 CW49887 California F Master 462680.11% 79487 \n", + "2 MY31220 California F College 899704.02% 54230 \n", + "3 UH35128 Oregon F College 2580706.30% 71210 \n", + "4 WH52799 Arizona F College 380812.21% 94903 \n", + "\n", + " monthly_premium_auto number_of_open_complaints total_claim_amount \\\n", + "0 88 1/0/00 633.6 \n", + "1 114 1/0/00 547.2 \n", + "2 112 1/0/00 537.6 \n", + "3 214 1/1/00 1027.2 \n", + "4 94 1/0/00 451.2 \n", + "\n", + " policy_type vehicle_class \n", + "0 Personal Auto Four-Door Car \n", + "1 Special Auto SUV \n", + "2 Personal Auto Two-Door Car \n", + "3 Personal Auto Luxury Car \n", + "4 Corporate Auto Two-Door Car " + ] + }, + "execution_count": 31, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Clean dataset 2\n", + "clean_columns = [col.replace(' ', '_').lower() for col in list(df2.columns)]\n", + "clean_columns[1] = \"state\"\n", + "df2.columns = clean_columns\n", + "print(df2.shape)\n", + "df2.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "id": "162db87f-cfd2-474c-af97-3e5bf5731421", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(7070, 11)\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
customerstatecustomer_lifetime_valueeducationgenderincomemonthly_premium_autonumber_of_open_complaintspolicy_typetotal_claim_amountvehicle_class
0SA25987Washington3479.137523High School or BelowM01040Personal Auto499.200000Two-Door Car
1TB86706Arizona2502.637401MasterM0660Personal Auto3.468912Two-Door Car
2ZL73902Nevada3265.156348BachelorF25820820Personal Auto393.600000Four-Door Car
3KX23516California4455.843406High School or BelowF01210Personal Auto699.615192SUV
4FN77294California7704.958480High School or BelowM303661012Personal Auto484.800000SUV
\n", + "
" + ], + "text/plain": [ + " customer state customer_lifetime_value education gender \\\n", + "0 SA25987 Washington 3479.137523 High School or Below M \n", + "1 TB86706 Arizona 2502.637401 Master M \n", + "2 ZL73902 Nevada 3265.156348 Bachelor F \n", + "3 KX23516 California 4455.843406 High School or Below F \n", + "4 FN77294 California 7704.958480 High School or Below M \n", + "\n", + " income monthly_premium_auto number_of_open_complaints policy_type \\\n", + "0 0 104 0 Personal Auto \n", + "1 0 66 0 Personal Auto \n", + "2 25820 82 0 Personal Auto \n", + "3 0 121 0 Personal Auto \n", + "4 30366 101 2 Personal Auto \n", + "\n", + " total_claim_amount vehicle_class \n", + "0 499.200000 Two-Door Car \n", + "1 3.468912 Two-Door Car \n", + "2 393.600000 Four-Door Car \n", + "3 699.615192 SUV \n", + "4 484.800000 SUV " + ] + }, + "execution_count": 32, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Clean dataset 3\n", + "clean_columns = [col.replace(' ', '_').lower() for col in list(df3.columns)]\n", + "clean_columns[1] = \"state\"\n", + "df3.columns = clean_columns\n", + "print(df3.shape)\n", + "df3.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "id": "7e12ae21-59ed-4a91-aaf6-b639cb258a1e", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(12074, 11)\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
customerstategendereducationcustomer_lifetime_valueincomemonthly_premium_autonumber_of_open_complaintspolicy_typevehicle_classtotal_claim_amount
0RB50392WashingtonNaNMasterNaN0.01000.01/0/00Personal AutoFour-Door Car2.704934
1QZ44356ArizonaFBachelor697953.59%0.094.01/0/00Personal AutoFour-Door Car1131.464935
2AI49188NevadaFBachelor1288743.17%48767.0108.01/0/00Personal AutoTwo-Door Car566.472247
3WW63253CaliforniaMBachelor764586.18%0.0106.01/0/00Corporate AutoSUV529.881344
4GA49547WashingtonMHigh School or Below536307.65%36357.068.01/0/00Personal AutoFour-Door Car17.269323
\n", + "
" + ], + "text/plain": [ + " customer state gender education customer_lifetime_value \\\n", + "0 RB50392 Washington NaN Master NaN \n", + "1 QZ44356 Arizona F Bachelor 697953.59% \n", + "2 AI49188 Nevada F Bachelor 1288743.17% \n", + "3 WW63253 California M Bachelor 764586.18% \n", + "4 GA49547 Washington M High School or Below 536307.65% \n", + "\n", + " income monthly_premium_auto number_of_open_complaints policy_type \\\n", + "0 0.0 1000.0 1/0/00 Personal Auto \n", + "1 0.0 94.0 1/0/00 Personal Auto \n", + "2 48767.0 108.0 1/0/00 Personal Auto \n", + "3 0.0 106.0 1/0/00 Corporate Auto \n", + "4 36357.0 68.0 1/0/00 Personal Auto \n", + "\n", + " vehicle_class total_claim_amount \n", + "0 Four-Door Car 2.704934 \n", + "1 Four-Door Car 1131.464935 \n", + "2 Two-Door Car 566.472247 \n", + "3 SUV 529.881344 \n", + "4 Four-Door Car 17.269323 " + ] + }, + "execution_count": 38, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = pd.concat([df1, df2, df3], axis=0).reset_index(drop=True)\n", + "print(df.shape)\n", + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "id": "c34bdf5b-78b2-4c6f-8f87-f0d4431123a6", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 12074 entries, 0 to 12073\n", + "Data columns (total 11 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 customer 9137 non-null object \n", + " 1 state 9137 non-null object \n", + " 2 gender 9015 non-null object \n", + " 3 education 9137 non-null object \n", + " 4 customer_lifetime_value 9130 non-null object \n", + " 5 income 9137 non-null float64\n", + " 6 monthly_premium_auto 9137 non-null float64\n", + " 7 number_of_open_complaints 9137 non-null object \n", + " 8 policy_type 9137 non-null object \n", + " 9 vehicle_class 9137 non-null object \n", + " 10 total_claim_amount 9137 non-null float64\n", + "dtypes: float64(3), object(8)\n", + "memory usage: 1.0+ MB\n" + ] + } + ], + "source": [ + "df.info()" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "id": "6b756dd4-31a6-4a2d-b007-d1815238aa1c", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "customer 2937\n", + "state 2937\n", + "gender 3059\n", + "education 2937\n", + "customer_lifetime_value 2944\n", + "income 2937\n", + "monthly_premium_auto 2937\n", + "number_of_open_complaints 2937\n", + "policy_type 2937\n", + "vehicle_class 2937\n", + "total_claim_amount 2937\n", + "dtype: int64" + ] + }, + "execution_count": 40, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.isna().sum()" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "id": "73f819ea-5ae9-480b-9eb0-979d1f1145ee", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "education\n", + "Bachelor 2743\n", + "College 2682\n", + "High School or Below 2616\n", + "Master 752\n", + "Doctor 344\n", + "Name: count, dtype: int64" + ] + }, + "execution_count": 44, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Clean column \"customer_lifetime_value\"\n", + "df['customer_lifetime_value'] = df['customer_lifetime_value'].str.replace(\"%\", \"\")\n", + "df['education'].value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "id": "3cc32489-138f-4577-99b5-1a22887b6e75", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "education\n", + "Bachelor 2743\n", + "College 2682\n", + "High School or Below 2616\n", + "Master 752\n", + "Doctor 344\n", + "Name: count, dtype: int64" + ] + }, + "execution_count": 42, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Clean column \"education\"\n", + "df['education'] = df['education'].str.replace(\"Bachelors\", \"Bachelor\")\n", + "df['education'].value_counts()" ] }, { @@ -72,14 +852,231 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 46, "id": "aa10d9b0-1c27-4d3f-a8e4-db6ab73bfd26", "metadata": { "id": "aa10d9b0-1c27-4d3f-a8e4-db6ab73bfd26" }, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
unnamed:_0customerstatecustomer_lifetime_valueresponsecoverageeducationeffective_to_dateemploymentstatusgender...number_of_policiespolicy_typepolicyrenew_offer_typesales_channeltotal_claim_amountvehicle_classvehicle_sizevehicle_typemonth
00DK49336Arizona4809.216960NoBasicCollege2011-02-18EmployedM...9Corporate AutoCorporate L3Offer3Agent292.800000Four-Door CarMedsizeA2
11KX64629California2228.525238NoBasicCollege2011-01-18UnemployedF...1Personal AutoPersonal L3Offer4Call Center744.924331Four-Door CarMedsizeA1
22LZ68649Washington14947.917300NoBasicBachelor2011-02-10EmployedM...2Personal AutoPersonal L3Offer3Call Center480.000000SUVMedsizeA2
33XL78013Oregon22332.439460YesExtendedCollege2011-01-11EmployedM...2Corporate AutoCorporate L3Offer2Branch484.013411Four-Door CarMedsizeA1
44QA50777Oregon9025.067525NoPremiumBachelor2011-01-17Medical LeaveF...7Personal AutoPersonal L2Offer1Branch707.925645Four-Door CarMedsizeA1
\n", + "

5 rows × 27 columns

\n", + "
" + ], + "text/plain": [ + " unnamed:_0 customer state customer_lifetime_value response \\\n", + "0 0 DK49336 Arizona 4809.216960 No \n", + "1 1 KX64629 California 2228.525238 No \n", + "2 2 LZ68649 Washington 14947.917300 No \n", + "3 3 XL78013 Oregon 22332.439460 Yes \n", + "4 4 QA50777 Oregon 9025.067525 No \n", + "\n", + " coverage education effective_to_date employmentstatus gender ... \\\n", + "0 Basic College 2011-02-18 Employed M ... \n", + "1 Basic College 2011-01-18 Unemployed F ... \n", + "2 Basic Bachelor 2011-02-10 Employed M ... \n", + "3 Extended College 2011-01-11 Employed M ... \n", + "4 Premium Bachelor 2011-01-17 Medical Leave F ... \n", + "\n", + " number_of_policies policy_type policy renew_offer_type \\\n", + "0 9 Corporate Auto Corporate L3 Offer3 \n", + "1 1 Personal Auto Personal L3 Offer4 \n", + "2 2 Personal Auto Personal L3 Offer3 \n", + "3 2 Corporate Auto Corporate L3 Offer2 \n", + "4 7 Personal Auto Personal L2 Offer1 \n", + "\n", + " sales_channel total_claim_amount vehicle_class vehicle_size \\\n", + "0 Agent 292.800000 Four-Door Car Medsize \n", + "1 Call Center 744.924331 Four-Door Car Medsize \n", + "2 Call Center 480.000000 SUV Medsize \n", + "3 Branch 484.013411 Four-Door Car Medsize \n", + "4 Branch 707.925645 Four-Door Car Medsize \n", + "\n", + " vehicle_type month \n", + "0 A 2 \n", + "1 A 1 \n", + "2 A 2 \n", + "3 A 1 \n", + "4 A 1 \n", + "\n", + "[5 rows x 27 columns]" + ] + }, + "execution_count": 46, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# Your code goes here" + "# Your code goes here\n", + "import pandas as pd\n", + "df = pd.read_csv(\"https://raw.githubusercontent.com/data-bootcamp-v4/data/main/marketing_customer_analysis_clean.csv\")\n", + "df.head()" ] }, { @@ -93,6 +1090,135 @@ "Round the total revenue to 2 decimal points. Analyze the resulting table to draw insights." ] }, + { + "cell_type": "code", + "execution_count": 53, + "id": "7e88be17-e77f-4d5d-9e2b-4a25bd41d6a5", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "sales_channel\n", + "Agent 4121\n", + "Branch 3022\n", + "Call Center 2141\n", + "Web 1626\n", + "Name: count, dtype: int64" + ] + }, + "execution_count": 53, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df['sales_channel'].value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": 49, + "id": "2cf8745e-e9f1-4d82-bf95-825e4dfd514f", + "metadata": {}, + "outputs": [], + "source": [ + "# Create pivot table: total revenue by sales channel\n", + "pivot_sales = pd.pivot_table(\n", + " df,\n", + " index='sales_channel', \n", + " values='customer_lifetime_value', \n", + " aggfunc='sum'\n", + ")\n", + "# Round the total revenue to 2 decimal points\n", + "pivot_sales = pivot_sales.round(2)" + ] + }, + { + "cell_type": "code", + "execution_count": 51, + "id": "e7a14ba4-cb83-4e8a-bd5e-6dc46140e27e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
customer_lifetime_value
sales_channel
Agent33057887.85
Branch24359201.21
Call Center17364288.37
Web12697632.90
\n", + "
" + ], + "text/plain": [ + " customer_lifetime_value\n", + "sales_channel \n", + "Agent 33057887.85\n", + "Branch 24359201.21\n", + "Call Center 17364288.37\n", + "Web 12697632.90" + ] + }, + "execution_count": 51, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Sort descending to see top channel\n", + "pivot_sales = pivot_sales.sort_values(by='customer_lifetime_value', ascending=False)\n", + "pivot_sales" + ] + }, + { + "cell_type": "markdown", + "id": "505806ba-7ebb-422b-9370-bef2eec51443", + "metadata": {}, + "source": [ + "Some insights:\n", + "- Agent generated the most revenue.\n", + "- Web or online sales contribute the least revenue." + ] + }, { "cell_type": "markdown", "id": "640993b2-a291-436c-a34d-a551144f8196", @@ -103,6 +1229,110 @@ "2. Create a pivot table that shows the average customer lifetime value per gender and education level. Analyze the resulting table to draw insights." ] }, + { + "cell_type": "code", + "execution_count": 58, + "id": "3a069e0b-b400-470e-904d-d17582191be4", + "metadata": { + "id": "3a069e0b-b400-470e-904d-d17582191be4" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
educationBachelorCollegeDoctorHigh School or BelowMaster
gender
F7874.2694787748.8233257328.5089168675.2202018157.053154
M7703.6016758052.4592887415.3336388149.6877838168.832659
\n", + "
" + ], + "text/plain": [ + "education Bachelor College Doctor High School or Below \\\n", + "gender \n", + "F 7874.269478 7748.823325 7328.508916 8675.220201 \n", + "M 7703.601675 8052.459288 7415.333638 8149.687783 \n", + "\n", + "education Master \n", + "gender \n", + "F 8157.053154 \n", + "M 8168.832659 " + ] + }, + "execution_count": 58, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pivot_gender_edu = pd.pivot_table(\n", + " df,\n", + " index='gender', \n", + " columns='education', \n", + " values='customer_lifetime_value', \n", + " aggfunc='mean' \n", + ")\n", + "pivot_gender_edu" + ] + }, + { + "cell_type": "markdown", + "id": "511a4777-07f6-4ad4-8419-1a576e215d53", + "metadata": {}, + "source": [ + "Some insights:\n", + "- For Bachelor and High School or Below, females have slightly higher average customer lifetime value than males. \n", + "- For College, Doctor, and Master, males have slightly higher customer lifetime value than females.\n", + "- However, the differences between them are very small." + ] + }, { "cell_type": "markdown", "id": "32c7f2e5-3d90-43e5-be33-9781b6069198", @@ -127,18 +1357,6 @@ "\n", "*More information about long and wide format tables here: https://www.statology.org/long-vs-wide-data/*" ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "3a069e0b-b400-470e-904d-d17582191be4", - "metadata": { - "id": "3a069e0b-b400-470e-904d-d17582191be4" - }, - "outputs": [], - "source": [ - "# Your code goes here" - ] } ], "metadata": { @@ -160,7 +1378,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.13" + "version": "3.13.9" } }, "nbformat": 4,