diff --git a/.ipynb_checkpoints/lab-dw-data-structuring-and-combining-checkpoint.ipynb b/.ipynb_checkpoints/lab-dw-data-structuring-and-combining-checkpoint.ipynb new file mode 100644 index 0000000..0c7802b --- /dev/null +++ b/.ipynb_checkpoints/lab-dw-data-structuring-and-combining-checkpoint.ipynb @@ -0,0 +1,711 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "25d7736c-ba17-4aff-b6bb-66eba20fbf4e", + "metadata": { + "id": "25d7736c-ba17-4aff-b6bb-66eba20fbf4e" + }, + "source": [ + "# Lab | Data Structuring and Combining Data" + ] + }, + { + "cell_type": "markdown", + "id": "a2cdfc70-44c8-478c-81e7-2bc43fdf4986", + "metadata": { + "id": "a2cdfc70-44c8-478c-81e7-2bc43fdf4986" + }, + "source": [ + "## Challenge 1: Combining & Cleaning Data\n", + "\n", + "In this challenge, we will be working with the customer data from an insurance company, as we did in the two previous labs. The data can be found here:\n", + "- https://raw.githubusercontent.com/data-bootcamp-v4/data/main/file1.csv\n", + "\n", + "But this time, we got new data, which can be found in the following 2 CSV files located at the links below.\n", + "\n", + "- https://raw.githubusercontent.com/data-bootcamp-v4/data/main/file2.csv\n", + "- https://raw.githubusercontent.com/data-bootcamp-v4/data/main/file3.csv\n", + "\n", + "Note that you'll need to clean and format the new data.\n", + "\n", + "Observation:\n", + "- One option is to first combine the three datasets and then apply the cleaning function to the new combined dataset\n", + "- Another option would be to read the clean file you saved in the previous lab, and just clean the two new files and concatenate the three clean datasets" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "id": "492d06e3-92c7-4105-ac72-536db98d3244", + "metadata": { + "id": "492d06e3-92c7-4105-ac72-536db98d3244" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Total rows after combining: 12074\n", + "\n", + 
"Standardized Column Names: ['customer', 'education', 'customer_lifetime_value', 'income', 'monthly_premium_auto', 'number_of_open_complaints', 'policy_type', 'vehicle_class', 'total_claim_amount', 'state', 'gender']\n" + ] + } + ], + "source": [ + "# Your code goes here\n", + "\n", + "import pandas as pd\n", + "import numpy as np\n", + "\n", + "csv_urls = [\n", + " 'https://raw.githubusercontent.com/data-bootcamp-v4/data/main/file1.csv',\n", + " 'https://raw.githubusercontent.com/data-bootcamp-v4/data/main/file2.csv',\n", + " 'https://raw.githubusercontent.com/data-bootcamp-v4/data/main/file3.csv'\n", + "]\n", + "\n", + "df_list = [pd.read_csv(url) for url in csv_urls]\n", + "\n", + "customer_data = pd.concat(df_list, ignore_index = True)\n", + "\n", + "print(f\"Total rows after combining: {len(customer_data)}\")\n", + "customer_data.head()\n", + "\n", + "def clean_column_names(df):\n", + " df.columns = df.columns.str.replace(' ', '_')\n", + " df.columns = df.columns.str.replace('.', '', regex = False)\n", + " df.columns = df.columns.str.replace('/', '_', regex = False)\n", + "\n", + " return df\n", + "\n", + "customer_data = clean_column_names(customer_data)\n", + "\n", + "customer_data['state_merged'] = customer_data['State'].combine_first(customer_data['ST'])\n", + "customer_data = customer_data.drop(columns = ['ST', 'State'], errors = 'ignore')\n", + "customer_data = customer_data.rename(columns = {'state_merged':'state'})\n", + "\n", + "customer_data['gender_merged'] = customer_data['GENDER'].combine_first(customer_data['Gender'])\n", + "customer_data = customer_data.drop(columns = ['GENDER', 'Gender'], errors = 'ignore')\n", + "customer_data = customer_data.rename(columns = {'gender_merged':'gender'})\n", + "\n", + "customer_data.columns = customer_data.columns.str.lower()\n", + "print(\"\\nStandardized Column Names: \", customer_data.columns.to_list())" + ] + }, + { + "cell_type": "markdown", + "id": "31b8a9e7-7db9-4604-991b-ef6771603e57", + "metadata": { + 
"id": "31b8a9e7-7db9-4604-991b-ef6771603e57" + }, + "source": [ + "# Challenge 2: Structuring Data" + ] + }, + { + "cell_type": "markdown", + "id": "a877fd6d-7a0c-46d2-9657-f25036e4ca4b", + "metadata": { + "id": "a877fd6d-7a0c-46d2-9657-f25036e4ca4b" + }, + "source": [ + "In this challenge, we will continue to work with customer data from an insurance company, but we will use a dataset with more columns, called marketing_customer_analysis.csv, which can be found at the following link:\n", + "\n", + "https://raw.githubusercontent.com/data-bootcamp-v4/data/main/marketing_customer_analysis_clean.csv\n", + "\n", + "This dataset contains information such as customer demographics, policy details, vehicle information, and the customer's response to the last marketing campaign. Our goal is to explore and analyze this data by performing data cleaning, formatting, and structuring." + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "aa10d9b0-1c27-4d3f-a8e4-db6ab73bfd26", + "metadata": { + "id": "aa10d9b0-1c27-4d3f-a8e4-db6ab73bfd26" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 10910 entries, 0 to 10909\n", + "Data columns (total 27 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 unnamed:_0 10910 non-null int64 \n", + " 1 customer 10910 non-null object \n", + " 2 state 10910 non-null object \n", + " 3 customer_lifetime_value 10910 non-null float64\n", + " 4 response 10910 non-null object \n", + " 5 coverage 10910 non-null object \n", + " 6 education 10910 non-null object \n", + " 7 effective_to_date 10910 non-null object \n", + " 8 employmentstatus 10910 non-null object \n", + " 9 gender 10910 non-null object \n", + " 10 income 10910 non-null int64 \n", + " 11 location_code 10910 non-null object \n", + " 12 marital_status 10910 non-null object \n", + " 13 monthly_premium_auto 10910 non-null int64 \n", + " 14 months_since_last_claim 10910 non-null 
float64\n", + " 15 months_since_policy_inception 10910 non-null int64 \n", + " 16 number_of_open_complaints 10910 non-null float64\n", + " 17 number_of_policies 10910 non-null int64 \n", + " 18 policy_type 10910 non-null object \n", + " 19 policy 10910 non-null object \n", + " 20 renew_offer_type 10910 non-null object \n", + " 21 sales_channel 10910 non-null object \n", + " 22 total_claim_amount 10910 non-null float64\n", + " 23 vehicle_class 10910 non-null object \n", + " 24 vehicle_size 10910 non-null object \n", + " 25 vehicle_type 10910 non-null object \n", + " 26 month 10910 non-null int64 \n", + "dtypes: float64(4), int64(6), object(17)\n", + "memory usage: 2.2+ MB\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
unnamed:_0customerstatecustomer_lifetime_valueresponsecoverageeducationeffective_to_dateemploymentstatusgender...number_of_policiespolicy_typepolicyrenew_offer_typesales_channeltotal_claim_amountvehicle_classvehicle_sizevehicle_typemonth
00DK49336Arizona4809.216960NoBasicCollege2011-02-18EmployedM...9Corporate AutoCorporate L3Offer3Agent292.800000Four-Door CarMedsizeA2
11KX64629California2228.525238NoBasicCollege2011-01-18UnemployedF...1Personal AutoPersonal L3Offer4Call Center744.924331Four-Door CarMedsizeA1
22LZ68649Washington14947.917300NoBasicBachelor2011-02-10EmployedM...2Personal AutoPersonal L3Offer3Call Center480.000000SUVMedsizeA2
33XL78013Oregon22332.439460YesExtendedCollege2011-01-11EmployedM...2Corporate AutoCorporate L3Offer2Branch484.013411Four-Door CarMedsizeA1
44QA50777Oregon9025.067525NoPremiumBachelor2011-01-17Medical LeaveF...7Personal AutoPersonal L2Offer1Branch707.925645Four-Door CarMedsizeA1
..................................................................
1090510905FE99816Nevada15563.369440NoPremiumBachelor2011-01-19UnemployedF...7Personal AutoPersonal L1Offer3Web1214.400000Luxury CarMedsizeA1
1090610906KX53892Oregon5259.444853NoBasicCollege2011-01-06EmployedF...6Personal AutoPersonal L3Offer2Branch273.018929Four-Door CarMedsizeA1
1090710907TL39050Arizona23893.304100NoExtendedBachelor2011-02-06EmployedF...2Corporate AutoCorporate L3Offer1Web381.306996Luxury SUVMedsizeA2
1090810908WA60547California11971.977650NoPremiumCollege2011-02-13EmployedF...6Personal AutoPersonal L1Offer1Branch618.288849SUVMedsizeA2
1090910909IV32877California6857.519928NoBasicBachelor2011-01-08UnemployedM...3Personal AutoPersonal L1Offer4Web1021.719397SUVMedsizeA1
\n", + "

10910 rows × 27 columns

\n", + "
" + ], + "text/plain": [ + " unnamed:_0 customer state customer_lifetime_value response \\\n", + "0 0 DK49336 Arizona 4809.216960 No \n", + "1 1 KX64629 California 2228.525238 No \n", + "2 2 LZ68649 Washington 14947.917300 No \n", + "3 3 XL78013 Oregon 22332.439460 Yes \n", + "4 4 QA50777 Oregon 9025.067525 No \n", + "... ... ... ... ... ... \n", + "10905 10905 FE99816 Nevada 15563.369440 No \n", + "10906 10906 KX53892 Oregon 5259.444853 No \n", + "10907 10907 TL39050 Arizona 23893.304100 No \n", + "10908 10908 WA60547 California 11971.977650 No \n", + "10909 10909 IV32877 California 6857.519928 No \n", + "\n", + " coverage education effective_to_date employmentstatus gender ... \\\n", + "0 Basic College 2011-02-18 Employed M ... \n", + "1 Basic College 2011-01-18 Unemployed F ... \n", + "2 Basic Bachelor 2011-02-10 Employed M ... \n", + "3 Extended College 2011-01-11 Employed M ... \n", + "4 Premium Bachelor 2011-01-17 Medical Leave F ... \n", + "... ... ... ... ... ... ... \n", + "10905 Premium Bachelor 2011-01-19 Unemployed F ... \n", + "10906 Basic College 2011-01-06 Employed F ... \n", + "10907 Extended Bachelor 2011-02-06 Employed F ... \n", + "10908 Premium College 2011-02-13 Employed F ... \n", + "10909 Basic Bachelor 2011-01-08 Unemployed M ... \n", + "\n", + " number_of_policies policy_type policy renew_offer_type \\\n", + "0 9 Corporate Auto Corporate L3 Offer3 \n", + "1 1 Personal Auto Personal L3 Offer4 \n", + "2 2 Personal Auto Personal L3 Offer3 \n", + "3 2 Corporate Auto Corporate L3 Offer2 \n", + "4 7 Personal Auto Personal L2 Offer1 \n", + "... ... ... ... ... 
# Challenge 2: load the marketing dataset and normalise it for analysis.

import pandas as pd

url = "https://raw.githubusercontent.com/data-bootcamp-v4/data/main/marketing_customer_analysis_clean.csv"

# The first CSV column has no header and just repeats the row number (it
# showed up as "unnamed:_0" next to an identical index), so read it as the
# index instead of keeping it as a data column.
df = pd.read_csv(url, index_col=0)
df.columns = df.columns.str.lower().str.replace(" ", "_", regex=False)

# Only the last expression of a cell is rendered, so intermediate
# summaries are printed explicitly.
df.info()
print(df.describe().T)
print(f"\nTotal missing values: {int(df.isnull().sum().sum())}")

# Defensive clean-up: fill missing income with the median and force the
# numeric columns used later to a numeric dtype.
df["income"] = df["income"].fillna(df["income"].median())
for numeric_col in ("customer_lifetime_value", "monthly_premium_auto"):
    df[numeric_col] = pd.to_numeric(df[numeric_col], errors="coerce")

# Show a preview rather than the whole frame.
df.head()
"3551ba94-6f4d-4d6f-848b-8148e6c86a75", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " income\n", + "sales_channel \n", + "Agent 152490152\n", + "Branch 113775608\n", + "Call Center 81055004\n", + "Web 62200103\n" + ] + } + ], + "source": [ + "pivot_rev = df.pivot_table(index=\"sales_channel\",\n", + " values=\"income\",\n", + " aggfunc=\"sum\").round(2)\n", + "print(pivot_rev)" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "0a7d24a6-d257-48a5-897a-5d4bff8013e6", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "education Bachelor College Doctor High School or Below Master\n", + "gender \n", + "F 7874.27 7748.82 7328.51 8675.22 8157.05\n", + "M 7703.60 8052.46 7415.33 8149.69 8168.83\n" + ] + } + ], + "source": [ + "pivot_clv = df.pivot_table(index=\"gender\",\n", + " columns=\"education\",\n", + " values=\"customer_lifetime_value\",\n", + " aggfunc=\"mean\").round(2)\n", + "print(pivot_clv)" + ] + }, + { + "cell_type": "markdown", + "id": "df35fd0d-513e-4e77-867e-429da10a9cc7", + "metadata": { + "id": "df35fd0d-513e-4e77-867e-429da10a9cc7" + }, + "source": [ + "1. You work at the marketing department and you want to know which sales channel brought the most sales in terms of total revenue. Using pivot, create a summary table showing the total revenue for each sales channel (branch, call center, web, and mail).\n", + "Round the total revenue to 2 decimal points. Analyze the resulting table to draw insights." + ] + }, + { + "cell_type": "markdown", + "id": "640993b2-a291-436c-a34d-a551144f8196", + "metadata": { + "id": "640993b2-a291-436c-a34d-a551144f8196" + }, + "source": [ + "2. Create a pivot table that shows the average customer lifetime value per gender and education level. Analyze the resulting table to draw insights." 
+ ] + }, + { + "cell_type": "markdown", + "id": "32c7f2e5-3d90-43e5-be33-9781b6069198", + "metadata": { + "id": "32c7f2e5-3d90-43e5-be33-9781b6069198" + }, + "source": [ + "## Bonus\n", + "\n", + "You work at the customer service department and you want to know which months had the highest number of complaints by policy type category. Create a summary table showing the number of complaints by policy type and month.\n", + "Show it in a long format table." + ] + }, + { + "cell_type": "markdown", + "id": "e3d09a8f-953c-448a-a5f8-2e5a8cca7291", + "metadata": { + "id": "e3d09a8f-953c-448a-a5f8-2e5a8cca7291" + }, + "source": [ + "*In data analysis, a long format table is a way of structuring data in which each observation or measurement is stored in a separate row of the table. The key characteristic of a long format table is that each column represents a single variable, and each row represents a single observation of that variable.*\n", + "\n", + "*More information about long and wide format tables here: https://www.statology.org/long-vs-wide-data/*" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3a069e0b-b400-470e-904d-d17582191be4", + "metadata": { + "id": "3a069e0b-b400-470e-904d-d17582191be4" + }, + "outputs": [], + "source": [ + "# Your code goes here" + ] + } + ], + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.13.5" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/lab-dw-data-structuring-and-combining.ipynb b/lab-dw-data-structuring-and-combining.ipynb index ec4e3f9..0c7802b 100644 --- a/lab-dw-data-structuring-and-combining.ipynb +++ 
# Challenge 1: combine the three customer files into a single table with
# standardized column names.

import pandas as pd
import numpy as np

csv_urls = [
    'https://raw.githubusercontent.com/data-bootcamp-v4/data/main/file1.csv',
    'https://raw.githubusercontent.com/data-bootcamp-v4/data/main/file2.csv',
    'https://raw.githubusercontent.com/data-bootcamp-v4/data/main/file3.csv'
]

# Header variants that name the same field once lowercased. The
# 'GENDER'/'Gender' pair already collapses to 'gender' through lowercasing.
COLUMN_ALIASES = {'st': 'state'}


def clean_column_names(df, lowercase=False, aliases=None):
    """Return a copy of ``df`` with tidied column names.

    Spaces become underscores, dots are removed and slashes become
    underscores. The input frame is not modified.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose column labels are strings.
    lowercase : bool, default False
        When True, the tidied names are also lowercased.
    aliases : dict or None, default None
        Optional ``{tidied_name: canonical_name}`` mapping applied last,
        e.g. ``{'st': 'state'}``.

    Returns
    -------
    pandas.DataFrame
        A new frame with the renamed columns and the same data.
    """
    names = (
        df.columns
        .str.replace(' ', '_', regex=False)
        .str.replace('.', '', regex=False)
        .str.replace('/', '_', regex=False)
    )
    if lowercase:
        names = names.str.lower()

    # Work on a copy so the caller's frame keeps its original headers.
    renamed = df.copy()
    renamed.columns = names
    return renamed.rename(columns=aliases) if aliases else renamed


df_list = [pd.read_csv(url) for url in csv_urls]

# Standardize the headers of each file *before* concatenating, so variant
# spellings land in one column instead of producing parallel half-empty
# columns that have to be stitched together afterwards.
clean_frames = [
    clean_column_names(frame, lowercase=True, aliases=COLUMN_ALIASES)
    for frame in df_list
]
for url, frame in zip(csv_urls, clean_frames):
    assert frame.columns.is_unique, f"duplicate column names after cleaning {url}"

combined = pd.concat(clean_frames, ignore_index=True)
print(f"Total rows after combining: {len(combined)}")

# Rows in which every column is missing carry no information; this is a
# no-op when the files contain none.
customer_data = combined.dropna(how='all').reset_index(drop=True)
print(f"Fully empty rows dropped: {len(combined) - len(customer_data)}")

assert {'state', 'gender'} <= set(customer_data.columns), "expected merged columns are missing"

print("\nStandardized Column Names: ", customer_data.columns.to_list())
customer_data.head()
non-null object \n", + " 25 vehicle_type 10910 non-null object \n", + " 26 month 10910 non-null int64 \n", + "dtypes: float64(4), int64(6), object(17)\n", + "memory usage: 2.2+ MB\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
unnamed:_0customerstatecustomer_lifetime_valueresponsecoverageeducationeffective_to_dateemploymentstatusgender...number_of_policiespolicy_typepolicyrenew_offer_typesales_channeltotal_claim_amountvehicle_classvehicle_sizevehicle_typemonth
00DK49336Arizona4809.216960NoBasicCollege2011-02-18EmployedM...9Corporate AutoCorporate L3Offer3Agent292.800000Four-Door CarMedsizeA2
11KX64629California2228.525238NoBasicCollege2011-01-18UnemployedF...1Personal AutoPersonal L3Offer4Call Center744.924331Four-Door CarMedsizeA1
22LZ68649Washington14947.917300NoBasicBachelor2011-02-10EmployedM...2Personal AutoPersonal L3Offer3Call Center480.000000SUVMedsizeA2
33XL78013Oregon22332.439460YesExtendedCollege2011-01-11EmployedM...2Corporate AutoCorporate L3Offer2Branch484.013411Four-Door CarMedsizeA1
44QA50777Oregon9025.067525NoPremiumBachelor2011-01-17Medical LeaveF...7Personal AutoPersonal L2Offer1Branch707.925645Four-Door CarMedsizeA1
..................................................................
1090510905FE99816Nevada15563.369440NoPremiumBachelor2011-01-19UnemployedF...7Personal AutoPersonal L1Offer3Web1214.400000Luxury CarMedsizeA1
1090610906KX53892Oregon5259.444853NoBasicCollege2011-01-06EmployedF...6Personal AutoPersonal L3Offer2Branch273.018929Four-Door CarMedsizeA1
1090710907TL39050Arizona23893.304100NoExtendedBachelor2011-02-06EmployedF...2Corporate AutoCorporate L3Offer1Web381.306996Luxury SUVMedsizeA2
1090810908WA60547California11971.977650NoPremiumCollege2011-02-13EmployedF...6Personal AutoPersonal L1Offer1Branch618.288849SUVMedsizeA2
1090910909IV32877California6857.519928NoBasicBachelor2011-01-08UnemployedM...3Personal AutoPersonal L1Offer4Web1021.719397SUVMedsizeA1
\n", + "

10910 rows × 27 columns

\n", + "
" + ], + "text/plain": [ + " unnamed:_0 customer state customer_lifetime_value response \\\n", + "0 0 DK49336 Arizona 4809.216960 No \n", + "1 1 KX64629 California 2228.525238 No \n", + "2 2 LZ68649 Washington 14947.917300 No \n", + "3 3 XL78013 Oregon 22332.439460 Yes \n", + "4 4 QA50777 Oregon 9025.067525 No \n", + "... ... ... ... ... ... \n", + "10905 10905 FE99816 Nevada 15563.369440 No \n", + "10906 10906 KX53892 Oregon 5259.444853 No \n", + "10907 10907 TL39050 Arizona 23893.304100 No \n", + "10908 10908 WA60547 California 11971.977650 No \n", + "10909 10909 IV32877 California 6857.519928 No \n", + "\n", + " coverage education effective_to_date employmentstatus gender ... \\\n", + "0 Basic College 2011-02-18 Employed M ... \n", + "1 Basic College 2011-01-18 Unemployed F ... \n", + "2 Basic Bachelor 2011-02-10 Employed M ... \n", + "3 Extended College 2011-01-11 Employed M ... \n", + "4 Premium Bachelor 2011-01-17 Medical Leave F ... \n", + "... ... ... ... ... ... ... \n", + "10905 Premium Bachelor 2011-01-19 Unemployed F ... \n", + "10906 Basic College 2011-01-06 Employed F ... \n", + "10907 Extended Bachelor 2011-02-06 Employed F ... \n", + "10908 Premium College 2011-02-13 Employed F ... \n", + "10909 Basic Bachelor 2011-01-08 Unemployed M ... \n", + "\n", + " number_of_policies policy_type policy renew_offer_type \\\n", + "0 9 Corporate Auto Corporate L3 Offer3 \n", + "1 1 Personal Auto Personal L3 Offer4 \n", + "2 2 Personal Auto Personal L3 Offer3 \n", + "3 2 Corporate Auto Corporate L3 Offer2 \n", + "4 7 Personal Auto Personal L2 Offer1 \n", + "... ... ... ... ... 
# Challenge 2: load the marketing dataset and normalise it for analysis.

import pandas as pd

url = "https://raw.githubusercontent.com/data-bootcamp-v4/data/main/marketing_customer_analysis_clean.csv"

# The first CSV column has no header and just repeats the row number (it
# showed up as "unnamed:_0" next to an identical index), so read it as the
# index instead of keeping it as a data column.
df = pd.read_csv(url, index_col=0)
df.columns = df.columns.str.lower().str.replace(" ", "_", regex=False)

# Only the last expression of a cell is rendered, so intermediate
# summaries are printed explicitly.
df.info()
print(df.describe().T)
print(f"\nTotal missing values: {int(df.isnull().sum().sum())}")

# Defensive clean-up: fill missing income with the median and force the
# numeric columns used later to a numeric dtype.
df["income"] = df["income"].fillna(df["income"].median())
for numeric_col in ("customer_lifetime_value", "monthly_premium_auto"):
    df[numeric_col] = pd.to_numeric(df[numeric_col], errors="coerce")

# Show a preview rather than the whole frame.
df.head()
"3551ba94-6f4d-4d6f-848b-8148e6c86a75", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " income\n", + "sales_channel \n", + "Agent 152490152\n", + "Branch 113775608\n", + "Call Center 81055004\n", + "Web 62200103\n" + ] + } + ], + "source": [ + "pivot_rev = df.pivot_table(index=\"sales_channel\",\n", + " values=\"income\",\n", + " aggfunc=\"sum\").round(2)\n", + "print(pivot_rev)" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "0a7d24a6-d257-48a5-897a-5d4bff8013e6", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "education Bachelor College Doctor High School or Below Master\n", + "gender \n", + "F 7874.27 7748.82 7328.51 8675.22 8157.05\n", + "M 7703.60 8052.46 7415.33 8149.69 8168.83\n" + ] + } + ], + "source": [ + "pivot_clv = df.pivot_table(index=\"gender\",\n", + " columns=\"education\",\n", + " values=\"customer_lifetime_value\",\n", + " aggfunc=\"mean\").round(2)\n", + "print(pivot_clv)" ] }, { @@ -160,7 +703,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.13" + "version": "3.13.5" } }, "nbformat": 4,