# %%
import pandas as pd

# Marketing customer analysis dataset (customer demographics, policy and
# vehicle details, response to the last campaign), read straight from GitHub.
url = "https://raw.githubusercontent.com/data-bootcamp-v4/data/main/marketing_customer_analysis.csv"
df = pd.read_csv(filepath_or_buffer=url)

# (rows, columns) of the raw frame as a quick sanity check on the download.
df.shape
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Unnamed: 0CustomerStateCustomer Lifetime ValueResponseCoverageEducationEffective To DateEmploymentStatusGender...Number of Open ComplaintsNumber of PoliciesPolicy TypePolicyRenew Offer TypeSales ChannelTotal Claim AmountVehicle ClassVehicle SizeVehicle Type
00DK49336Arizona4809.216960NoBasicCollege2/18/11EmployedM...0.09Corporate AutoCorporate L3Offer3Agent292.800000Four-Door CarMedsizeNaN
11KX64629California2228.525238NoBasicCollege1/18/11UnemployedF...0.01Personal AutoPersonal L3Offer4Call Center744.924331Four-Door CarMedsizeNaN
22LZ68649Washington14947.917300NoBasicBachelor2/10/11EmployedM...0.02Personal AutoPersonal L3Offer3Call Center480.000000SUVMedsizeA
33XL78013Oregon22332.439460YesExtendedCollege1/11/11EmployedM...0.02Corporate AutoCorporate L3Offer2Branch484.013411Four-Door CarMedsizeA
44QA50777Oregon9025.067525NoPremiumBachelor1/17/11Medical LeaveF...NaN7Personal AutoPersonal L2Offer1Branch707.925645Four-Door CarMedsizeNaN
\n", + "

5 rows × 26 columns

# %%
# Preview of the raw frame before any cleaning.
df.head()

# %%
# Normalise column labels to snake_case in one idempotent statement.
#
# The legacy "ST" header is renamed by exact match only. A substring
# replacement of "ST" would also rewrite any other label that happens to
# contain those two letters, so it is deliberately avoided here.
df = (
    df
    .rename(columns={"ST": "state"})
    .rename(columns=lambda label: label.lower().replace(" ", "_"))
)

df.columns
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
unnamed:_0customerstatecustomer_lifetime_valueresponsecoverageeducationeffective_to_dateemploymentstatusgender...number_of_open_complaintsnumber_of_policiespolicy_typepolicyrenew_offer_typesales_channeltotal_claim_amountvehicle_classvehicle_sizevehicle_type
00DK49336Arizona4809.216960NoBasicCollege2/18/11EmployedM...0.09Corporate AutoCorporate L3Offer3Agent292.800000Four-Door CarMedsizeNaN
11KX64629California2228.525238NoBasicCollege1/18/11UnemployedF...0.01Personal AutoPersonal L3Offer4Call Center744.924331Four-Door CarMedsizeNaN
22LZ68649Washington14947.917300NoBasicBachelor2/10/11EmployedM...0.02Personal AutoPersonal L3Offer3Call Center480.000000SUVMedsizeA
33XL78013Oregon22332.439460YesExtendedCollege1/11/11EmployedM...0.02Corporate AutoCorporate L3Offer2Branch484.013411Four-Door CarMedsizeA
44QA50777Oregon9025.067525NoPremiumBachelor1/17/11Medical LeaveF...NaN7Personal AutoPersonal L2Offer1Branch707.925645Four-Door CarMedsizeNaN
\n", + "

5 rows × 26 columns

# %%
# Spelling variants of each gender label, mapped to a single-letter code.
gender_map = {
    "M": "M",
    "Male": "M",
    "male": "M",
    "F": "F",
    "Femal": "F",
    "Female": "F",
    "female": "F",
}

# State abbreviations / nicknames mapped to the full state name.
state_map = {
    "AZ": "Arizona",
    "Cali": "California",
    "WA": "Washington",
}

# High-end vehicle classes collapsed into one "Luxury" bucket.
vehicle_map = {
    "Sports Car": "Luxury",
    "Luxury SUV": "Luxury",
    "Luxury Car": "Luxury",
}

# One replacement table per column; values that are not listed are left as-is.
replacements_by_column = {
    "gender": gender_map,
    "state": state_map,
    "education": {"Bachelors": "Bachelor"},
    "vehicle_class": vehicle_map,
}
for column_name, value_map in replacements_by_column.items():
    df[column_name] = df[column_name].replace(value_map)

df.head()

# %%
# Column dtypes after the value clean-up.
df.dtypes
# %%
# Fill missing values from the neighbouring rows.
#
# `DataFrame.fillna(method="ffill")` is deprecated (the previous run emitted a
# FutureWarning saying it will raise in a future version), so the dedicated
# `ffill()` method is used instead.
#
# Forward-fill cannot fill rows that have no earlier value to copy from: the
# first rows of `vehicle_type` are empty, and the previous run still reported
# 2 nulls in that column. The trailing `bfill()` covers those leading gaps.
# Both calls are no-ops once the frame has no nulls, so the cell is safe to
# re-run.
df = df.ffill().bfill()

# %%
# Remaining nulls per column (expected: all zeros).
df.isnull().sum()

# %%
# Count fully duplicated records.
#
# `unnamed:_0` is a running row counter carried over from the CSV, so it is
# different on every row and makes `duplicated()` return 0 regardless of the
# actual content. It is excluded from the comparison; `errors="ignore"` keeps
# the cell working if that column has already been dropped.
df.drop(columns=["unnamed:_0"], errors="ignore").duplicated().sum()
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
unnamed:_0customerstatecustomer_lifetime_valueresponsecoverageeducationeffective_to_dateemploymentstatusgender...number_of_open_complaintsnumber_of_policiespolicy_typepolicyrenew_offer_typesales_channeltotal_claim_amountvehicle_classvehicle_sizevehicle_type
33XL78013Oregon22332.439460YesExtendedCollege1/11/11EmployedM...0.02Corporate AutoCorporate L3Offer2Branch484.013411Four-Door CarMedsizeA
88FM55990California5989.773931YesPremiumCollege1/19/11EmployedM...0.01Personal AutoPersonal L1Offer2Branch739.200000LuxuryMedsizeA
1515CW49887California4626.801093YesBasicMaster1/16/11EmployedF...0.01Special AutoSpecial L1Offer2Branch547.200000SUVMedsizeA
1919NJ54277California3746.751625YesExtendedCollege2/26/11EmployedF...1.01Personal AutoPersonal L2Offer2Call Center19.575683Two-Door CarLargeA
2727MQ68407Oregon4376.363592YesPremiumBachelor2/28/11EmployedF...0.01Personal AutoPersonal L3Offer2Agent60.036683Four-Door CarMedsizeA
..................................................................
1084410844FM31768Arizona5979.724161YesExtendedHigh School or Below2/7/11EmployedF...0.03Personal AutoPersonal L1Offer2Agent547.200000Four-Door CarMedsizeA
1085210852KZ80424Washington8382.478392YesBasicBachelor1/27/11EmployedM...0.02Personal AutoPersonal L2Offer2Call Center791.878042Four-Door CarLargeA
1087210872XT67997California5979.724161YesExtendedHigh School or Below2/7/11EmployedF...0.03Personal AutoPersonal L3Offer2Agent547.200000Four-Door CarMedsizeA
1088710887BY78730Oregon8879.790017YesBasicHigh School or Below2/3/11EmployedF...0.07Special AutoSpecial L2Offer1Agent528.200860SUVSmallA
1089710897MM70762Arizona9075.768214YesBasicMaster1/26/11EmployedM...0.08Personal AutoPersonal L1Offer1Agent158.077504LuxuryMedsizeA
\n", + "

1489 rows × 26 columns

\n", + "
" + ], + "text/plain": [ + " unnamed:_0 customer state customer_lifetime_value response \\\n", + "3 3 XL78013 Oregon 22332.439460 Yes \n", + "8 8 FM55990 California 5989.773931 Yes \n", + "15 15 CW49887 California 4626.801093 Yes \n", + "19 19 NJ54277 California 3746.751625 Yes \n", + "27 27 MQ68407 Oregon 4376.363592 Yes \n", + "... ... ... ... ... ... \n", + "10844 10844 FM31768 Arizona 5979.724161 Yes \n", + "10852 10852 KZ80424 Washington 8382.478392 Yes \n", + "10872 10872 XT67997 California 5979.724161 Yes \n", + "10887 10887 BY78730 Oregon 8879.790017 Yes \n", + "10897 10897 MM70762 Arizona 9075.768214 Yes \n", + "\n", + " coverage education effective_to_date employmentstatus \\\n", + "3 Extended College 1/11/11 Employed \n", + "8 Premium College 1/19/11 Employed \n", + "15 Basic Master 1/16/11 Employed \n", + "19 Extended College 2/26/11 Employed \n", + "27 Premium Bachelor 2/28/11 Employed \n", + "... ... ... ... ... \n", + "10844 Extended High School or Below 2/7/11 Employed \n", + "10852 Basic Bachelor 1/27/11 Employed \n", + "10872 Extended High School or Below 2/7/11 Employed \n", + "10887 Basic High School or Below 2/3/11 Employed \n", + "10897 Basic Master 1/26/11 Employed \n", + "\n", + " gender ... number_of_open_complaints number_of_policies \\\n", + "3 M ... 0.0 2 \n", + "8 M ... 0.0 1 \n", + "15 F ... 0.0 1 \n", + "19 F ... 1.0 1 \n", + "27 F ... 0.0 1 \n", + "... ... ... ... ... \n", + "10844 F ... 0.0 3 \n", + "10852 M ... 0.0 2 \n", + "10872 F ... 0.0 3 \n", + "10887 F ... 0.0 7 \n", + "10897 M ... 0.0 8 \n", + "\n", + " policy_type policy renew_offer_type sales_channel \\\n", + "3 Corporate Auto Corporate L3 Offer2 Branch \n", + "8 Personal Auto Personal L1 Offer2 Branch \n", + "15 Special Auto Special L1 Offer2 Branch \n", + "19 Personal Auto Personal L2 Offer2 Call Center \n", + "27 Personal Auto Personal L3 Offer2 Agent \n", + "... ... ... ... ... 
# %%
# Customers with a total claim amount of at most 1000 who answered "Yes"
# to the last marketing campaign.
is_low_claim = df["total_claim_amount"].le(1000)
said_yes = df["response"].eq("Yes")

df_low = df.loc[is_low_claim & said_yes]

df_low
# %%
# Restrict to customers who responded "Yes", then average customer lifetime
# value for every (gender, policy type) segment.
df_yes = df.loc[df["response"].eq("Yes")]
segment_keys = ["gender", "policy_type"]

df_yes.groupby(segment_keys)["customer_lifetime_value"].mean()

# %%
# Same segments, average total claim amount, for comparison with the
# lifetime-value figures above.
df_yes.groupby(segment_keys)["total_claim_amount"].mean()
# %%
# Number of customer records per state, keeping only the states that have
# more than MIN_CUSTOMERS customers (the task asks for "more than 500").
MIN_CUSTOMERS = 500

customers_per_state = df.groupby("state")["customer"].count()

# Strictly greater than the threshold, as the task wording requires.
customers_per_state[customers_per_state > MIN_CUSTOMERS]
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
maxmincount
gendereducation
FBachelor73225.956521904.0008521700
College61850.188031898.6836861608
Doctor44856.113972395.570000200
High School or Below55277.445892144.9215351590
Master51016.067042417.777032475
MBachelor67907.270501898.0076751572
College61134.683071918.1197001596
Doctor32677.342842267.604038201
High School or Below83325.381191940.9812211556
Master50568.259122272.307310412
# %%
# Highest and lowest customer lifetime value, plus the number of records,
# for every (gender, education) segment.
# NOTE(review): the task prompt for this cell is not visible here - confirm
# that `count` is the statistic it asks for alongside max and min.
clv_by_segment = df.groupby(["gender", "education"])["customer_lifetime_value"]

clv_by_segment.agg(max="max", min="min", count="count")
# %%
# Number of policies sold by state (rows) and month (columns).
#
# The dates look like "2/18/11" (month/day/two-digit year). Passing the format
# explicitly avoids the per-element `dateutil` fallback that pandas warned
# about and guarantees every value is parsed the same way.
effective_date = pd.to_datetime(df["effective_to_date"], format="%m/%d/%y")
df["month"] = effective_date.dt.month_name()

# Month names in calendar order (grouping by month number sorts them).
# Without this the pivot sorts its columns alphabetically, which puts
# February before January.
month_order = df["month"].groupby(effective_date.dt.month).first().tolist()

policies_by_state_month = df.pivot_table(
    index="state",
    columns="month",
    values="policy",
    aggfunc="count",
    fill_value=0,
).reindex(columns=month_order)

policies_by_state_month

# %%
# Policies sold per month for the states with the most policies overall.
# The states are derived from the data rather than typed in by hand, so the
# result stays correct if the dataset changes.
TOP_N_STATES = 3

top_states = (
    df.groupby("state")["policy"]
    .count()
    .nlargest(TOP_N_STATES)
    .index
    .tolist()
)

df_top_states = df[df["state"].isin(top_states)]

top_states_pivot = df_top_states.pivot_table(
    index="state",
    columns="month",
    values="policy",
    aggfunc="count",
    fill_value=0,  # consistent with the all-states table above
).reindex(columns=month_order)

top_states_pivot
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sales_channelvariablevalue
3BranchresponseYes
8BranchresponseYes
15BranchresponseYes
19Call CenterresponseYes
27AgentresponseYes
............
10844AgentresponseYes
10852Call CenterresponseYes
10872AgentresponseYes
10887AgentresponseYes
10897AgentresponseYes
\n", + "

1558 rows × 3 columns

# %%
# Unpivot `response` into long form keyed by sales channel
# (columns: sales_channel, variable, value), then keep the "Yes" rows.
# NOTE(review): this lists the individual "Yes" rows; if a per-channel
# summary is what the task wants, an aggregation step is still missing -
# confirm against the prompt.
unpivoted = df.melt(id_vars=["sales_channel"], value_vars=["response"])

answered_yes = unpivoted["value"].eq("Yes")
unpivoted_yes = unpivoted.loc[answered_yes]

unpivoted_yes