diff --git a/.DS_Store b/.DS_Store
new file mode 100644
index 0000000..862f74f
Binary files /dev/null and b/.DS_Store differ
diff --git a/LabsDataAggregationAndFiltering.ipynb b/LabsDataAggregationAndFiltering.ipynb
new file mode 100644
index 0000000..399eb33
--- /dev/null
+++ b/LabsDataAggregationAndFiltering.ipynb
@@ -0,0 +1,815 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "id": "e28ddcd1-14ee-4f34-90a7-0387b77af02b",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import pandas as pd\n",
+ "import numpy as np"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "id": "41c1fe74-ae67-4c01-b8b5-5ea331225a4f",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Unnamed: 0 | \n",
+ " Customer | \n",
+ " State | \n",
+ " Customer Lifetime Value | \n",
+ " Response | \n",
+ " Coverage | \n",
+ " Education | \n",
+ " Effective To Date | \n",
+ " EmploymentStatus | \n",
+ " Gender | \n",
+ " ... | \n",
+ " Number of Open Complaints | \n",
+ " Number of Policies | \n",
+ " Policy Type | \n",
+ " Policy | \n",
+ " Renew Offer Type | \n",
+ " Sales Channel | \n",
+ " Total Claim Amount | \n",
+ " Vehicle Class | \n",
+ " Vehicle Size | \n",
+ " Vehicle Type | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 0 | \n",
+ " DK49336 | \n",
+ " Arizona | \n",
+ " 4809.216960 | \n",
+ " No | \n",
+ " Basic | \n",
+ " College | \n",
+ " 2/18/11 | \n",
+ " Employed | \n",
+ " M | \n",
+ " ... | \n",
+ " 0.0 | \n",
+ " 9 | \n",
+ " Corporate Auto | \n",
+ " Corporate L3 | \n",
+ " Offer3 | \n",
+ " Agent | \n",
+ " 292.800000 | \n",
+ " Four-Door Car | \n",
+ " Medsize | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 1 | \n",
+ " KX64629 | \n",
+ " California | \n",
+ " 2228.525238 | \n",
+ " No | \n",
+ " Basic | \n",
+ " College | \n",
+ " 1/18/11 | \n",
+ " Unemployed | \n",
+ " F | \n",
+ " ... | \n",
+ " 0.0 | \n",
+ " 1 | \n",
+ " Personal Auto | \n",
+ " Personal L3 | \n",
+ " Offer4 | \n",
+ " Call Center | \n",
+ " 744.924331 | \n",
+ " Four-Door Car | \n",
+ " Medsize | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 2 | \n",
+ " LZ68649 | \n",
+ " Washington | \n",
+ " 14947.917300 | \n",
+ " No | \n",
+ " Basic | \n",
+ " Bachelor | \n",
+ " 2/10/11 | \n",
+ " Employed | \n",
+ " M | \n",
+ " ... | \n",
+ " 0.0 | \n",
+ " 2 | \n",
+ " Personal Auto | \n",
+ " Personal L3 | \n",
+ " Offer3 | \n",
+ " Call Center | \n",
+ " 480.000000 | \n",
+ " SUV | \n",
+ " Medsize | \n",
+ " A | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
3 rows × 26 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Unnamed: 0 Customer State Customer Lifetime Value Response Coverage \\\n",
+ "0 0 DK49336 Arizona 4809.216960 No Basic \n",
+ "1 1 KX64629 California 2228.525238 No Basic \n",
+ "2 2 LZ68649 Washington 14947.917300 No Basic \n",
+ "\n",
+ " Education Effective To Date EmploymentStatus Gender ... \\\n",
+ "0 College 2/18/11 Employed M ... \n",
+ "1 College 1/18/11 Unemployed F ... \n",
+ "2 Bachelor 2/10/11 Employed M ... \n",
+ "\n",
+ " Number of Open Complaints Number of Policies Policy Type Policy \\\n",
+ "0 0.0 9 Corporate Auto Corporate L3 \n",
+ "1 0.0 1 Personal Auto Personal L3 \n",
+ "2 0.0 2 Personal Auto Personal L3 \n",
+ "\n",
+ " Renew Offer Type Sales Channel Total Claim Amount Vehicle Class \\\n",
+ "0 Offer3 Agent 292.800000 Four-Door Car \n",
+ "1 Offer4 Call Center 744.924331 Four-Door Car \n",
+ "2 Offer3 Call Center 480.000000 SUV \n",
+ "\n",
+ " Vehicle Size Vehicle Type \n",
+ "0 Medsize NaN \n",
+ "1 Medsize NaN \n",
+ "2 Medsize A \n",
+ "\n",
+ "[3 rows x 26 columns]"
+ ]
+ },
+ "execution_count": 3,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "url = \"https://raw.githubusercontent.com/data-bootcamp-v4/data/main/marketing_customer_analysis.csv\"\n",
+ "df = pd.read_csv(url)\n",
+ "df.head(3)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "id": "17c31865-6c31-4b8a-86e5-fc124ad8c5b7",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "(10910, 26)"
+ ]
+ },
+ "execution_count": 5,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df.shape"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "id": "177f4cdd-9cec-4cab-ad95-04e6b6e9af80",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Unnamed: 0 | \n",
+ " Customer | \n",
+ " State | \n",
+ " Customer Lifetime Value | \n",
+ " Response | \n",
+ " Coverage | \n",
+ " Education | \n",
+ " Effective To Date | \n",
+ " EmploymentStatus | \n",
+ " Gender | \n",
+ " ... | \n",
+ " Number of Open Complaints | \n",
+ " Number of Policies | \n",
+ " Policy Type | \n",
+ " Policy | \n",
+ " Renew Offer Type | \n",
+ " Sales Channel | \n",
+ " Total Claim Amount | \n",
+ " Vehicle Class | \n",
+ " Vehicle Size | \n",
+ " Vehicle Type | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 3 | \n",
+ " 3 | \n",
+ " XL78013 | \n",
+ " Oregon | \n",
+ " 22332.439460 | \n",
+ " Yes | \n",
+ " Extended | \n",
+ " College | \n",
+ " 1/11/11 | \n",
+ " Employed | \n",
+ " M | \n",
+ " ... | \n",
+ " 0.0 | \n",
+ " 2 | \n",
+ " Corporate Auto | \n",
+ " Corporate L3 | \n",
+ " Offer2 | \n",
+ " Branch | \n",
+ " 484.013411 | \n",
+ " Four-Door Car | \n",
+ " Medsize | \n",
+ " A | \n",
+ "
\n",
+ " \n",
+ " 8 | \n",
+ " 8 | \n",
+ " FM55990 | \n",
+ " California | \n",
+ " 5989.773931 | \n",
+ " Yes | \n",
+ " Premium | \n",
+ " College | \n",
+ " 1/19/11 | \n",
+ " Employed | \n",
+ " M | \n",
+ " ... | \n",
+ " 0.0 | \n",
+ " 1 | \n",
+ " Personal Auto | \n",
+ " Personal L1 | \n",
+ " Offer2 | \n",
+ " Branch | \n",
+ " 739.200000 | \n",
+ " Sports Car | \n",
+ " Medsize | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 15 | \n",
+ " 15 | \n",
+ " CW49887 | \n",
+ " California | \n",
+ " 4626.801093 | \n",
+ " Yes | \n",
+ " Basic | \n",
+ " Master | \n",
+ " 1/16/11 | \n",
+ " Employed | \n",
+ " F | \n",
+ " ... | \n",
+ " 0.0 | \n",
+ " 1 | \n",
+ " Special Auto | \n",
+ " Special L1 | \n",
+ " Offer2 | \n",
+ " Branch | \n",
+ " 547.200000 | \n",
+ " SUV | \n",
+ " Medsize | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
3 rows × 26 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Unnamed: 0 Customer State Customer Lifetime Value Response \\\n",
+ "3 3 XL78013 Oregon 22332.439460 Yes \n",
+ "8 8 FM55990 California 5989.773931 Yes \n",
+ "15 15 CW49887 California 4626.801093 Yes \n",
+ "\n",
+ " Coverage Education Effective To Date EmploymentStatus Gender ... \\\n",
+ "3 Extended College 1/11/11 Employed M ... \n",
+ "8 Premium College 1/19/11 Employed M ... \n",
+ "15 Basic Master 1/16/11 Employed F ... \n",
+ "\n",
+ " Number of Open Complaints Number of Policies Policy Type \\\n",
+ "3 0.0 2 Corporate Auto \n",
+ "8 0.0 1 Personal Auto \n",
+ "15 0.0 1 Special Auto \n",
+ "\n",
+ " Policy Renew Offer Type Sales Channel Total Claim Amount \\\n",
+ "3 Corporate L3 Offer2 Branch 484.013411 \n",
+ "8 Personal L1 Offer2 Branch 739.200000 \n",
+ "15 Special L1 Offer2 Branch 547.200000 \n",
+ "\n",
+ " Vehicle Class Vehicle Size Vehicle Type \n",
+ "3 Four-Door Car Medsize A \n",
+ "8 Sports Car Medsize NaN \n",
+ "15 SUV Medsize NaN \n",
+ "\n",
+ "[3 rows x 26 columns]"
+ ]
+ },
+ "execution_count": 12,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "filtered_df = df[(df['Total Claim Amount'] < 1000) & (df['Response'] == 'Yes')]\n",
+ "filtered_df.head(3)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 16,
+ "id": "1170df77-a367-495b-b1d3-169655e3d37d",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "(1399, 26)"
+ ]
+ },
+ "execution_count": 16,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "filtered_df.shape"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 15,
+ "id": "ebbd3eee-c141-4b7e-8582-7ac0d781c996",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Policy Type | \n",
+ " Gender | \n",
+ " Monthly Premium Auto | \n",
+ " Customer Lifetime Value | \n",
+ " Total Claim Amount | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " Corporate Auto | \n",
+ " F | \n",
+ " 94.301775 | \n",
+ " 7712.628736 | \n",
+ " 433.738499 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " Corporate Auto | \n",
+ " M | \n",
+ " 92.188312 | \n",
+ " 7944.465414 | \n",
+ " 408.582459 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " Personal Auto | \n",
+ " F | \n",
+ " 98.998148 | \n",
+ " 8339.791842 | \n",
+ " 452.965929 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " Personal Auto | \n",
+ " M | \n",
+ " 91.085821 | \n",
+ " 7448.383281 | \n",
+ " 457.010178 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " Special Auto | \n",
+ " F | \n",
+ " 92.314286 | \n",
+ " 7691.584111 | \n",
+ " 453.280164 | \n",
+ "
\n",
+ " \n",
+ " 5 | \n",
+ " Special Auto | \n",
+ " M | \n",
+ " 86.343750 | \n",
+ " 8247.088702 | \n",
+ " 429.527942 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Policy Type Gender Monthly Premium Auto Customer Lifetime Value \\\n",
+ "0 Corporate Auto F 94.301775 7712.628736 \n",
+ "1 Corporate Auto M 92.188312 7944.465414 \n",
+ "2 Personal Auto F 98.998148 8339.791842 \n",
+ "3 Personal Auto M 91.085821 7448.383281 \n",
+ "4 Special Auto F 92.314286 7691.584111 \n",
+ "5 Special Auto M 86.343750 8247.088702 \n",
+ "\n",
+ " Total Claim Amount \n",
+ "0 433.738499 \n",
+ "1 408.582459 \n",
+ "2 452.965929 \n",
+ "3 457.010178 \n",
+ "4 453.280164 \n",
+ "5 429.527942 "
+ ]
+ },
+ "execution_count": 15,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "responded_df = df[df['Response'] == 'Yes']\n",
+ "\n",
+ "summary = responded_df.groupby(['Policy Type', 'Gender'])[\n",
+ " ['Monthly Premium Auto', 'Customer Lifetime Value', 'Total Claim Amount']\n",
+ "].mean().reset_index()\n",
+ "\n",
+ "summary"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 17,
+ "id": "e28118ae-583d-4604-b5d7-d64b062fd3bf",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "(6, 5)"
+ ]
+ },
+ "execution_count": 17,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "summary.shape"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 18,
+ "id": "06854e42-1fa1-47df-bcc1-b73ac1ea89e9",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "### Among customers who responded, Corporate Auto males are more profitable, with high lifetime value and low claims, \n",
+ "### while Personal Auto males have low lifetime value and high claims, which makes them riskier."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 20,
+ "id": "1df6f892-32ca-4ffd-b3a5-f8d16e88f7de",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " State | \n",
+ " Customer Count | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " California | \n",
+ " 3552 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " Oregon | \n",
+ " 2909 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " Arizona | \n",
+ " 1937 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " Nevada | \n",
+ " 993 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " Washington | \n",
+ " 888 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " State Customer Count\n",
+ "0 California 3552\n",
+ "1 Oregon 2909\n",
+ "2 Arizona 1937\n",
+ "3 Nevada 993\n",
+ "4 Washington 888"
+ ]
+ },
+ "execution_count": 20,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "customers_per_state = df['State'].value_counts()\n",
+ "\n",
+ "states_over_500 = customers_per_state[customers_per_state > 500]\n",
+ "\n",
+ "states_over_500_df = states_over_500.reset_index()\n",
+ "states_over_500_df.columns = ['State', 'Customer Count']\n",
+ "states_over_500_df"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 23,
+ "id": "add8973a-d24e-44c4-8fc1-240f14e4f175",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Education | \n",
+ " Gender | \n",
+ " max | \n",
+ " min | \n",
+ " median | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " Bachelor | \n",
+ " F | \n",
+ " 73225.95652 | \n",
+ " 1904.000852 | \n",
+ " 5640.505303 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " Bachelor | \n",
+ " M | \n",
+ " 67907.27050 | \n",
+ " 1898.007675 | \n",
+ " 5548.031892 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " College | \n",
+ " F | \n",
+ " 61850.18803 | \n",
+ " 1898.683686 | \n",
+ " 5623.611187 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " College | \n",
+ " M | \n",
+ " 61134.68307 | \n",
+ " 1918.119700 | \n",
+ " 6005.847375 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " Doctor | \n",
+ " F | \n",
+ " 44856.11397 | \n",
+ " 2395.570000 | \n",
+ " 5332.462694 | \n",
+ "
\n",
+ " \n",
+ " 5 | \n",
+ " Doctor | \n",
+ " M | \n",
+ " 32677.34284 | \n",
+ " 2267.604038 | \n",
+ " 5577.669457 | \n",
+ "
\n",
+ " \n",
+ " 6 | \n",
+ " High School or Below | \n",
+ " F | \n",
+ " 55277.44589 | \n",
+ " 2144.921535 | \n",
+ " 6039.553187 | \n",
+ "
\n",
+ " \n",
+ " 7 | \n",
+ " High School or Below | \n",
+ " M | \n",
+ " 83325.38119 | \n",
+ " 1940.981221 | \n",
+ " 6286.731006 | \n",
+ "
\n",
+ " \n",
+ " 8 | \n",
+ " Master | \n",
+ " F | \n",
+ " 51016.06704 | \n",
+ " 2417.777032 | \n",
+ " 5729.855012 | \n",
+ "
\n",
+ " \n",
+ " 9 | \n",
+ " Master | \n",
+ " M | \n",
+ " 50568.25912 | \n",
+ " 2272.307310 | \n",
+ " 5579.099207 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Education Gender max min median\n",
+ "0 Bachelor F 73225.95652 1904.000852 5640.505303\n",
+ "1 Bachelor M 67907.27050 1898.007675 5548.031892\n",
+ "2 College F 61850.18803 1898.683686 5623.611187\n",
+ "3 College M 61134.68307 1918.119700 6005.847375\n",
+ "4 Doctor F 44856.11397 2395.570000 5332.462694\n",
+ "5 Doctor M 32677.34284 2267.604038 5577.669457\n",
+ "6 High School or Below F 55277.44589 2144.921535 6039.553187\n",
+ "7 High School or Below M 83325.38119 1940.981221 6286.731006\n",
+ "8 Master F 51016.06704 2417.777032 5729.855012\n",
+ "9 Master M 50568.25912 2272.307310 5579.099207"
+ ]
+ },
+ "execution_count": 23,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "import pandas as pd\n",
+ "\n",
+ "stats = df.groupby(['Education', 'Gender'])['Customer Lifetime Value'].agg(['max', 'min', 'median']).reset_index()\n",
+ "\n",
+ "stats"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 24,
+ "id": "ef29969f-c472-49f5-893f-c0b0f5025968",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "### Bachelor’s degree holders have some of the highest maximum customer lifetime values (the single highest belongs to High School or Below males), \n",
+ "### while median values are broadly similar across all education levels and genders."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "16a21305-728b-42de-a854-44ce4ba5ab87",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python [conda env:base] *",
+ "language": "python",
+ "name": "conda-base-py"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.13.5"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}