diff --git a/lab-dw-aggregating.ipynb b/lab-dw-aggregating.ipynb
index fadd718..304ad88 100644
--- a/lab-dw-aggregating.ipynb
+++ b/lab-dw-aggregating.ipynb
@@ -127,14 +127,1189 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 2,
+ "id": "c623d287",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "(10910, 26)\n",
+ "(10910, 26)\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " unnamed:_0 | \n",
+ " customer | \n",
+ " state | \n",
+ " customer_lifetime_value | \n",
+ " response | \n",
+ " coverage | \n",
+ " education | \n",
+ " effective_to_date | \n",
+ " employmentstatus | \n",
+ " gender | \n",
+ " ... | \n",
+ " number_of_open_complaints | \n",
+ " number_of_policies | \n",
+ " policy_type | \n",
+ " policy | \n",
+ " renew_offer_type | \n",
+ " sales_channel | \n",
+ " total_claim_amount | \n",
+ " vehicle_class | \n",
+ " vehicle_size | \n",
+ " vehicle_type | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 0 | \n",
+ " DK49336 | \n",
+ " Arizona | \n",
+ " 4809.216960 | \n",
+ " No | \n",
+ " Basic | \n",
+ " College | \n",
+ " 2/18/11 | \n",
+ " Employed | \n",
+ " M | \n",
+ " ... | \n",
+ " 0.0 | \n",
+ " 9 | \n",
+ " Corporate Auto | \n",
+ " Corporate L3 | \n",
+ " Offer3 | \n",
+ " Agent | \n",
+ " 292.800000 | \n",
+ " Four-Door Car | \n",
+ " Medsize | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 1 | \n",
+ " KX64629 | \n",
+ " California | \n",
+ " 2228.525238 | \n",
+ " No | \n",
+ " Basic | \n",
+ " College | \n",
+ " 1/18/11 | \n",
+ " Unemployed | \n",
+ " F | \n",
+ " ... | \n",
+ " 0.0 | \n",
+ " 1 | \n",
+ " Personal Auto | \n",
+ " Personal L3 | \n",
+ " Offer4 | \n",
+ " Call Center | \n",
+ " 744.924331 | \n",
+ " Four-Door Car | \n",
+ " Medsize | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
2 rows × 26 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " unnamed:_0 customer state customer_lifetime_value response coverage \\\n",
+ "0 0 DK49336 Arizona 4809.216960 No Basic \n",
+ "1 1 KX64629 California 2228.525238 No Basic \n",
+ "\n",
+ " education effective_to_date employmentstatus gender ... \\\n",
+ "0 College 2/18/11 Employed M ... \n",
+ "1 College 1/18/11 Unemployed F ... \n",
+ "\n",
+ " number_of_open_complaints number_of_policies policy_type policy \\\n",
+ "0 0.0 9 Corporate Auto Corporate L3 \n",
+ "1 0.0 1 Personal Auto Personal L3 \n",
+ "\n",
+ " renew_offer_type sales_channel total_claim_amount vehicle_class \\\n",
+ "0 Offer3 Agent 292.800000 Four-Door Car \n",
+ "1 Offer4 Call Center 744.924331 Four-Door Car \n",
+ "\n",
+ " vehicle_size vehicle_type \n",
+ "0 Medsize NaN \n",
+ "1 Medsize NaN \n",
+ "\n",
+ "[2 rows x 26 columns]"
+ ]
+ },
+ "execution_count": 2,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "import pandas as pd\n",
+ "import numpy as np\n",
+ "\n",
+ "# Raw CSV of the marketing customer analysis dataset.\n",
+ "CSV_PATH = \"https://raw.githubusercontent.com/data-bootcamp-v4/data/main/marketing_customer_analysis.csv\"\n",
+ "df = pd.read_csv(CSV_PATH)\n",
+ "\n",
+ "# Normalize the column names once: trim surrounding whitespace, lowercase,\n",
+ "# and replace spaces and hyphens with underscores so later cells can use\n",
+ "# names such as total_claim_amount and effective_to_date.\n",
+ "df.columns = (\n",
+ "    df.columns.str.strip()\n",
+ "    .str.lower()\n",
+ "    .str.replace(\" \", \"_\")\n",
+ "    .str.replace(\"-\", \"_\")\n",
+ ")\n",
+ "\n",
+ "# Print the (rows, columns) shape, then show the first two rows as the\n",
+ "# cell's displayed result.\n",
+ "print(df.shape)\n",
+ "df.head(2)\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "id": "6840b954",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " unnamed:_0 | \n",
+ " customer | \n",
+ " state | \n",
+ " customer_lifetime_value | \n",
+ " response | \n",
+ " coverage | \n",
+ " education | \n",
+ " effective_to_date | \n",
+ " employmentstatus | \n",
+ " gender | \n",
+ " ... | \n",
+ " number_of_open_complaints | \n",
+ " number_of_policies | \n",
+ " policy_type | \n",
+ " policy | \n",
+ " renew_offer_type | \n",
+ " sales_channel | \n",
+ " total_claim_amount | \n",
+ " vehicle_class | \n",
+ " vehicle_size | \n",
+ " vehicle_type | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 3 | \n",
+ " 3 | \n",
+ " XL78013 | \n",
+ " Oregon | \n",
+ " 22332.439460 | \n",
+ " Yes | \n",
+ " Extended | \n",
+ " College | \n",
+ " 1/11/11 | \n",
+ " Employed | \n",
+ " M | \n",
+ " ... | \n",
+ " 0.0 | \n",
+ " 2 | \n",
+ " Corporate Auto | \n",
+ " Corporate L3 | \n",
+ " Offer2 | \n",
+ " Branch | \n",
+ " 484.013411 | \n",
+ " Four-Door Car | \n",
+ " Medsize | \n",
+ " A | \n",
+ "
\n",
+ " \n",
+ " 8 | \n",
+ " 8 | \n",
+ " FM55990 | \n",
+ " California | \n",
+ " 5989.773931 | \n",
+ " Yes | \n",
+ " Premium | \n",
+ " College | \n",
+ " 1/19/11 | \n",
+ " Employed | \n",
+ " M | \n",
+ " ... | \n",
+ " 0.0 | \n",
+ " 1 | \n",
+ " Personal Auto | \n",
+ " Personal L1 | \n",
+ " Offer2 | \n",
+ " Branch | \n",
+ " 739.200000 | \n",
+ " Sports Car | \n",
+ " Medsize | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 15 | \n",
+ " 15 | \n",
+ " CW49887 | \n",
+ " California | \n",
+ " 4626.801093 | \n",
+ " Yes | \n",
+ " Basic | \n",
+ " Master | \n",
+ " 1/16/11 | \n",
+ " Employed | \n",
+ " F | \n",
+ " ... | \n",
+ " 0.0 | \n",
+ " 1 | \n",
+ " Special Auto | \n",
+ " Special L1 | \n",
+ " Offer2 | \n",
+ " Branch | \n",
+ " 547.200000 | \n",
+ " SUV | \n",
+ " Medsize | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 19 | \n",
+ " 19 | \n",
+ " NJ54277 | \n",
+ " California | \n",
+ " 3746.751625 | \n",
+ " Yes | \n",
+ " Extended | \n",
+ " College | \n",
+ " 2/26/11 | \n",
+ " Employed | \n",
+ " F | \n",
+ " ... | \n",
+ " 1.0 | \n",
+ " 1 | \n",
+ " Personal Auto | \n",
+ " Personal L2 | \n",
+ " Offer2 | \n",
+ " Call Center | \n",
+ " 19.575683 | \n",
+ " Two-Door Car | \n",
+ " Large | \n",
+ " A | \n",
+ "
\n",
+ " \n",
+ " 27 | \n",
+ " 27 | \n",
+ " MQ68407 | \n",
+ " Oregon | \n",
+ " 4376.363592 | \n",
+ " Yes | \n",
+ " Premium | \n",
+ " Bachelor | \n",
+ " 2/28/11 | \n",
+ " Employed | \n",
+ " F | \n",
+ " ... | \n",
+ " 0.0 | \n",
+ " 1 | \n",
+ " Personal Auto | \n",
+ " Personal L3 | \n",
+ " Offer2 | \n",
+ " Agent | \n",
+ " 60.036683 | \n",
+ " Four-Door Car | \n",
+ " Medsize | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
5 rows × 26 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " unnamed:_0 customer state customer_lifetime_value response \\\n",
+ "3 3 XL78013 Oregon 22332.439460 Yes \n",
+ "8 8 FM55990 California 5989.773931 Yes \n",
+ "15 15 CW49887 California 4626.801093 Yes \n",
+ "19 19 NJ54277 California 3746.751625 Yes \n",
+ "27 27 MQ68407 Oregon 4376.363592 Yes \n",
+ "\n",
+ " coverage education effective_to_date employmentstatus gender ... \\\n",
+ "3 Extended College 1/11/11 Employed M ... \n",
+ "8 Premium College 1/19/11 Employed M ... \n",
+ "15 Basic Master 1/16/11 Employed F ... \n",
+ "19 Extended College 2/26/11 Employed F ... \n",
+ "27 Premium Bachelor 2/28/11 Employed F ... \n",
+ "\n",
+ " number_of_open_complaints number_of_policies policy_type \\\n",
+ "3 0.0 2 Corporate Auto \n",
+ "8 0.0 1 Personal Auto \n",
+ "15 0.0 1 Special Auto \n",
+ "19 1.0 1 Personal Auto \n",
+ "27 0.0 1 Personal Auto \n",
+ "\n",
+ " policy renew_offer_type sales_channel total_claim_amount \\\n",
+ "3 Corporate L3 Offer2 Branch 484.013411 \n",
+ "8 Personal L1 Offer2 Branch 739.200000 \n",
+ "15 Special L1 Offer2 Branch 547.200000 \n",
+ "19 Personal L2 Offer2 Call Center 19.575683 \n",
+ "27 Personal L3 Offer2 Agent 60.036683 \n",
+ "\n",
+ " vehicle_class vehicle_size vehicle_type \n",
+ "3 Four-Door Car Medsize A \n",
+ "8 Sports Car Medsize NaN \n",
+ "15 SUV Medsize NaN \n",
+ "19 Two-Door Car Large A \n",
+ "27 Four-Door Car Medsize NaN \n",
+ "\n",
+ "[5 rows x 26 columns]"
+ ]
+ },
+ "execution_count": 3,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Keep only the customers whose total claim is under 1000 and whose response was Yes.\n",
+ "low_claim_yes = df.loc[df[\"total_claim_amount\"].lt(1000) & df[\"response\"].eq(\"Yes\")]\n",
+ "low_claim_yes.head()\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "id": "ec5077d1",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " | \n",
+ " monthly_premium_auto | \n",
+ " customer_lifetime_value | \n",
+ "
\n",
+ " \n",
+ " policy_type | \n",
+ " gender | \n",
+ " | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " Corporate Auto | \n",
+ " F | \n",
+ " 91.384615 | \n",
+ " 7980.306825 | \n",
+ "
\n",
+ " \n",
+ " M | \n",
+ " 94.764249 | \n",
+ " 7750.741082 | \n",
+ "
\n",
+ " \n",
+ " Personal Auto | \n",
+ " F | \n",
+ " 93.153179 | \n",
+ " 8074.660516 | \n",
+ "
\n",
+ " \n",
+ " M | \n",
+ " 93.301056 | \n",
+ " 7971.386285 | \n",
+ "
\n",
+ " \n",
+ " Special Auto | \n",
+ " F | \n",
+ " 93.563025 | \n",
+ " 8460.398042 | \n",
+ "
\n",
+ " \n",
+ " M | \n",
+ " 93.197044 | \n",
+ " 9010.601583 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " monthly_premium_auto customer_lifetime_value\n",
+ "policy_type gender \n",
+ "Corporate Auto F 91.384615 7980.306825\n",
+ " M 94.764249 7750.741082\n",
+ "Personal Auto F 93.153179 8074.660516\n",
+ " M 93.301056 7971.386285\n",
+ "Special Auto F 93.563025 8460.398042\n",
+ " M 93.197044 9010.601583"
+ ]
+ },
+ "execution_count": 4,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Mean monthly premium and mean customer lifetime value for every policy_type / gender pair.\n",
+ "analysis = df.groupby(by=[\"policy_type\", \"gender\"])[[\"monthly_premium_auto\", \"customer_lifetime_value\"]].agg(\"mean\")\n",
+ "analysis\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "id": "90e9ac6d",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "state\n",
+ "California 3552\n",
+ "Oregon 2909\n",
+ "Arizona 1937\n",
+ "Nevada 993\n",
+ "Washington 888\n",
+ "Name: count, dtype: int64"
+ ]
+ },
+ "execution_count": 5,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Tally the rows per state, then keep only the states that have more than 500 of them.\n",
+ "state_counts = df[\"state\"].value_counts()\n",
+ "big_states = state_counts.loc[state_counts.gt(500)]\n",
+ "big_states\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "id": "e73095c5",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " | \n",
+ " max | \n",
+ " min | \n",
+ " median | \n",
+ "
\n",
+ " \n",
+ " education | \n",
+ " gender | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " Bachelor | \n",
+ " F | \n",
+ " 73225.95652 | \n",
+ " 1904.000852 | \n",
+ " 5640.505303 | \n",
+ "
\n",
+ " \n",
+ " M | \n",
+ " 67907.27050 | \n",
+ " 1898.007675 | \n",
+ " 5548.031892 | \n",
+ "
\n",
+ " \n",
+ " College | \n",
+ " F | \n",
+ " 61850.18803 | \n",
+ " 1898.683686 | \n",
+ " 5623.611187 | \n",
+ "
\n",
+ " \n",
+ " M | \n",
+ " 61134.68307 | \n",
+ " 1918.119700 | \n",
+ " 6005.847375 | \n",
+ "
\n",
+ " \n",
+ " Doctor | \n",
+ " F | \n",
+ " 44856.11397 | \n",
+ " 2395.570000 | \n",
+ " 5332.462694 | \n",
+ "
\n",
+ " \n",
+ " M | \n",
+ " 32677.34284 | \n",
+ " 2267.604038 | \n",
+ " 5577.669457 | \n",
+ "
\n",
+ " \n",
+ " High School or Below | \n",
+ " F | \n",
+ " 55277.44589 | \n",
+ " 2144.921535 | \n",
+ " 6039.553187 | \n",
+ "
\n",
+ " \n",
+ " M | \n",
+ " 83325.38119 | \n",
+ " 1940.981221 | \n",
+ " 6286.731006 | \n",
+ "
\n",
+ " \n",
+ " Master | \n",
+ " F | \n",
+ " 51016.06704 | \n",
+ " 2417.777032 | \n",
+ " 5729.855012 | \n",
+ "
\n",
+ " \n",
+ " M | \n",
+ " 50568.25912 | \n",
+ " 2272.307310 | \n",
+ " 5579.099207 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " max min median\n",
+ "education gender \n",
+ "Bachelor F 73225.95652 1904.000852 5640.505303\n",
+ " M 67907.27050 1898.007675 5548.031892\n",
+ "College F 61850.18803 1898.683686 5623.611187\n",
+ " M 61134.68307 1918.119700 6005.847375\n",
+ "Doctor F 44856.11397 2395.570000 5332.462694\n",
+ " M 32677.34284 2267.604038 5577.669457\n",
+ "High School or Below F 55277.44589 2144.921535 6039.553187\n",
+ " M 83325.38119 1940.981221 6286.731006\n",
+ "Master F 51016.06704 2417.777032 5729.855012\n",
+ " M 50568.25912 2272.307310 5579.099207"
+ ]
+ },
+ "execution_count": 6,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Largest, smallest and median customer lifetime value for every education / gender pair.\n",
+ "clv_stats = df.groupby([\"education\", \"gender\"])[\"customer_lifetime_value\"].agg(max=\"max\", min=\"min\", median=\"median\")\n",
+ "clv_stats\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "id": "d68d573b",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " effective_to_date | \n",
+ " 1/1/11 | \n",
+ " 1/10/11 | \n",
+ " 1/11/11 | \n",
+ " 1/12/11 | \n",
+ " 1/13/11 | \n",
+ " 1/14/11 | \n",
+ " 1/15/11 | \n",
+ " 1/16/11 | \n",
+ " 1/17/11 | \n",
+ " 1/18/11 | \n",
+ " ... | \n",
+ " 2/26/11 | \n",
+ " 2/27/11 | \n",
+ " 2/28/11 | \n",
+ " 2/3/11 | \n",
+ " 2/4/11 | \n",
+ " 2/5/11 | \n",
+ " 2/6/11 | \n",
+ " 2/7/11 | \n",
+ " 2/8/11 | \n",
+ " 2/9/11 | \n",
+ "
\n",
+ " \n",
+ " state | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " Arizona | \n",
+ " 35 | \n",
+ " 34 | \n",
+ " 35 | \n",
+ " 32 | \n",
+ " 35 | \n",
+ " 36 | \n",
+ " 29 | \n",
+ " 33 | \n",
+ " 35 | \n",
+ " 37 | \n",
+ " ... | \n",
+ " 40 | \n",
+ " 40 | \n",
+ " 29 | \n",
+ " 49 | \n",
+ " 48 | \n",
+ " 42 | \n",
+ " 23 | \n",
+ " 30 | \n",
+ " 41 | \n",
+ " 37 | \n",
+ "
\n",
+ " \n",
+ " California | \n",
+ " 56 | \n",
+ " 72 | \n",
+ " 81 | \n",
+ " 49 | \n",
+ " 55 | \n",
+ " 57 | \n",
+ " 54 | \n",
+ " 66 | \n",
+ " 60 | \n",
+ " 59 | \n",
+ " ... | \n",
+ " 61 | \n",
+ " 64 | \n",
+ " 49 | \n",
+ " 62 | \n",
+ " 57 | \n",
+ " 55 | \n",
+ " 62 | \n",
+ " 76 | \n",
+ " 39 | \n",
+ " 59 | \n",
+ "
\n",
+ " \n",
+ " Nevada | \n",
+ " 13 | \n",
+ " 25 | \n",
+ " 19 | \n",
+ " 12 | \n",
+ " 20 | \n",
+ " 10 | \n",
+ " 18 | \n",
+ " 10 | \n",
+ " 14 | \n",
+ " 18 | \n",
+ " ... | \n",
+ " 11 | \n",
+ " 20 | \n",
+ " 19 | \n",
+ " 23 | \n",
+ " 17 | \n",
+ " 5 | \n",
+ " 15 | \n",
+ " 11 | \n",
+ " 18 | \n",
+ " 14 | \n",
+ "
\n",
+ " \n",
+ " Oregon | \n",
+ " 42 | \n",
+ " 70 | \n",
+ " 41 | \n",
+ " 36 | \n",
+ " 39 | \n",
+ " 54 | \n",
+ " 51 | \n",
+ " 44 | \n",
+ " 77 | \n",
+ " 45 | \n",
+ " ... | \n",
+ " 59 | \n",
+ " 55 | \n",
+ " 64 | \n",
+ " 37 | \n",
+ " 54 | \n",
+ " 47 | \n",
+ " 43 | \n",
+ " 55 | \n",
+ " 35 | \n",
+ " 34 | \n",
+ "
\n",
+ " \n",
+ " Washington | \n",
+ " 18 | \n",
+ " 21 | \n",
+ " 9 | \n",
+ " 12 | \n",
+ " 13 | \n",
+ " 11 | \n",
+ " 15 | \n",
+ " 11 | \n",
+ " 23 | \n",
+ " 13 | \n",
+ " ... | \n",
+ " 15 | \n",
+ " 15 | \n",
+ " 17 | \n",
+ " 12 | \n",
+ " 11 | \n",
+ " 18 | \n",
+ " 21 | \n",
+ " 10 | \n",
+ " 14 | \n",
+ " 16 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
5 rows × 59 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ "effective_to_date 1/1/11 1/10/11 1/11/11 1/12/11 1/13/11 1/14/11 \\\n",
+ "state \n",
+ "Arizona 35 34 35 32 35 36 \n",
+ "California 56 72 81 49 55 57 \n",
+ "Nevada 13 25 19 12 20 10 \n",
+ "Oregon 42 70 41 36 39 54 \n",
+ "Washington 18 21 9 12 13 11 \n",
+ "\n",
+ "effective_to_date 1/15/11 1/16/11 1/17/11 1/18/11 ... 2/26/11 2/27/11 \\\n",
+ "state ... \n",
+ "Arizona 29 33 35 37 ... 40 40 \n",
+ "California 54 66 60 59 ... 61 64 \n",
+ "Nevada 18 10 14 18 ... 11 20 \n",
+ "Oregon 51 44 77 45 ... 59 55 \n",
+ "Washington 15 11 23 13 ... 15 15 \n",
+ "\n",
+ "effective_to_date 2/28/11 2/3/11 2/4/11 2/5/11 2/6/11 2/7/11 2/8/11 \\\n",
+ "state \n",
+ "Arizona 29 49 48 42 23 30 41 \n",
+ "California 49 62 57 55 62 76 39 \n",
+ "Nevada 19 23 17 5 15 11 18 \n",
+ "Oregon 64 37 54 47 43 55 35 \n",
+ "Washington 17 12 11 18 21 10 14 \n",
+ "\n",
+ "effective_to_date 2/9/11 \n",
+ "state \n",
+ "Arizona 37 \n",
+ "California 59 \n",
+ "Nevada 14 \n",
+ "Oregon 34 \n",
+ "Washington 16 \n",
+ "\n",
+ "[5 rows x 59 columns]"
+ ]
+ },
+ "execution_count": 7,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Policies per state (rows) and month (columns); effective_to_date holds m/d/yy strings, so parse it and group on the month number rather than on each individual day.\n",
+ "policies_by_state_month = df.assign(month=pd.to_datetime(df[\"effective_to_date\"], format=\"%m/%d/%y\").dt.month).groupby([\"state\", \"month\"]).size().unstack(fill_value=0)\n",
+ "policies_by_state_month.head()\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "id": "752f7ee1",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " effective_to_date | \n",
+ " 1/1/11 | \n",
+ " 1/10/11 | \n",
+ " 1/11/11 | \n",
+ " 1/12/11 | \n",
+ " 1/13/11 | \n",
+ " 1/14/11 | \n",
+ " 1/15/11 | \n",
+ " 1/16/11 | \n",
+ " 1/17/11 | \n",
+ " 1/18/11 | \n",
+ " ... | \n",
+ " 2/26/11 | \n",
+ " 2/27/11 | \n",
+ " 2/28/11 | \n",
+ " 2/3/11 | \n",
+ " 2/4/11 | \n",
+ " 2/5/11 | \n",
+ " 2/6/11 | \n",
+ " 2/7/11 | \n",
+ " 2/8/11 | \n",
+ " 2/9/11 | \n",
+ "
\n",
+ " \n",
+ " state | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " Arizona | \n",
+ " 35 | \n",
+ " 34 | \n",
+ " 35 | \n",
+ " 32 | \n",
+ " 35 | \n",
+ " 36 | \n",
+ " 29 | \n",
+ " 33 | \n",
+ " 35 | \n",
+ " 37 | \n",
+ " ... | \n",
+ " 40 | \n",
+ " 40 | \n",
+ " 29 | \n",
+ " 49 | \n",
+ " 48 | \n",
+ " 42 | \n",
+ " 23 | \n",
+ " 30 | \n",
+ " 41 | \n",
+ " 37 | \n",
+ "
\n",
+ " \n",
+ " California | \n",
+ " 56 | \n",
+ " 72 | \n",
+ " 81 | \n",
+ " 49 | \n",
+ " 55 | \n",
+ " 57 | \n",
+ " 54 | \n",
+ " 66 | \n",
+ " 60 | \n",
+ " 59 | \n",
+ " ... | \n",
+ " 61 | \n",
+ " 64 | \n",
+ " 49 | \n",
+ " 62 | \n",
+ " 57 | \n",
+ " 55 | \n",
+ " 62 | \n",
+ " 76 | \n",
+ " 39 | \n",
+ " 59 | \n",
+ "
\n",
+ " \n",
+ " Oregon | \n",
+ " 42 | \n",
+ " 70 | \n",
+ " 41 | \n",
+ " 36 | \n",
+ " 39 | \n",
+ " 54 | \n",
+ " 51 | \n",
+ " 44 | \n",
+ " 77 | \n",
+ " 45 | \n",
+ " ... | \n",
+ " 59 | \n",
+ " 55 | \n",
+ " 64 | \n",
+ " 37 | \n",
+ " 54 | \n",
+ " 47 | \n",
+ " 43 | \n",
+ " 55 | \n",
+ " 35 | \n",
+ " 34 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
3 rows × 59 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ "effective_to_date 1/1/11 1/10/11 1/11/11 1/12/11 1/13/11 1/14/11 \\\n",
+ "state \n",
+ "Arizona 35 34 35 32 35 36 \n",
+ "California 56 72 81 49 55 57 \n",
+ "Oregon 42 70 41 36 39 54 \n",
+ "\n",
+ "effective_to_date 1/15/11 1/16/11 1/17/11 1/18/11 ... 2/26/11 2/27/11 \\\n",
+ "state ... \n",
+ "Arizona 29 33 35 37 ... 40 40 \n",
+ "California 54 66 60 59 ... 61 64 \n",
+ "Oregon 51 44 77 45 ... 59 55 \n",
+ "\n",
+ "effective_to_date 2/28/11 2/3/11 2/4/11 2/5/11 2/6/11 2/7/11 2/8/11 \\\n",
+ "state \n",
+ "Arizona 29 49 48 42 23 30 41 \n",
+ "California 49 62 57 55 62 76 39 \n",
+ "Oregon 64 37 54 47 43 55 35 \n",
+ "\n",
+ "effective_to_date 2/9/11 \n",
+ "state \n",
+ "Arizona 37 \n",
+ "California 59 \n",
+ "Oregon 34 \n",
+ "\n",
+ "[3 rows x 59 columns]"
+ ]
+ },
+ "execution_count": 8,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Pick the 3 states with the most rows, then count their policies per month (month number parsed from the m/d/yy effective_to_date strings).\n",
+ "top_states = df[\"state\"].value_counts().head(3).index\n",
+ "top3 = df[df[\"state\"].isin(top_states)]\n",
+ "top3_table = top3.assign(month=pd.to_datetime(top3[\"effective_to_date\"], format=\"%m/%d/%y\").dt.month).groupby([\"state\", \"month\"]).size().unstack(fill_value=0)\n",
+ "top3_table\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
"id": "449513f4-0459-46a0-a18d-9398d974c9ad",
"metadata": {
"id": "449513f4-0459-46a0-a18d-9398d974c9ad"
},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " vehicle_class | \n",
+ " Four-Door Car | \n",
+ " Luxury Car | \n",
+ " Luxury SUV | \n",
+ " SUV | \n",
+ " Sports Car | \n",
+ " Two-Door Car | \n",
+ "
\n",
+ " \n",
+ " response | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " No | \n",
+ " 0.865757 | \n",
+ " 0.931429 | \n",
+ " 0.836735 | \n",
+ " 0.838676 | \n",
+ " 0.815739 | \n",
+ " 0.861895 | \n",
+ "
\n",
+ " \n",
+ " Yes | \n",
+ " 0.134243 | \n",
+ " 0.068571 | \n",
+ " 0.163265 | \n",
+ " 0.161324 | \n",
+ " 0.184261 | \n",
+ " 0.138105 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ "vehicle_class Four-Door Car Luxury Car Luxury SUV SUV Sports Car \\\n",
+ "response \n",
+ "No 0.865757 0.931429 0.836735 0.838676 0.815739 \n",
+ "Yes 0.134243 0.068571 0.163265 0.161324 0.184261 \n",
+ "\n",
+ "vehicle_class Two-Door Car \n",
+ "response \n",
+ "No 0.861895 \n",
+ "Yes 0.138105 "
+ ]
+ },
+ "execution_count": 9,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "# your code goes here"
+ "# Response rate by sales channel: one column per channel, normalized so each column's No/Yes shares sum to 1.\n",
+ "channel_response = pd.crosstab(df[\"response\"], df[\"sales_channel\"], normalize=\"columns\")\n",
+ "channel_response\n"
]
}
],
@@ -143,7 +1318,7 @@
"provenance": []
},
"kernelspec": {
- "display_name": "Python 3 (ipykernel)",
+ "display_name": "Python 3",
"language": "python",
"name": "python3"
},
@@ -157,7 +1332,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.9.13"
+ "version": "3.11.3"
}
},
"nbformat": 4,