From 4354a27b5956309e7f034180361b6a0f0bf6d1e3 Mon Sep 17 00:00:00 2001
From: Marisa Oliveira <163911161+HR-Freak@users.noreply.github.com>
Date: Thu, 20 Nov 2025 13:58:07 +0000
Subject: [PATCH] Solved data structuring and combining

---
 .../cleaning_functions.cpython-313.pyc        | Bin 0 -> 4316 bytes
 cleaning_functions.py                         |  94 ++
 lab-dw-data-structuring-and-combining.ipynb   | 975 +++++++++++++++++-
 3 files changed, 1059 insertions(+), 10 deletions(-)
 create mode 100644 __pycache__/cleaning_functions.cpython-313.pyc
 create mode 100644 cleaning_functions.py
diff --git a/__pycache__/cleaning_functions.cpython-313.pyc b/__pycache__/cleaning_functions.cpython-313.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..5583d724b5bc76f596cc71c7467d2f654b3c593f
GIT binary patch
literal 4316
zcmcInO;8)j6`mhyMk65!fq(^!051efAjHDjiPw%3ehgl(&6b%ZxR!D$gEW{4qZw6?
zgt6CEE2>hpupEFYyR6CyR}Qf&TZxZ*$RURua@0fzsd`=It=e3C!%O9o$|0{uKL#N?
zd)PGSbx-$uuYcb6UcWwcyPXsi^q+w8F9$_^M?NZrDK`!;LE|AMQX>5kg=nj05Gyhr
z6tamdP`hXY%87QMyvPA{h&)h1bO3eUrh~#uqHH!smvxfL20s>lhkpX{kea9Fpsp=*
zl~o)Wn6n@GC^yfR>$?2>Nq&(QnX_fx^ORa;fK>?9k7`3?McW81+RxJ(th0{_a<iE3
z!E_WeMhe@Fl$4T#H0EQvnoej&(2n3RDa;xvgzc(+M@E=Oa#EFIGUkx1NeTHWjhXne
zK@Ow`FGuDL85xm;gcL)HOv%P-N>4_nq?FV%u1V_MloB%{3$ncWengd)!tp!dIE;ls
ze>#>*BSpI%mb7>{rYDvZ4Zb2VRhCF2x}4TxDMi<e;pAQ1WOYYN%Z!p`7^o3eKn~QH
z29lwQzUD_AdmWDgdx3(l-}Lq8eFFvGpy?Yd_{L1%*t5y6oAbV(XC{7owJ5lD+PB)X
zQ~LwiUu?GLgu%>2(dEv3cnF%PW47j?!?N|SVZ+4MB2DI;JBkM)1ZA;m@w4U8x;Fh0
zFi^!zJ&QA7bFlg_TUiBJ^8l<5YP+@edQzOoiw;pZCS@;6Io~Md5*tS7Bl9@R67Dxj
zctrB&JvNWKBn9YfoW<O2S&IYg=<ArC4zk#Gef-*G%uQdmYP(e<9=3y>lai_|>qt{1
z%#S1Go~}vQxgZ%U0H2hu5n;lDnek6CvoMYYIi8M51XEa;kYX#csv`qCDoyNOlUEb~
zS~R9ghJm?j>Gd?a8)WY}#iWi>M$e>#?m0`n$J*ljuYz{WNk-~!Qby!>iEAMxiS6km
z^dJ{>Avchc+aNDt@s}{IXlt;@cob+FP)*LFfpGo}NQU~>-@3cLcdg(bF#Q8xv3dVc
z!9Qa9M+*LPrvF^t|FeSsqUpa_@K2cjiM)U6Ma!wZh5gtw@vmcM*G#_U$^lE+`ONyp
zwF3v$+_Jl{H&|%uH=Fv4ji-v9wxXxw6>l5h4k=qRpP9(q+4$(dL$$SMJNNWLYuIcJ
zKl$wItGVHseCrh${%qsp1CHwK%0|tOaG~R@*>Sem8YumPo}UsuokY*)rmgLufokw%
zK14p)NPVCBH;nwhd`cvKV1vlV{!lLzxvE4x?<SGJDiyqod>HI{#Jft7p`Ld+Dl)IR
z0qw#pZi}Ujl%9}L6re1pl!RQu0jB#gH4Ty3j^x|&I;IC-#$oc8B*Z|F?t{w0nT0HD
zUsiPq!a35@TKojggmUau*U%`)^~2A&4+OyFaOPZnrlY^$7&0A0Ux|6gdoP5Bt>&GD
zt%Y4NdwT!!=0eULs^90`YJFhe?#ed5;#r}~!dZ^*F>SqI`2K<1z%R|d@j~B>**Eid
zI^TCSGf^6~0_{mqkceRgS{v-~H7GQC5e7=7t|4ue(bx4H2@f@8z8a>^lt-i>7>}-d
zX)+aa%@N0~DT~BAty1ASYE;lzPsUR#qtU=w?X8T$#X`KLrj%jHXv{n_*HkkIpCU&N
zPvky$Kv9w(cE!vAtu2uLP@{~<z0VMCjs3X!t$a*{@5uSL*6O6F)6_gmu8#A}kA0HF
zJ=(Q&VhNl>UykZYS%dpO0lA2xr3^$o0n?FBz(+WFJqby~OX9Vdi|I0ok)(!$zzYFB
z*npX29J5kV!FCd$4cw?0QP3<SB^Hg7F3c?}s;WtrV__C1s6_Fks*pTsMFJ+9qUtg1
zOz9QjpbZfdVixie%o&8&z-&T^8w8a-P)d=$rR0INTbKc+5%?MZ00Ie&vmxg`V>*MG
z$>+TA#oET&7xy>r7x+Gt@7os(e9+{B#ZdUqp{Jo|H~ttdgf5w(OMfx)p~+1q=lY51
z7$|yun?lj)-U)4m9*EnK%w*BsNPKzc*4C};*nVpBR?a<?nJ)5fqI!3H<MXU9&vz6(
z{+-pW)!+4IZ)E#SPxlu4ZB5T6`%i~+$Ni;yH@1JqbPT?5`L@pO_HLiI8V}yxzHq?9
zJ(J^4o3?ikoNoyHRUaP$Q(1{~oj@zn5lC^*)AhU^yrypIC9-dkVdm*(>I%5M#@=Mf
z66&Sl<&ip(qmpb@y!|FiuITF_igHms;A^0J@CWIxb1_tOi#D>Zni<078A#f0hs3Dl
z3jl0h(PC;k4ye_aej~?HmM=g8;x=RzK1+}`tV|w}ClKZma$J!#Y)j~IxdK`T=pzX=
zWI&J<SuHdL+%?#3SOM39oJdQns)23*gT=H14S=lISMYY5-tH&wJaOi{-8t`A!F$2<
zUdUW|%g}e;woSI!<lh^3G`u&w-}Jcc4}r&leD8btrqNAin}5Ypjqe<CKx)}eJwEmO
zz%$zH9nCk5Raqe=sh?=rV3F3HG69V@<Bgz=JdfrGZ%rYwxB7SjE7+G6WTe0dwpCe9
zlIUZLBRv^pu$R=)cp9ProF_yX7m^J*6;&XeT|aRt`;eT_*W@Go=q5}}&es5Fk+T(C
z1)<XvI``XhLT640!W#wNErG2-_LI#(PB>kDvz)w2$u_KQe+i9;)MC}i=Bp6XsKx4L
zkM$B9TIE=8)h&+oRsnsiw|e~@?X}*X^lXr`UTs#AYb7Iy#6SyG*7i>vld}{#$(s><
z3akt8Gl(@a)N`RTJCPT<GZW8+uIy4?=q0VD-O=qX(g<Wxo}AHhp=B2a+exb<JDL{)
zN6#(j!W_9fBm;c_TCp9R0-l?Umi!+JC5w4VS!Z90@s5k&TxnIlgrYD+j@%dqa=_9w
m{SDRp4b}8L$I_nfJ*_}S9Y9(eft+yy>41U52A=j<`u+<Wp+e9A

literal 0
HcmV?d00001

diff --git a/cleaning_functions.py b/cleaning_functions.py
new file mode 100644
index 0000000..ef5ccef
--- /dev/null
+++ b/cleaning_functions.py
@@ -0,0 +1,94 @@
+import pandas as pd
+import numpy as np
+
+
+def clean_column_names(df):
+    """Return df with lower-case, underscored columns and "st" as "state"."""
+    # set_axis returns a new frame, so the caller's DataFrame keeps its labels.
+    df = df.set_axis(df.columns.str.lower().str.replace(" ", "_"), axis=1)
+    return df.rename(columns={"st": "state"})
+
+
+def clean_invalid_values(df):
+    """Normalise inconsistent category labels in place and return df.
+
+    Rewrites gender, state, education and vehicle_class; values that are
+    not listed in a mapping are left unchanged and missing values stay NaN.
+    """
+    gender = df["gender"]
+    # astype(str) turns NaN into the text "nan"; mask those rows back to NaN.
+    df["gender"] = (gender.astype(str).str.strip().str.upper()
+                    .replace({"MALE": "M", "FEMALE": "F", "FEMAL": "F"})
+                    .where(gender.notna()))
+    df["state"] = df["state"].replace({
+        "Cali": "California",
+        "AZ": "Arizona",
+        "WA": "Washington"
+    })
+    df["education"] = df["education"].replace({"Bachelors": "Bachelor"})
+    df["vehicle_class"] = df["vehicle_class"].replace({
+        "Sports Car": "Luxury",
+        "Luxury Car": "Luxury",
+        "Luxury SUV": "Luxury"
+    })
+    return df
+
+
+def clean_clv(df):
+    """Convert customer_lifetime_value to float; "%" text is divided by 100."""
+    raw = df["customer_lifetime_value"].astype(str).str.strip()
+    is_pct = raw.str.endswith("%")  # plain numbers are already full-scale
+    value = (raw.str.replace("%", "", regex=False)
+             .str.replace("+", "", regex=False)
+             .astype(float))
+    df["customer_lifetime_value"] = value.mask(is_pct, value / 100).round(2)
+    return df
+
+
+def clean_open_complaints(df):
+    """Convert number_of_open_complaints to int, in place, and return df.
+
+    Text values such as "1/2/00" yield their middle field (2); plain
+    numbers are kept; missing or unparseable entries become 0.
+    """
+    col = df["number_of_open_complaints"]
+    if pd.api.types.is_numeric_dtype(col):
+        df["number_of_open_complaints"] = col.fillna(0).astype(int)
+        return df
+    parts = col.astype(str).str.split("/", expand=True)
+    # Decide per row: a value without "/" has no second field, so fall
+    # back to the first one instead of zeroing it.
+    picked = parts[1].fillna(parts[0]) if parts.shape[1] > 1 else parts[0]
+    counts = pd.to_numeric(picked, errors="coerce").fillna(0).astype(int)
+    df["number_of_open_complaints"] = counts
+    return df
+
+
+
+def handle_nulls(df):
+    """Fill NaNs in place: median for numeric columns, mode for object ones."""
+    for col in df.select_dtypes(include=["number"]).columns:
+        df[col] = df[col].fillna(df[col].median())
+
+    for col in df.select_dtypes(include=["object"]).columns:
+        modes = df[col].mode()
+        # mode() is empty for an all-NaN column; indexing [0] would raise.
+        if not modes.empty:
+            df[col] = df[col].fillna(modes.iloc[0])
+    return df
+
+
+def remove_duplicates(df):
+    """Return df without repeated rows (first kept) on a fresh 0..n-1 index."""
+    # Both calls return new frames, so the result is built in one chain.
+    return df.drop_duplicates(keep="first").reset_index(drop=True)
+
+
+def clean_data(df):
+    """Run every cleaning step on df, in pipeline order, and return it."""
+    # Column names are normalised first: later steps use snake_case keys.
+    steps = (clean_column_names, clean_invalid_values, clean_clv,
+             clean_open_complaints, handle_nulls, remove_duplicates)
+    for step in steps:
+        df = step(df)
+    return df
diff --git a/lab-dw-data-structuring-and-combining.ipynb b/lab-dw-data-structuring-and-combining.ipynb
index ec4e3f9..874bd78 100644
--- a/lab-dw-data-structuring-and-combining.ipynb
+++ b/lab-dw-data-structuring-and-combining.ipynb
@@ -36,14 +36,192 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 26,
    "id": "492d06e3-92c7-4105-ac72-536db98d3244",
    "metadata": {
     "id": "492d06e3-92c7-4105-ac72-536db98d3244"
    },
    "outputs": [],
    "source": [
-    "# Your code goes here"
+    "import pandas as pd\n",
+    "from cleaning_functions import clean_data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 27,
+   "id": "b0e59683",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "file1 = \"https://raw.githubusercontent.com/data-bootcamp-v4/data/main/file1.csv\"\n",
+    "file2 = \"https://raw.githubusercontent.com/data-bootcamp-v4/data/main/file2.csv\"\n",
+    "file3 = \"https://raw.githubusercontent.com/data-bootcamp-v4/data/main/file3.csv\"\n",
+    "# Load datasets\n",
+    "df1 = pd.read_csv(file1)\n",
+    "df2 = pd.read_csv(file2)\n",
+    "df3 = pd.read_csv(file3)\n",
+    "# Clean datasets\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 28,
+   "id": "a0bc80a2",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "file1 columns: ['Customer', 'ST', 'GENDER', 'Education', 'Customer Lifetime Value', 'Income', 'Monthly Premium Auto', 'Number of Open Complaints', 'Policy Type', 'Vehicle Class', 'Total Claim Amount']\n",
+      "file2 columns: ['Customer', 'ST', 'GENDER', 'Education', 'Customer Lifetime Value', 'Income', 'Monthly Premium Auto', 'Number of Open Complaints', 'Total Claim Amount', 'Policy Type', 'Vehicle Class']\n",
+      "file3 columns: ['Customer', 'State', 'Customer Lifetime Value', 'Education', 'Gender', 'Income', 'Monthly Premium Auto', 'Number of Open Complaints', 'Policy Type', 'Total Claim Amount', 'Vehicle Class']\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(\"file1 columns:\", df1.columns.tolist())\n",
+    "print(\"file2 columns:\", df2.columns.tolist())\n",
+    "print(\"file3 columns:\", df3.columns.tolist())   \n",
+    "# Check column names for consistency"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 29,
+   "id": "4e267477",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "file1: ['1/0/00' '1/2/00' '1/1/00' '1/3/00' '1/5/00' '1/4/00' nan]\n",
+      "file2: ['1/0/00' '1/1/00' '1/3/00' '1/5/00' '1/2/00' '1/4/00']\n",
+      "file3: [0 2 3 1 5 4]\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(\"file1:\", df1[\"Number of Open Complaints\"].unique()[:10])\n",
+    "print(\"file2:\", df2[\"Number of Open Complaints\"].unique()[:10])\n",
+    "print(\"file3:\", df3[\"Number of Open Complaints\"].unique()[:10])\n",
+    "# Check unique values in 'Number of Open Complaints' column"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "4f23695b",
+   "metadata": {},
+   "source": [
+    "I went straight to this column because I remembered changing it in the last lab, and I got an error message when I ran \"clean_data\" on all the files."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 30,
+   "id": "90f48bb2",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def clean_open_complaints(df):\n",
+    "    # Column name *after* clean_column_names()\n",
+    "    col = df[\"number_of_open_complaints\"].astype(str)\n",
+    "    # Split on '/' because we have values like '1/0/00'\n",
+    "    splits = col.str.split(\"/\", expand=True)\n",
+    "    # If we have format X/Y/Z, use middle (Y). If it's already a single number, use that.\n",
+    "    if splits.shape[1] >= 2:\n",
+    "        values = splits[1]\n",
+    "    else:\n",
+    "        values = splits[0]\n",
+    "    df[\"number_of_open_complaints\"] = (\n",
+    "        pd.to_numeric(values, errors=\"coerce\")\n",
+    "        .fillna(0)\n",
+    "        .astype(int)\n",
+    "    )\n",
+    "    return df\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 31,
+   "id": "e310f5d1",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import importlib\n",
+    "import cleaning_functions\n",
+    "importlib.reload(cleaning_functions)\n",
+    "from cleaning_functions import clean_data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 32,
+   "id": "1e268c8f",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pandas as pd"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 33,
+   "id": "41be0e2c",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "file1 = \"https://raw.githubusercontent.com/data-bootcamp-v4/data/main/file1.csv\"\n",
+    "file2 = \"https://raw.githubusercontent.com/data-bootcamp-v4/data/main/file2.csv\"\n",
+    "file3 = \"https://raw.githubusercontent.com/data-bootcamp-v4/data/main/file3.csv\"\n",
+    "\n",
+    "df1 = pd.read_csv(file1)\n",
+    "df2 = pd.read_csv(file2)\n",
+    "df3 = pd.read_csv(file3)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 34,
+   "id": "e06995da",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "<class 'pandas.core.frame.DataFrame'>\n",
+      "RangeIndex: 9138 entries, 0 to 9137\n",
+      "Data columns (total 11 columns):\n",
+      " #   Column                     Non-Null Count  Dtype  \n",
+      "---  ------                     --------------  -----  \n",
+      " 0   customer                   9138 non-null   object \n",
+      " 1   state                      9138 non-null   object \n",
+      " 2   gender                     9138 non-null   object \n",
+      " 3   education                  9138 non-null   object \n",
+      " 4   customer_lifetime_value    9138 non-null   float64\n",
+      " 5   income                     9138 non-null   float64\n",
+      " 6   monthly_premium_auto       9138 non-null   float64\n",
+      " 7   number_of_open_complaints  9138 non-null   int64  \n",
+      " 8   policy_type                9138 non-null   object \n",
+      " 9   vehicle_class              9138 non-null   object \n",
+      " 10  total_claim_amount         9138 non-null   float64\n",
+      "dtypes: float64(4), int64(1), object(6)\n",
+      "memory usage: 785.4+ KB\n"
+     ]
+    }
+   ],
+   "source": [
+    "df1_cleaned = clean_data(df1)\n",
+    "df2_cleaned = clean_data(df2)\n",
+    "df3_cleaned = clean_data(df3)\n",
+    "\n",
+    "df_combined = pd.concat([df1_cleaned, df2_cleaned, df3_cleaned], ignore_index=True)\n",
+    "\n",
+    "df_combined.info()"
    ]
   },
   {
@@ -72,14 +250,230 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 37,
    "id": "aa10d9b0-1c27-4d3f-a8e4-db6ab73bfd26",
    "metadata": {
     "id": "aa10d9b0-1c27-4d3f-a8e4-db6ab73bfd26"
    },
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>unnamed:_0</th>\n",
+       "      <th>customer</th>\n",
+       "      <th>state</th>\n",
+       "      <th>customer_lifetime_value</th>\n",
+       "      <th>response</th>\n",
+       "      <th>coverage</th>\n",
+       "      <th>education</th>\n",
+       "      <th>effective_to_date</th>\n",
+       "      <th>employmentstatus</th>\n",
+       "      <th>gender</th>\n",
+       "      <th>...</th>\n",
+       "      <th>number_of_policies</th>\n",
+       "      <th>policy_type</th>\n",
+       "      <th>policy</th>\n",
+       "      <th>renew_offer_type</th>\n",
+       "      <th>sales_channel</th>\n",
+       "      <th>total_claim_amount</th>\n",
+       "      <th>vehicle_class</th>\n",
+       "      <th>vehicle_size</th>\n",
+       "      <th>vehicle_type</th>\n",
+       "      <th>month</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>0</td>\n",
+       "      <td>DK49336</td>\n",
+       "      <td>Arizona</td>\n",
+       "      <td>4809.216960</td>\n",
+       "      <td>No</td>\n",
+       "      <td>Basic</td>\n",
+       "      <td>College</td>\n",
+       "      <td>2011-02-18</td>\n",
+       "      <td>Employed</td>\n",
+       "      <td>M</td>\n",
+       "      <td>...</td>\n",
+       "      <td>9</td>\n",
+       "      <td>Corporate Auto</td>\n",
+       "      <td>Corporate L3</td>\n",
+       "      <td>Offer3</td>\n",
+       "      <td>Agent</td>\n",
+       "      <td>292.800000</td>\n",
+       "      <td>Four-Door Car</td>\n",
+       "      <td>Medsize</td>\n",
+       "      <td>A</td>\n",
+       "      <td>2</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>1</td>\n",
+       "      <td>KX64629</td>\n",
+       "      <td>California</td>\n",
+       "      <td>2228.525238</td>\n",
+       "      <td>No</td>\n",
+       "      <td>Basic</td>\n",
+       "      <td>College</td>\n",
+       "      <td>2011-01-18</td>\n",
+       "      <td>Unemployed</td>\n",
+       "      <td>F</td>\n",
+       "      <td>...</td>\n",
+       "      <td>1</td>\n",
+       "      <td>Personal Auto</td>\n",
+       "      <td>Personal L3</td>\n",
+       "      <td>Offer4</td>\n",
+       "      <td>Call Center</td>\n",
+       "      <td>744.924331</td>\n",
+       "      <td>Four-Door Car</td>\n",
+       "      <td>Medsize</td>\n",
+       "      <td>A</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>2</td>\n",
+       "      <td>LZ68649</td>\n",
+       "      <td>Washington</td>\n",
+       "      <td>14947.917300</td>\n",
+       "      <td>No</td>\n",
+       "      <td>Basic</td>\n",
+       "      <td>Bachelor</td>\n",
+       "      <td>2011-02-10</td>\n",
+       "      <td>Employed</td>\n",
+       "      <td>M</td>\n",
+       "      <td>...</td>\n",
+       "      <td>2</td>\n",
+       "      <td>Personal Auto</td>\n",
+       "      <td>Personal L3</td>\n",
+       "      <td>Offer3</td>\n",
+       "      <td>Call Center</td>\n",
+       "      <td>480.000000</td>\n",
+       "      <td>SUV</td>\n",
+       "      <td>Medsize</td>\n",
+       "      <td>A</td>\n",
+       "      <td>2</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>3</td>\n",
+       "      <td>XL78013</td>\n",
+       "      <td>Oregon</td>\n",
+       "      <td>22332.439460</td>\n",
+       "      <td>Yes</td>\n",
+       "      <td>Extended</td>\n",
+       "      <td>College</td>\n",
+       "      <td>2011-01-11</td>\n",
+       "      <td>Employed</td>\n",
+       "      <td>M</td>\n",
+       "      <td>...</td>\n",
+       "      <td>2</td>\n",
+       "      <td>Corporate Auto</td>\n",
+       "      <td>Corporate L3</td>\n",
+       "      <td>Offer2</td>\n",
+       "      <td>Branch</td>\n",
+       "      <td>484.013411</td>\n",
+       "      <td>Four-Door Car</td>\n",
+       "      <td>Medsize</td>\n",
+       "      <td>A</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>4</td>\n",
+       "      <td>QA50777</td>\n",
+       "      <td>Oregon</td>\n",
+       "      <td>9025.067525</td>\n",
+       "      <td>No</td>\n",
+       "      <td>Premium</td>\n",
+       "      <td>Bachelor</td>\n",
+       "      <td>2011-01-17</td>\n",
+       "      <td>Medical Leave</td>\n",
+       "      <td>F</td>\n",
+       "      <td>...</td>\n",
+       "      <td>7</td>\n",
+       "      <td>Personal Auto</td>\n",
+       "      <td>Personal L2</td>\n",
+       "      <td>Offer1</td>\n",
+       "      <td>Branch</td>\n",
+       "      <td>707.925645</td>\n",
+       "      <td>Four-Door Car</td>\n",
+       "      <td>Medsize</td>\n",
+       "      <td>A</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "<p>5 rows × 27 columns</p>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   unnamed:_0 customer       state  customer_lifetime_value response  \\\n",
+       "0           0  DK49336     Arizona              4809.216960       No   \n",
+       "1           1  KX64629  California              2228.525238       No   \n",
+       "2           2  LZ68649  Washington             14947.917300       No   \n",
+       "3           3  XL78013      Oregon             22332.439460      Yes   \n",
+       "4           4  QA50777      Oregon              9025.067525       No   \n",
+       "\n",
+       "   coverage education effective_to_date employmentstatus gender  ...  \\\n",
+       "0     Basic   College        2011-02-18         Employed      M  ...   \n",
+       "1     Basic   College        2011-01-18       Unemployed      F  ...   \n",
+       "2     Basic  Bachelor        2011-02-10         Employed      M  ...   \n",
+       "3  Extended   College        2011-01-11         Employed      M  ...   \n",
+       "4   Premium  Bachelor        2011-01-17    Medical Leave      F  ...   \n",
+       "\n",
+       "   number_of_policies     policy_type        policy  renew_offer_type  \\\n",
+       "0                   9  Corporate Auto  Corporate L3            Offer3   \n",
+       "1                   1   Personal Auto   Personal L3            Offer4   \n",
+       "2                   2   Personal Auto   Personal L3            Offer3   \n",
+       "3                   2  Corporate Auto  Corporate L3            Offer2   \n",
+       "4                   7   Personal Auto   Personal L2            Offer1   \n",
+       "\n",
+       "   sales_channel  total_claim_amount  vehicle_class  vehicle_size  \\\n",
+       "0          Agent          292.800000  Four-Door Car       Medsize   \n",
+       "1    Call Center          744.924331  Four-Door Car       Medsize   \n",
+       "2    Call Center          480.000000            SUV       Medsize   \n",
+       "3         Branch          484.013411  Four-Door Car       Medsize   \n",
+       "4         Branch          707.925645  Four-Door Car       Medsize   \n",
+       "\n",
+       "  vehicle_type month  \n",
+       "0            A     2  \n",
+       "1            A     1  \n",
+       "2            A     2  \n",
+       "3            A     1  \n",
+       "4            A     1  \n",
+       "\n",
+       "[5 rows x 27 columns]"
+      ]
+     },
+     "execution_count": 37,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
-    "# Your code goes here"
+    "import pandas as pd\n",
+    "df = pd.read_csv(\"https://raw.githubusercontent.com/data-bootcamp-v4/data/main/marketing_customer_analysis_clean.csv\")\n",
+    "df.head()"
    ]
   },
   {
@@ -93,6 +487,127 @@
     "Round the total revenue to 2 decimal points.  Analyze the resulting table to draw insights."
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": 38,
+   "id": "285a4645",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "Index(['unnamed:_0', 'customer', 'state', 'customer_lifetime_value',\n",
+       "       'response', 'coverage', 'education', 'effective_to_date',\n",
+       "       'employmentstatus', 'gender', 'income', 'location_code',\n",
+       "       'marital_status', 'monthly_premium_auto', 'months_since_last_claim',\n",
+       "       'months_since_policy_inception', 'number_of_open_complaints',\n",
+       "       'number_of_policies', 'policy_type', 'policy', 'renew_offer_type',\n",
+       "       'sales_channel', 'total_claim_amount', 'vehicle_class', 'vehicle_size',\n",
+       "       'vehicle_type', 'month'],\n",
+       "      dtype='object')"
+      ]
+     },
+     "execution_count": 38,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "df.columns"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 40,
+   "id": "63a444f3",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>customer_lifetime_value</th>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>sales_channel</th>\n",
+       "      <th></th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>Agent</th>\n",
+       "      <td>8021.81</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>Branch</th>\n",
+       "      <td>8060.62</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>Call Center</th>\n",
+       "      <td>8110.36</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>Web</th>\n",
+       "      <td>7809.12</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "               customer_lifetime_value\n",
+       "sales_channel                         \n",
+       "Agent                          8021.81\n",
+       "Branch                         8060.62\n",
+       "Call Center                    8110.36\n",
+       "Web                            7809.12"
+      ]
+     },
+     "execution_count": 40,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "pivot_sales = (\n",
+    "    df.pivot_table(\n",
+    "        index=\"sales_channel\",\n",
+    "        values=\"customer_lifetime_value\",\n",
+    "        aggfunc=\"mean\" \n",
+    ").round(2)\n",
+    ")\n",
+    "pivot_sales"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "bb1b4788",
+   "metadata": {},
+   "source": [
+    "Actually, to do this analysis we would need a premium column; since we don't have one, I used CLV, which is the closest data we have for this analysis.\n",
+    "These are the insights:\n",
+    "- Call Center generated the highest revenue, meaning that customers acquired through this channel contribute the most long-term revenue\n",
+    "- Branch and Agent are in the middle, indicating moderate long-term revenue\n",
+    "- The lowest-revenue channel is Web; it brings in customers with lower long-term value"
+   ]
+  },
   {
    "cell_type": "markdown",
    "id": "640993b2-a291-436c-a34d-a551144f8196",
@@ -103,6 +618,125 @@
     "2. Create a pivot table that shows the average customer lifetime value per gender and education level. Analyze the resulting table to draw insights."
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": 45,
+   "id": "8e34dde5",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th>education</th>\n",
+       "      <th>Bachelor</th>\n",
+       "      <th>College</th>\n",
+       "      <th>Doctor</th>\n",
+       "      <th>High School or Below</th>\n",
+       "      <th>Master</th>\n",
+       "      <th>Total</th>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>gender</th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>F</th>\n",
+       "      <td>13386258.11</td>\n",
+       "      <td>12460107.91</td>\n",
+       "      <td>1465701.78</td>\n",
+       "      <td>13793600.12</td>\n",
+       "      <td>3874600.25</td>\n",
+       "      <td>44980268.17</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>M</th>\n",
+       "      <td>12110061.83</td>\n",
+       "      <td>12851725.02</td>\n",
+       "      <td>1490482.06</td>\n",
+       "      <td>12680914.19</td>\n",
+       "      <td>3365559.06</td>\n",
+       "      <td>42498742.16</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>Total</th>\n",
+       "      <td>25496319.95</td>\n",
+       "      <td>25311832.93</td>\n",
+       "      <td>2956183.84</td>\n",
+       "      <td>26474514.31</td>\n",
+       "      <td>7240159.30</td>\n",
+       "      <td>87479010.33</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "education     Bachelor      College      Doctor  High School or Below  \\\n",
+       "gender                                                                  \n",
+       "F          13386258.11  12460107.91  1465701.78           13793600.12   \n",
+       "M          12110061.83  12851725.02  1490482.06           12680914.19   \n",
+       "Total      25496319.95  25311832.93  2956183.84           26474514.31   \n",
+       "\n",
+       "education      Master        Total  \n",
+       "gender                              \n",
+       "F          3874600.25  44980268.17  \n",
+       "M          3365559.06  42498742.16  \n",
+       "Total      7240159.30  87479010.33  "
+      ]
+     },
+     "execution_count": 45,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "pivot_clv = (\n",
+    "    df.pivot_table(\n",
+    "        index=\"gender\",\n",
+    "        columns=\"education\",\n",
+    "        values=\"customer_lifetime_value\",\n",
+    "        aggfunc=\"sum\",\n",
+    "        margins=True,\n",
+    "        margins_name=\"Total\",\n",
+    "    ).round(2)\n",
+    ")\n",
+    "pivot_clv"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "f3745556",
+   "metadata": {},
+   "source": [
+    "- We have more female customers than male customers\n",
+    "- The majority of the customers have an education level of high school or below \n",
+    "- Comparing gender and education, we can say that the majority of the customers are female with high school or below "
+   ]
+  },
   {
    "cell_type": "markdown",
    "id": "32c7f2e5-3d90-43e5-be33-9781b6069198",
@@ -130,14 +764,335 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 49,
    "id": "3a069e0b-b400-470e-904d-d17582191be4",
    "metadata": {
     "id": "3a069e0b-b400-470e-904d-d17582191be4"
    },
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>unnamed:_0</th>\n",
+       "      <th>customer</th>\n",
+       "      <th>state</th>\n",
+       "      <th>customer_lifetime_value</th>\n",
+       "      <th>response</th>\n",
+       "      <th>coverage</th>\n",
+       "      <th>education</th>\n",
+       "      <th>effective_to_date</th>\n",
+       "      <th>employmentstatus</th>\n",
+       "      <th>gender</th>\n",
+       "      <th>...</th>\n",
+       "      <th>policy_type</th>\n",
+       "      <th>policy</th>\n",
+       "      <th>renew_offer_type</th>\n",
+       "      <th>sales_channel</th>\n",
+       "      <th>total_claim_amount</th>\n",
+       "      <th>vehicle_class</th>\n",
+       "      <th>vehicle_size</th>\n",
+       "      <th>vehicle_type</th>\n",
+       "      <th>month</th>\n",
+       "      <th>efective_to_data</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>0</td>\n",
+       "      <td>DK49336</td>\n",
+       "      <td>Arizona</td>\n",
+       "      <td>4809.216960</td>\n",
+       "      <td>No</td>\n",
+       "      <td>Basic</td>\n",
+       "      <td>College</td>\n",
+       "      <td>2011-02-18</td>\n",
+       "      <td>Employed</td>\n",
+       "      <td>M</td>\n",
+       "      <td>...</td>\n",
+       "      <td>Corporate Auto</td>\n",
+       "      <td>Corporate L3</td>\n",
+       "      <td>Offer3</td>\n",
+       "      <td>Agent</td>\n",
+       "      <td>292.800000</td>\n",
+       "      <td>Four-Door Car</td>\n",
+       "      <td>Medsize</td>\n",
+       "      <td>A</td>\n",
+       "      <td>February</td>\n",
+       "      <td>2011-02-18</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>1</td>\n",
+       "      <td>KX64629</td>\n",
+       "      <td>California</td>\n",
+       "      <td>2228.525238</td>\n",
+       "      <td>No</td>\n",
+       "      <td>Basic</td>\n",
+       "      <td>College</td>\n",
+       "      <td>2011-01-18</td>\n",
+       "      <td>Unemployed</td>\n",
+       "      <td>F</td>\n",
+       "      <td>...</td>\n",
+       "      <td>Personal Auto</td>\n",
+       "      <td>Personal L3</td>\n",
+       "      <td>Offer4</td>\n",
+       "      <td>Call Center</td>\n",
+       "      <td>744.924331</td>\n",
+       "      <td>Four-Door Car</td>\n",
+       "      <td>Medsize</td>\n",
+       "      <td>A</td>\n",
+       "      <td>January</td>\n",
+       "      <td>2011-01-18</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>2</td>\n",
+       "      <td>LZ68649</td>\n",
+       "      <td>Washington</td>\n",
+       "      <td>14947.917300</td>\n",
+       "      <td>No</td>\n",
+       "      <td>Basic</td>\n",
+       "      <td>Bachelor</td>\n",
+       "      <td>2011-02-10</td>\n",
+       "      <td>Employed</td>\n",
+       "      <td>M</td>\n",
+       "      <td>...</td>\n",
+       "      <td>Personal Auto</td>\n",
+       "      <td>Personal L3</td>\n",
+       "      <td>Offer3</td>\n",
+       "      <td>Call Center</td>\n",
+       "      <td>480.000000</td>\n",
+       "      <td>SUV</td>\n",
+       "      <td>Medsize</td>\n",
+       "      <td>A</td>\n",
+       "      <td>February</td>\n",
+       "      <td>2011-02-10</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>3</td>\n",
+       "      <td>XL78013</td>\n",
+       "      <td>Oregon</td>\n",
+       "      <td>22332.439460</td>\n",
+       "      <td>Yes</td>\n",
+       "      <td>Extended</td>\n",
+       "      <td>College</td>\n",
+       "      <td>2011-01-11</td>\n",
+       "      <td>Employed</td>\n",
+       "      <td>M</td>\n",
+       "      <td>...</td>\n",
+       "      <td>Corporate Auto</td>\n",
+       "      <td>Corporate L3</td>\n",
+       "      <td>Offer2</td>\n",
+       "      <td>Branch</td>\n",
+       "      <td>484.013411</td>\n",
+       "      <td>Four-Door Car</td>\n",
+       "      <td>Medsize</td>\n",
+       "      <td>A</td>\n",
+       "      <td>January</td>\n",
+       "      <td>2011-01-11</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>4</td>\n",
+       "      <td>QA50777</td>\n",
+       "      <td>Oregon</td>\n",
+       "      <td>9025.067525</td>\n",
+       "      <td>No</td>\n",
+       "      <td>Premium</td>\n",
+       "      <td>Bachelor</td>\n",
+       "      <td>2011-01-17</td>\n",
+       "      <td>Medical Leave</td>\n",
+       "      <td>F</td>\n",
+       "      <td>...</td>\n",
+       "      <td>Personal Auto</td>\n",
+       "      <td>Personal L2</td>\n",
+       "      <td>Offer1</td>\n",
+       "      <td>Branch</td>\n",
+       "      <td>707.925645</td>\n",
+       "      <td>Four-Door Car</td>\n",
+       "      <td>Medsize</td>\n",
+       "      <td>A</td>\n",
+       "      <td>January</td>\n",
+       "      <td>2011-01-17</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "<p>5 rows × 28 columns</p>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   unnamed:_0 customer       state  customer_lifetime_value response  \\\n",
+       "0           0  DK49336     Arizona              4809.216960       No   \n",
+       "1           1  KX64629  California              2228.525238       No   \n",
+       "2           2  LZ68649  Washington             14947.917300       No   \n",
+       "3           3  XL78013      Oregon             22332.439460      Yes   \n",
+       "4           4  QA50777      Oregon              9025.067525       No   \n",
+       "\n",
+       "   coverage education effective_to_date employmentstatus gender  ...  \\\n",
+       "0     Basic   College        2011-02-18         Employed      M  ...   \n",
+       "1     Basic   College        2011-01-18       Unemployed      F  ...   \n",
+       "2     Basic  Bachelor        2011-02-10         Employed      M  ...   \n",
+       "3  Extended   College        2011-01-11         Employed      M  ...   \n",
+       "4   Premium  Bachelor        2011-01-17    Medical Leave      F  ...   \n",
+       "\n",
+       "      policy_type        policy renew_offer_type  sales_channel  \\\n",
+       "0  Corporate Auto  Corporate L3           Offer3          Agent   \n",
+       "1   Personal Auto   Personal L3           Offer4    Call Center   \n",
+       "2   Personal Auto   Personal L3           Offer3    Call Center   \n",
+       "3  Corporate Auto  Corporate L3           Offer2         Branch   \n",
+       "4   Personal Auto   Personal L2           Offer1         Branch   \n",
+       "\n",
+       "   total_claim_amount  vehicle_class  vehicle_size  vehicle_type     month  \\\n",
+       "0          292.800000  Four-Door Car       Medsize             A  February   \n",
+       "1          744.924331  Four-Door Car       Medsize             A   January   \n",
+       "2          480.000000            SUV       Medsize             A  February   \n",
+       "3          484.013411  Four-Door Car       Medsize             A   January   \n",
+       "4          707.925645  Four-Door Car       Medsize             A   January   \n",
+       "\n",
+       "  efective_to_data  \n",
+       "0       2011-02-18  \n",
+       "1       2011-01-18  \n",
+       "2       2011-02-10  \n",
+       "3       2011-01-11  \n",
+       "4       2011-01-17  \n",
+       "\n",
+       "[5 rows x 28 columns]"
+      ]
+     },
+     "execution_count": 49,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "df[\"efective_to_data\"] = pd.to_datetime(df[\"effective_to_date\"])\n",
+    "df[\"month\"] = df[\"efective_to_data\"].dt.month_name()\n",
+    "df.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 54,
+   "id": "453ac779",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>policy_type</th>\n",
+       "      <th>month</th>\n",
+       "      <th>number_of_open_complaints</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>Corporate Auto</td>\n",
+       "      <td>February</td>\n",
+       "      <td>385.208135</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>Corporate Auto</td>\n",
+       "      <td>January</td>\n",
+       "      <td>443.434952</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>Personal Auto</td>\n",
+       "      <td>February</td>\n",
+       "      <td>1453.684441</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>Personal Auto</td>\n",
+       "      <td>January</td>\n",
+       "      <td>1727.605722</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>Special Auto</td>\n",
+       "      <td>February</td>\n",
+       "      <td>95.226817</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>5</th>\n",
+       "      <td>Special Auto</td>\n",
+       "      <td>January</td>\n",
+       "      <td>87.074049</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "      policy_type     month  number_of_open_complaints\n",
+       "0  Corporate Auto  February                 385.208135\n",
+       "1  Corporate Auto   January                 443.434952\n",
+       "2   Personal Auto  February                1453.684441\n",
+       "3   Personal Auto   January                1727.605722\n",
+       "4    Special Auto  February                  95.226817\n",
+       "5    Special Auto   January                  87.074049"
+      ]
+     },
+     "execution_count": 54,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "complaints_summary = (\n",
+    "    df.groupby([\"policy_type\", \"month\"])[\"number_of_open_complaints\"]\n",
+    "    .sum()\n",
+    "    .reset_index()\n",
+    ")\n",
+    "complaints_summary"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "6ac5ee88",
+   "metadata": {},
    "source": [
-    "# Your code goes here"
+    "January has the highest number of open complaints for the Corporate Auto and Personal Auto policy types; for Special Auto, the highest month is February."
    ]
   }
  ],
@@ -146,7 +1101,7 @@
    "provenance": []
   },
   "kernelspec": {
-   "display_name": "Python 3 (ipykernel)",
+   "display_name": "base",
    "language": "python",
    "name": "python3"
   },
@@ -160,7 +1115,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.9.13"
+   "version": "3.13.5"
   }
  },
  "nbformat": 4,

	unnamed:_0	customer	state	customer_lifetime_value	response	coverage	education	effective_to_date	employmentstatus	gender	...	number_of_policies	policy_type	policy	renew_offer_type	sales_channel	total_claim_amount	vehicle_class	vehicle_size	vehicle_type	month
0	0	DK49336	Arizona	4809.216960	No	Basic	College	2011-02-18	Employed	M	...	9	Corporate Auto	Corporate L3	Offer3	Agent	292.800000	Four-Door Car	Medsize	A	2
1	1	KX64629	California	2228.525238	No	Basic	College	2011-01-18	Unemployed	F	...	1	Personal Auto	Personal L3	Offer4	Call Center	744.924331	Four-Door Car	Medsize	A	1
2	2	LZ68649	Washington	14947.917300	No	Basic	Bachelor	2011-02-10	Employed	M	...	2	Personal Auto	Personal L3	Offer3	Call Center	480.000000	SUV	Medsize	A	2
3	3	XL78013	Oregon	22332.439460	Yes	Extended	College	2011-01-11	Employed	M	...	2	Corporate Auto	Corporate L3	Offer2	Branch	484.013411	Four-Door Car	Medsize	A	1
4	4	QA50777	Oregon	9025.067525	No	Premium	Bachelor	2011-01-17	Medical Leave	F	...	7	Personal Auto	Personal L2	Offer1	Branch	707.925645	Four-Door Car	Medsize	A	1
	customer_lifetime_value
sales_channel
Agent	8021.81
Branch	8060.62
Call Center	8110.36
Web	7809.12
education	Bachelor	College	Doctor	High School or Below	Master	Total
gender
F	13386258.11	12460107.91	1465701.78	13793600.12	3874600.25	44980268.17
M	12110061.83	12851725.02	1490482.06	12680914.19	3365559.06	42498742.16
Total	25496319.95	25311832.93	2956183.84	26474514.31	7240159.30	87479010.33
	policy_type	month	number_of_open_complaints
0	Corporate Auto	February	385.208135
1	Corporate Auto	January	443.434952
2	Personal Auto	February	1453.684441
3	Personal Auto	January	1727.605722
4	Special Auto	February	95.226817
5	Special Auto	January	87.074049