diff --git a/lab-dw-pandas.ipynb b/lab-dw-pandas.ipynb
index fbd46831..5ec019ad 100644
--- a/lab-dw-pandas.ipynb
+++ b/lab-dw-pandas.ipynb
@@ -80,14 +80,533 @@
"- Compute summary statistics for categorical columns and providing your conclusions based on these statistics."
]
},
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "id": "1222f185",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import pandas as pd"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "id": "68a61b80",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Customer | \n",
+ " ST | \n",
+ " GENDER | \n",
+ " Education | \n",
+ " Customer Lifetime Value | \n",
+ " Income | \n",
+ " Monthly Premium Auto | \n",
+ " Number of Open Complaints | \n",
+ " Policy Type | \n",
+ " Vehicle Class | \n",
+ " Total Claim Amount | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " RB50392 | \n",
+ " Washington | \n",
+ " NaN | \n",
+ " Master | \n",
+ " NaN | \n",
+ " 0.0 | \n",
+ " 1000.0 | \n",
+ " 1/0/00 | \n",
+ " Personal Auto | \n",
+ " Four-Door Car | \n",
+ " 2.704934 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " QZ44356 | \n",
+ " Arizona | \n",
+ " F | \n",
+ " Bachelor | \n",
+ " 697953.59% | \n",
+ " 0.0 | \n",
+ " 94.0 | \n",
+ " 1/0/00 | \n",
+ " Personal Auto | \n",
+ " Four-Door Car | \n",
+ " 1131.464935 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " AI49188 | \n",
+ " Nevada | \n",
+ " F | \n",
+ " Bachelor | \n",
+ " 1288743.17% | \n",
+ " 48767.0 | \n",
+ " 108.0 | \n",
+ " 1/0/00 | \n",
+ " Personal Auto | \n",
+ " Two-Door Car | \n",
+ " 566.472247 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " WW63253 | \n",
+ " California | \n",
+ " M | \n",
+ " Bachelor | \n",
+ " 764586.18% | \n",
+ " 0.0 | \n",
+ " 106.0 | \n",
+ " 1/0/00 | \n",
+ " Corporate Auto | \n",
+ " SUV | \n",
+ " 529.881344 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " GA49547 | \n",
+ " Washington | \n",
+ " M | \n",
+ " High School or Below | \n",
+ " 536307.65% | \n",
+ " 36357.0 | \n",
+ " 68.0 | \n",
+ " 1/0/00 | \n",
+ " Personal Auto | \n",
+ " Four-Door Car | \n",
+ " 17.269323 | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 4003 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 4004 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 4005 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 4006 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 4007 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
4008 rows × 11 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Customer ST GENDER Education \\\n",
+ "0 RB50392 Washington NaN Master \n",
+ "1 QZ44356 Arizona F Bachelor \n",
+ "2 AI49188 Nevada F Bachelor \n",
+ "3 WW63253 California M Bachelor \n",
+ "4 GA49547 Washington M High School or Below \n",
+ "... ... ... ... ... \n",
+ "4003 NaN NaN NaN NaN \n",
+ "4004 NaN NaN NaN NaN \n",
+ "4005 NaN NaN NaN NaN \n",
+ "4006 NaN NaN NaN NaN \n",
+ "4007 NaN NaN NaN NaN \n",
+ "\n",
+ " Customer Lifetime Value Income Monthly Premium Auto \\\n",
+ "0 NaN 0.0 1000.0 \n",
+ "1 697953.59% 0.0 94.0 \n",
+ "2 1288743.17% 48767.0 108.0 \n",
+ "3 764586.18% 0.0 106.0 \n",
+ "4 536307.65% 36357.0 68.0 \n",
+ "... ... ... ... \n",
+ "4003 NaN NaN NaN \n",
+ "4004 NaN NaN NaN \n",
+ "4005 NaN NaN NaN \n",
+ "4006 NaN NaN NaN \n",
+ "4007 NaN NaN NaN \n",
+ "\n",
+ " Number of Open Complaints Policy Type Vehicle Class \\\n",
+ "0 1/0/00 Personal Auto Four-Door Car \n",
+ "1 1/0/00 Personal Auto Four-Door Car \n",
+ "2 1/0/00 Personal Auto Two-Door Car \n",
+ "3 1/0/00 Corporate Auto SUV \n",
+ "4 1/0/00 Personal Auto Four-Door Car \n",
+ "... ... ... ... \n",
+ "4003 NaN NaN NaN \n",
+ "4004 NaN NaN NaN \n",
+ "4005 NaN NaN NaN \n",
+ "4006 NaN NaN NaN \n",
+ "4007 NaN NaN NaN \n",
+ "\n",
+ " Total Claim Amount \n",
+ "0 2.704934 \n",
+ "1 1131.464935 \n",
+ "2 566.472247 \n",
+ "3 529.881344 \n",
+ "4 17.269323 \n",
+ "... ... \n",
+ "4003 NaN \n",
+ "4004 NaN \n",
+ "4005 NaN \n",
+ "4006 NaN \n",
+ "4007 NaN \n",
+ "\n",
+ "[4008 rows x 11 columns]"
+ ]
+ },
+ "execution_count": 5,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Load the customer dataset from the raw GitHub URL (requires network access)\n",
+ "url = \"https://raw.githubusercontent.com/data-bootcamp-v4/data/main/file1.csv\"\n",
+ "df = pd.read_csv(url)\n",
+ "# Show the frame as the cell result; in the recorded output the trailing rows are entirely NaN\n",
+ "df"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "id": "a1eac870",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "Index(['Customer', 'ST', 'GENDER', 'Education', 'Customer Lifetime Value',\n",
+ " 'Income', 'Monthly Premium Auto', 'Number of Open Complaints',\n",
+ " 'Policy Type', 'Vehicle Class', 'Total Claim Amount'],\n",
+ " dtype='object')"
+ ]
+ },
+ "execution_count": 6,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Column labels, to check the exact names before selecting columns by name in later cells\n",
+ "df.columns"
+ ]
+ },
{
"cell_type": "code",
"execution_count": null,
"id": "dd4e8cd8-a6f6-486c-a5c4-1745b0c035f4",
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Shape: (4008, 11)\n",
+ "\n",
+ "Data types:\n",
+ " Customer object\n",
+ "ST object\n",
+ "GENDER object\n",
+ "Education object\n",
+ "Customer Lifetime Value object\n",
+ "Income float64\n",
+ "Monthly Premium Auto float64\n",
+ "Number of Open Complaints object\n",
+ "Policy Type object\n",
+ "Vehicle Class object\n",
+ "Total Claim Amount float64\n",
+ "dtype: object\n",
+ "\n",
+ "Unique value counts per column:\n",
+ " Customer 1071\n",
+ "ST 8\n",
+ "GENDER 5\n",
+ "Education 6\n",
+ "Customer Lifetime Value 1027\n",
+ "Income 774\n",
+ "Monthly Premium Auto 132\n",
+ "Number of Open Complaints 6\n",
+ "Policy Type 3\n",
+ "Vehicle Class 6\n",
+ "Total Claim Amount 761\n",
+ "dtype: int64\n",
+ "\n",
+ "Categorical columns: ['Customer', 'ST', 'GENDER', 'Education', 'Customer Lifetime Value', 'Number of Open Complaints', 'Policy Type', 'Vehicle Class']\n",
+ "\n",
+ "Customer -> uniques: ['RB50392' 'QZ44356' 'AI49188' ... 'CW49887' 'MY31220' nan]\n",
+ "\n",
+ "ST -> uniques: ['Washington' 'Arizona' 'Nevada' 'California' 'Oregon' 'Cali' 'AZ' 'WA'\n",
+ " nan]\n",
+ "\n",
+ "GENDER -> uniques: [nan 'F' 'M' 'Femal' 'Male' 'female']\n",
+ "\n",
+ "Education -> uniques: ['Master' 'Bachelor' 'High School or Below' 'College' 'Bachelors' 'Doctor'\n",
+ " nan]\n",
+ "\n",
+ "Customer Lifetime Value -> uniques: [nan '697953.59%' '1288743.17%' ... '2031499.76%' '323912.47%'\n",
+ " '899704.02%']\n",
+ "\n",
+ "Number of Open Complaints -> uniques: ['1/0/00' '1/2/00' '1/1/00' '1/3/00' '1/5/00' '1/4/00' nan]\n",
+ "\n",
+ "Policy Type -> uniques: ['Personal Auto' 'Corporate Auto' 'Special Auto' nan]\n",
+ "\n",
+ "Vehicle Class -> uniques: ['Four-Door Car' 'Two-Door Car' 'SUV' 'Luxury SUV' 'Sports Car'\n",
+ " 'Luxury Car' nan]\n",
+ "\n",
+ "Numeric ranges:\n",
+ "Income: 0.0 to 99960.0\n",
+ "Monthly Premium Auto: 61.0 to 35354.0\n",
+ "Total Claim Amount: 0.382107 to 2893.239678\n",
+ "\n",
+ "Numeric summary:\n",
+ " Income Monthly Premium Auto Total Claim Amount\n",
+ "count 1071.000000 1071.000000 1071.000000\n",
+ "mean 39295.701214 193.234360 404.986909\n",
+ "std 30469.427060 1601.190369 293.027260\n",
+ "min 0.000000 61.000000 0.382107\n",
+ "25% 14072.000000 68.000000 202.157702\n",
+ "50% 36234.000000 83.000000 354.729129\n",
+ "75% 64631.000000 109.500000 532.800000\n",
+ "max 99960.000000 35354.000000 2893.239678\n",
+ "\n",
+ "Median:\n",
+ " Income 36234.000000\n",
+ "Monthly Premium Auto 83.000000\n",
+ "Total Claim Amount 354.729129\n",
+ "dtype: float64\n",
+ "\n",
+ "Mode:\n",
+ " Income 0.0\n",
+ "Monthly Premium Auto 65.0\n",
+ "Total Claim Amount 321.6\n",
+ "Name: 0, dtype: float64\n",
+ "\n",
+ "Customer value counts:\n",
+ " Customer\n",
+ "RB50392 1\n",
+ "HJ15383 1\n",
+ "AO74776 1\n",
+ "HQ82233 1\n",
+ "OL72737 1\n",
+ " ..\n",
+ "RO26085 1\n",
+ "ES57969 1\n",
+ "JK55587 1\n",
+ "RN97635 1\n",
+ "MY31220 1\n",
+ "Name: count, Length: 1071, dtype: int64\n",
+ "Top value: AA71604\n",
+ "\n",
+ "ST value counts:\n",
+ " ST\n",
+ "Oregon 320\n",
+ "California 211\n",
+ "Arizona 186\n",
+ "Cali 120\n",
+ "Nevada 98\n",
+ "Washington 81\n",
+ "WA 30\n",
+ "AZ 25\n",
+ "Name: count, dtype: int64\n",
+ "Top value: Oregon\n",
+ "\n",
+ "GENDER value counts:\n",
+ " GENDER\n",
+ "F 457\n",
+ "M 413\n",
+ "Male 39\n",
+ "female 28\n",
+ "Femal 17\n",
+ "Name: count, dtype: int64\n",
+ "Top value: F\n",
+ "\n",
+ "Education value counts:\n",
+ " Education\n",
+ "Bachelor 324\n",
+ "College 313\n",
+ "High School or Below 296\n",
+ "Master 94\n",
+ "Doctor 37\n",
+ "Bachelors 7\n",
+ "Name: count, dtype: int64\n",
+ "Top value: Bachelor\n",
+ "\n",
+ "Customer Lifetime Value value counts:\n",
+ " Customer Lifetime Value\n",
+ "445811.34% 4\n",
+ "251459.20% 4\n",
+ "272535.64% 3\n",
+ "578018.22% 3\n",
+ "684615.03% 3\n",
+ " ..\n",
+ "245357.08% 1\n",
+ "507566.27% 1\n",
+ "321497.94% 1\n",
+ "1227534.31% 1\n",
+ "899704.02% 1\n",
+ "Name: count, Length: 1027, dtype: int64\n",
+ "Top value: 251459.20%\n",
+ "\n",
+ "Number of Open Complaints value counts:\n",
+ " Number of Open Complaints\n",
+ "1/0/00 830\n",
+ "1/1/00 138\n",
+ "1/2/00 50\n",
+ "1/3/00 34\n",
+ "1/4/00 13\n",
+ "1/5/00 6\n",
+ "Name: count, dtype: int64\n",
+ "Top value: 1/0/00\n",
+ "\n",
+ "Policy Type value counts:\n",
+ " Policy Type\n",
+ "Personal Auto 780\n",
+ "Corporate Auto 234\n",
+ "Special Auto 57\n",
+ "Name: count, dtype: int64\n",
+ "Top value: Personal Auto\n",
+ "\n",
+ "Vehicle Class value counts:\n",
+ " Vehicle Class\n",
+ "Four-Door Car 576\n",
+ "Two-Door Car 205\n",
+ "SUV 199\n",
+ "Sports Car 57\n",
+ "Luxury SUV 20\n",
+ "Luxury Car 14\n",
+ "Name: count, dtype: int64\n",
+ "Top value: Four-Door Car\n"
+ ]
+ }
+ ],
"source": [
- "# Your code here"
+ "# Overall structure: dimensions and per-column dtypes\n",
+ "print(\"Shape:\", df.shape)\n",
+ "print(\"\\nData types:\\n\", df.dtypes)\n",
+ "\n",
+ "# NOTE: 'Customer Lifetime Value' holds strings ending in '%', so it is reported\n",
+ "# as an object column here; no conversion is applied in this cell.\n",
+ "\n",
+ "# Cardinality of every column\n",
+ "print(\"\\nUnique value counts per column:\\n\", df.nunique())\n",
+ "\n",
+ "# Partition the column labels by dtype: object columns are treated as\n",
+ "# categorical, int64/float64 columns as numeric\n",
+ "cat_cols = df.select_dtypes(include=\"object\").columns\n",
+ "num_cols = df.select_dtypes(include=[\"int64\", \"float64\"]).columns\n",
+ "numeric_df = df[num_cols]\n",
+ "\n",
+ "print(\"\\nCategorical columns:\", list(cat_cols))\n",
+ "\n",
+ "# Distinct labels per categorical column (long for ID-like columns)\n",
+ "for col in cat_cols:\n",
+ "    print(f\"\\n{col} -> uniques:\", df[col].unique())\n",
+ "\n",
+ "# Min/max per numeric column\n",
+ "print(\"\\nNumeric ranges:\")\n",
+ "for col in num_cols:\n",
+ "    lowest, highest = df[col].min(), df[col].max()\n",
+ "    print(f\"{col}: {lowest} to {highest}\")\n",
+ "\n",
+ "# describe() table, then the median and the first row of the mode table\n",
+ "print(\"\\nNumeric summary:\\n\", numeric_df.describe())\n",
+ "print(\"\\nMedian:\\n\", numeric_df.median())\n",
+ "print(\"\\nMode:\\n\", numeric_df.mode().iloc[0])\n",
+ "\n",
+ "# Frequency table and first modal label per categorical column\n",
+ "for col in cat_cols:\n",
+ "    freq = df[col].value_counts()\n",
+ "    print(f\"\\n{col} value counts:\\n\", freq)\n",
+ "    most_common = df[col].mode()[0]\n",
+ "    print(\"Top value:\", most_common)\n"
]
},
{
@@ -116,12 +635,48 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 8,
"id": "2dca5073-4520-4f42-9390-4b92733284ed",
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "Value counts for ST:\n",
+ " ST\n",
+ "Oregon 320\n",
+ "California 211\n",
+ "Arizona 186\n",
+ "Cali 120\n",
+ "Nevada 98\n",
+ "Washington 81\n",
+ "WA 30\n",
+ "AZ 25\n",
+ "Name: count, dtype: int64\n",
+ "\n",
+ "5 least common states (ascending):\n",
+ " ST\n",
+ "AZ 25\n",
+ "WA 30\n",
+ "Washington 81\n",
+ "Nevada 98\n",
+ "Cali 120\n",
+ "Name: count, dtype: int64\n"
+ ]
+ }
+ ],
"source": [
- "# Your code here"
+ "# Frequency of each state in the 'ST' column, and the 5 least common ones\n",
+ "if 'ST' in df.columns:\n",
+ "    # The raw column mixes full names with short forms ('Cali', 'AZ', 'WA').\n",
+ "    # Fold those into the full state name before counting; otherwise one state\n",
+ "    # is split across two rows and the least-common ranking is distorted.\n",
+ "    # replace() returns a new Series, so df itself is left unchanged.\n",
+ "    state_aliases = {'Cali': 'California', 'AZ': 'Arizona', 'WA': 'Washington'}\n",
+ "    st_counts = df['ST'].replace(state_aliases).value_counts()\n",
+ "\n",
+ "    print(\"\\nValue counts for ST:\\n\", st_counts)\n",
+ "\n",
+ "    # ascending sort, then the first 5 rows (fewer if there are fewer states)\n",
+ "    print(\"\\n5 least common states (ascending):\\n\", st_counts.sort_values().head(5))\n",
+ "else:\n",
+ "    print(\"\\n 'ST' column not found. Check column names.\")\n"
]
},
{
@@ -146,12 +701,37 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 10,
"id": "bcfad6c1-9af2-4b0b-9aa9-0dc5c17473c0",
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "Total policies sold by type:\n",
+ " Policy Type\n",
+ "Personal Auto 780\n",
+ "Corporate Auto 234\n",
+ "Special Auto 57\n",
+ "Name: count, dtype: int64\n",
+ "\n",
+ "Most sold policy type: Personal Auto with 780 policies\n"
+ ]
+ }
+ ],
"source": [
- "# Your code here"
+ "# Count policies per 'Policy Type' and report the most frequent one\n",
+ "if 'Policy Type' not in df.columns:\n",
+ "    print(\"\\n 'Policy Type' column not found. Check column names.\")\n",
+ "else:\n",
+ "    policy_counts = df['Policy Type'].value_counts()\n",
+ "    print(\"\\nTotal policies sold by type:\\n\", policy_counts)\n",
+ "\n",
+ "    # label with the largest count, and that count\n",
+ "    top_policy = policy_counts.idxmax()\n",
+ "    top_policy_total = policy_counts.max()\n",
+ "    print(\"\\nMost sold policy type:\", top_policy, \"with\", top_policy_total, \"policies\")\n"
]
},
{
@@ -176,12 +756,51 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 11,
"id": "0c0563cf-6f8b-463d-a321-651a972f82e5",
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "Number of Personal Auto policies: 780\n",
+ "Number of Corporate Auto policies: 234\n",
+ "\n",
+ "Average income for Personal Auto: 38180.7\n",
+ "Average income for Corporate Auto: 41390.31\n",
+ "Customers with Personal Auto have a lower average income.\n"
+ ]
+ }
+ ],
"source": [
- "# Your code here"
+ "# Mean income of Personal Auto customers versus Corporate Auto customers\n",
+ "required_cols = {'Policy Type', 'Income'}\n",
+ "if not required_cols.issubset(df.columns):\n",
+ "    print(\"\\n Required columns not found. Check column names.\")\n",
+ "else:\n",
+ "    policy_type = df['Policy Type']\n",
+ "    personal_df = df[policy_type == 'Personal Auto']\n",
+ "    corporate_df = df[policy_type == 'Corporate Auto']\n",
+ "\n",
+ "    # group sizes, as a sanity check on the two filters\n",
+ "    print(\"\\nNumber of Personal Auto policies:\", len(personal_df))\n",
+ "    print(\"Number of Corporate Auto policies:\", len(corporate_df))\n",
+ "\n",
+ "    avg_personal = personal_df['Income'].mean()\n",
+ "    avg_corporate = corporate_df['Income'].mean()\n",
+ "\n",
+ "    print(\"\\nAverage income for Personal Auto:\", round(avg_personal, 2))\n",
+ "    print(\"Average income for Corporate Auto:\", round(avg_corporate, 2))\n",
+ "\n",
+ "    # choose the verdict text first, then print it once\n",
+ "    if avg_personal < avg_corporate:\n",
+ "        verdict = \"Customers with Personal Auto have a lower average income.\"\n",
+ "    elif avg_personal > avg_corporate:\n",
+ "        verdict = \"Customers with Personal Auto have a higher average income.\"\n",
+ "    else:\n",
+ "        verdict = \"Average incomes are the same.\"\n",
+ "    print(verdict)\n"
]
},
{
@@ -229,15 +848,62 @@
"execution_count": null,
"id": "b731bca6-a760-4860-a27b-a33efa712ce0",
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "Summary stats for Total Claim Amount:\n",
+ " count 1071.000000\n",
+ "mean 404.986909\n",
+ "std 293.027260\n",
+ "min 0.382107\n",
+ "25% 202.157702\n",
+ "50% 354.729129\n",
+ "75% 532.800000\n",
+ "max 2893.239678\n",
+ "Name: Total Claim Amount, dtype: float64\n",
+ "\n",
+ "75th percentile value: 532.8\n"
+ ]
+ },
+ {
+ "ename": "NameError",
+ "evalue": "name 'cutoff_75_' is not defined",
+ "output_type": "error",
+ "traceback": [
+ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
+ "\u001b[1;31mNameError\u001b[0m Traceback (most recent call last)",
+ "Cell \u001b[1;32mIn[12], line 14\u001b[0m\n\u001b[0;32m 11\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124m75th percentile value:\u001b[39m\u001b[38;5;124m\"\u001b[39m, cutoff_75)\n\u001b[0;32m 13\u001b[0m \u001b[38;5;66;03m# filter top 25% customers\u001b[39;00m\n\u001b[1;32m---> 14\u001b[0m high_claim_df \u001b[38;5;241m=\u001b[39m df\u001b[38;5;241m.\u001b[39mloc[df[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mTotal Claim Amount\u001b[39m\u001b[38;5;124m'\u001b[39m] \u001b[38;5;241m>\u001b[39m cutoff_75_]()\n",
+ "\u001b[1;31mNameError\u001b[0m: name 'cutoff_75_' is not defined"
+ ]
+ }
+ ],
"source": [
- "# Your code here"
+ "# High claim analysis: customers whose Total Claim Amount is above the 75th percentile\n",
+ "if 'Total Claim Amount' in df.columns:\n",
+ "    # summary of the whole column, for context\n",
+ "    print(\"\\nSummary stats for Total Claim Amount:\\n\", df['Total Claim Amount'].describe())\n",
+ "\n",
+ "    # 75th percentile cutoff\n",
+ "    cutoff_75 = df['Total Claim Amount'].quantile(0.75)\n",
+ "    print(\"\\n75th percentile value:\", cutoff_75)\n",
+ "\n",
+ "    # Keep rows strictly above the cutoff (the top 25% of claims). Rows with a\n",
+ "    # missing claim amount compare as False and are therefore excluded.\n",
+ "    high_claim_df = df.loc[df['Total Claim Amount'] > cutoff_75]\n",
+ "    print(\"\\nNumber of high-claim customers:\", len(high_claim_df))\n",
+ "\n",
+ "    # summary statistics for the high-claim subset only\n",
+ "    print(\"\\nSummary stats for high-claim customers:\\n\", high_claim_df.describe())\n",
+ "else:\n",
+ "    print(\"\\n 'Total Claim Amount' column not found. Check column names.\")"
]
}
],
"metadata": {
"kernelspec": {
- "display_name": "Python 3 (ipykernel)",
+ "display_name": "base",
"language": "python",
"name": "python3"
},
@@ -251,7 +917,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.9.13"
+ "version": "3.12.7"
}
},
"nbformat": 4,