data-bootcamp-v4 · Gabychytaa · Aug 25, 2025
diff --git a/lab-hypothesis-testing.ipynb b/lab-hypothesis-testing.ipynb
@@ -51,7 +51,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 2,
    "metadata": {},
    "outputs": [
     {
@@ -278,7 +278,7 @@
        "[800 rows x 11 columns]"
       ]
      },
-     "execution_count": 3,
+     "execution_count": 2,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -297,11 +297,58 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": 11,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import numpy as np\n",
+    "import scipy.stats as st\n",
+    "import pandas as pd"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
-    "#code here"
+    "#Two-Sample T-Test \n",
+    "\n",
+    "def is_type(df, t):\n",
+    "    return (df['Type 1'].eq(t)) | (df['Type 2'].eq(t))\n",
+    "\n",
+    "dragons = df.loc[is_type(df, 'Dragon'), 'HP'].dropna().to_numpy()\n",
+    "grass   = df.loc[is_type(df, 'Grass'),  'HP'].dropna().to_numpy()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "t=4.499135, one-sided p=0.000007\n",
+      "Decision: Reject H0 (Dragon > Grass)\n"
+     ]
+    }
+   ],
+   "source": [
+    "tstat, p_two = st.ttest_ind(dragons, grass, equal_var=True) \n",
+    "\n",
+    "p_one = p_two/2 if tstat > 0 else 1 - p_two/2\n",
+    "alpha = 0.05\n",
+    "print(f\"t={tstat:3f}, one-sided p={p_one:4f}\")\n",
+    "print(\"Decision:\", \"Reject H0 (Dragon > Grass)\" if p_one < alpha else \"Fail to reject H0\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Reject 𝐻0: with t ≈ 4.50 and one-sided p ≈ 0.000007 < 0.05, the data provide sufficient evidence that Dragons have higher mean HP than Grass at the 5% level."
    ]
   },
   {
@@ -313,11 +360,125 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 18,
+   "execution_count": 12,
    "metadata": {},
    "outputs": [],
    "source": [
-    "#code here"
+    "stats_cols = ['HP','Attack','Defense','Sp. Atk','Sp. Def','Speed']\n",
+    "leg = df[df['Legendary'] == True]\n",
+    "non = df[df['Legendary'] == False]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>stat</th>\n",
+       "      <th>t</th>\n",
+       "      <th>p_two_sided</th>\n",
+       "      <th>sig_5%</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>Sp. Atk</td>\n",
+       "      <td>14.191406</td>\n",
+       "      <td>6.314916e-41</td>\n",
+       "      <td>True</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>Sp. Def</td>\n",
+       "      <td>11.037751</td>\n",
+       "      <td>1.843981e-26</td>\n",
+       "      <td>True</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>Attack</td>\n",
+       "      <td>10.397321</td>\n",
+       "      <td>7.827253e-24</td>\n",
+       "      <td>True</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>5</th>\n",
+       "      <td>Speed</td>\n",
+       "      <td>9.765234</td>\n",
+       "      <td>2.354075e-21</td>\n",
+       "      <td>True</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>HP</td>\n",
+       "      <td>8.036124</td>\n",
+       "      <td>3.330648e-15</td>\n",
+       "      <td>True</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>Defense</td>\n",
+       "      <td>7.181240</td>\n",
+       "      <td>1.584223e-12</td>\n",
+       "      <td>True</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "      stat          t   p_two_sided  sig_5%\n",
+       "3  Sp. Atk  14.191406  6.314916e-41    True\n",
+       "4  Sp. Def  11.037751  1.843981e-26    True\n",
+       "1   Attack  10.397321  7.827253e-24    True\n",
+       "5    Speed   9.765234  2.354075e-21    True\n",
+       "0       HP   8.036124  3.330648e-15    True\n",
+       "2  Defense   7.181240  1.584223e-12    True"
+      ]
+     },
+     "execution_count": 13,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "rows = []\n",
+    "for col in stats_cols:\n",
+    "    x = leg[col].dropna().to_numpy()\n",
+    "    y = non[col].dropna().to_numpy()\n",
+    "    t, p = st.ttest_ind(x, y, equal_var=True)\n",
+    "    rows.append({\"stat\": col, \"t\": t, \"p_two_sided\": p, \"sig_5%\": p < 0.05})\n",
+    "\n",
+    "pd.DataFrame(rows).sort_values(\"p_two_sided\")\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "At the 5% level, Legendary Pokémon show significantly higher average values than Non-Legendary Pokémon in all six stats tested (HP, Attack, Defense, Sp. Atk, Sp. Def, Speed); every two-sided p-value is below 0.05, so none of the differences is statistically non-significant."
    ]
   },
   {
@@ -337,7 +498,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 14,
    "metadata": {},
    "outputs": [
     {
@@ -453,7 +614,7 @@
        "4       624.0       262.0         1.9250             65500.0  "
       ]
      },
-     "execution_count": 5,
+     "execution_count": 14,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -483,22 +644,60 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 15,
    "metadata": {},
    "outputs": [],
-   "source": []
+   "source": [
+    "school_lon, school_lat = -118.0, 34.0\n",
+    "hospital_lon, hospital_lat = -122.0, 37.0"
+   ]
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 16,
    "metadata": {},
    "outputs": [],
-   "source": []
+   "source": [
+    "def euclidean(lon, lat, ref_lon, ref_lat):\n",
+    "    return np.sqrt((lon - ref_lon)**2 + (lat - ref_lat)**2)\n",
+    "\n",
+    "df['dist_school']   = euclidean(df['longitude'], df['latitude'], school_lon,   school_lat)\n",
+    "df['dist_hospital'] = euclidean(df['longitude'], df['latitude'], hospital_lon, hospital_lat)\n",
+    "df['near_either'] = (df['dist_school'] < 0.50) | (df['dist_hospital'] < 0.50)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 18,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Near n=6829, Far n=10171\n",
+      "Means: Near=246952, Far=180678\n",
+      "t=38.046, two-sided p=4.818e-304\n",
+      "Decision: Significant difference\n"
+     ]
+    }
+   ],
+   "source": [
+    "near_vals = df.loc[df['near_either'],  'median_house_value'].dropna().to_numpy()\n",
+    "far_vals  = df.loc[~df['near_either'], 'median_house_value'].dropna().to_numpy()\n",
+    "\n",
+    "t, p = st.ttest_ind(near_vals, far_vals, equal_var=True)  # pooled, as in lesson\n",
+    "alpha = 0.05\n",
+    "print(f\"Near n={len(near_vals)}, Far n={len(far_vals)}\")\n",
+    "print(f\"Means: Near={near_vals.mean():.0f}, Far={far_vals.mean():.0f}\")\n",
+    "print(f\"t={t:.3f}, two-sided p={p:.4g}\")\n",
+    "print(\"Decision:\", \"Significant difference\" if p < alpha else \"No significant difference\")"
+   ]
   }
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python 3",
+   "display_name": "base",
    "language": "python",
    "name": "python3"
   },
@@ -512,7 +711,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.10.9"
+   "version": "3.12.7"
   }
  },
  "nbformat": 4,