diff --git a/lab-hypothesis-testing.ipynb b/lab-hypothesis-testing.ipynb
index 0cc26d5..f359ea8 100644
--- a/lab-hypothesis-testing.ipynb
+++ b/lab-hypothesis-testing.ipynb
@@ -51,7 +51,7 @@
},
{
"cell_type": "code",
- "execution_count": 3,
+ "execution_count": 2,
"metadata": {},
"outputs": [
{
@@ -278,7 +278,7 @@
"[800 rows x 11 columns]"
]
},
- "execution_count": 3,
+ "execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
@@ -297,11 +297,58 @@
},
{
"cell_type": "code",
- "execution_count": 1,
+ "execution_count": 11,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import numpy as np\n",
+ "import scipy.stats as st\n",
+ "import pandas as pd"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
"metadata": {},
"outputs": [],
"source": [
- "#code here"
+ "#Two-Sample T-Test \n",
+ "\n",
+ "def is_type(df, t):\n",
+ " return (df['Type 1'].eq(t)) | (df['Type 2'].eq(t))\n",
+ "\n",
+ "dragons = df.loc[is_type(df, 'Dragon'), 'HP'].dropna().to_numpy()\n",
+ "grass = df.loc[is_type(df, 'Grass'), 'HP'].dropna().to_numpy()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "t=4.499135, one-sided p=0.000007\n",
+ "Decision: Reject H0 (Dragon > Grass)\n"
+ ]
+ }
+ ],
+ "source": [
+ "tstat, p_two = st.ttest_ind(dragons, grass, equal_var=True) \n",
+ "\n",
+ "p_one = p_two/2 if tstat > 0 else 1 - p_two/2\n",
+ "alpha = 0.05\n",
+ "print(f\"t={tstat:3f}, one-sided p={p_one:4f}\")\n",
+ "print(\"Decision:\", \"Reject H0 (Dragon > Grass)\" if p_one < alpha else \"Fail to reject H0\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Reject $H_0$: with t ≈ 4.50 and a one-sided p ≈ 0.000007 < 0.05, the data provide sufficient evidence at the 5% significance level that Dragon-type Pokémon have a higher mean HP than Grass-type Pokémon."
]
},
{
@@ -313,11 +360,125 @@
},
{
"cell_type": "code",
- "execution_count": 18,
+ "execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
- "#code here"
+ "stats_cols = ['HP','Attack','Defense','Sp. Atk','Sp. Def','Speed']\n",
+ "leg = df[df['Legendary'] == True]\n",
+ "non = df[df['Legendary'] == False]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " stat | \n",
+ " t | \n",
+ " p_two_sided | \n",
+ " sig_5% | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 3 | \n",
+ " Sp. Atk | \n",
+ " 14.191406 | \n",
+ " 6.314916e-41 | \n",
+ " True | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " Sp. Def | \n",
+ " 11.037751 | \n",
+ " 1.843981e-26 | \n",
+ " True | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " Attack | \n",
+ " 10.397321 | \n",
+ " 7.827253e-24 | \n",
+ " True | \n",
+ "
\n",
+ " \n",
+ " 5 | \n",
+ " Speed | \n",
+ " 9.765234 | \n",
+ " 2.354075e-21 | \n",
+ " True | \n",
+ "
\n",
+ " \n",
+ " 0 | \n",
+ " HP | \n",
+ " 8.036124 | \n",
+ " 3.330648e-15 | \n",
+ " True | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " Defense | \n",
+ " 7.181240 | \n",
+ " 1.584223e-12 | \n",
+ " True | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " stat t p_two_sided sig_5%\n",
+ "3 Sp. Atk 14.191406 6.314916e-41 True\n",
+ "4 Sp. Def 11.037751 1.843981e-26 True\n",
+ "1 Attack 10.397321 7.827253e-24 True\n",
+ "5 Speed 9.765234 2.354075e-21 True\n",
+ "0 HP 8.036124 3.330648e-15 True\n",
+ "2 Defense 7.181240 1.584223e-12 True"
+ ]
+ },
+ "execution_count": 13,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "rows = []\n",
+ "for col in stats_cols:\n",
+ " x = leg[col].dropna().to_numpy()\n",
+ " y = non[col].dropna().to_numpy()\n",
+ " t, p = st.ttest_ind(x, y, equal_var=True)\n",
+ " rows.append({\"stat\": col, \"t\": t, \"p_two_sided\": p, \"sig_5%\": p < 0.05})\n",
+ "\n",
+ "pd.DataFrame(rows).sort_values(\"p_two_sided\")\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
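+ "At the 5% level, Legendary Pokémon show significantly higher average values than Non-Legendary Pokémon in all six stats tested (HP, Attack, Defense, Sp. Atk, Sp. Def, Speed). Every two-sided p-value is far below 0.05 (the largest, for Defense, is about 1.6e-12), so none of the differences is statistically non-significant."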
]
},
{
@@ -337,7 +498,7 @@
},
{
"cell_type": "code",
- "execution_count": 5,
+ "execution_count": 14,
"metadata": {},
"outputs": [
{
@@ -453,7 +614,7 @@
"4 624.0 262.0 1.9250 65500.0 "
]
},
- "execution_count": 5,
+ "execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
@@ -483,22 +644,60 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 15,
"metadata": {},
"outputs": [],
- "source": []
+ "source": [
+ "school_lon, school_lat = -118.0, 34.0\n",
+ "hospital_lon, hospital_lat = -122.0, 37.0"
+ ]
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 16,
"metadata": {},
"outputs": [],
- "source": []
+ "source": [
+ "def euclidean(lon, lat, ref_lon, ref_lat):\n",
+ " return np.sqrt((lon - ref_lon)**2 + (lat - ref_lat)**2)\n",
+ "\n",
+ "df['dist_school'] = euclidean(df['longitude'], df['latitude'], school_lon, school_lat)\n",
+ "df['dist_hospital'] = euclidean(df['longitude'], df['latitude'], hospital_lon, hospital_lat)\n",
+ "df['near_either'] = (df['dist_school'] < 0.50) | (df['dist_hospital'] < 0.50)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 18,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Near n=6829, Far n=10171\n",
+ "Means: Near=246952, Far=180678\n",
+ "t=38.046, two-sided p=4.818e-304\n",
+ "Decision: Significant difference\n"
+ ]
+ }
+ ],
+ "source": [
+ "near_vals = df.loc[df['near_either'], 'median_house_value'].dropna().to_numpy()\n",
+ "far_vals = df.loc[~df['near_either'], 'median_house_value'].dropna().to_numpy()\n",
+ "\n",
+ "t, p = st.ttest_ind(near_vals, far_vals, equal_var=True) # pooled, as in lesson\n",
+ "alpha = 0.05\n",
+ "print(f\"Near n={len(near_vals)}, Far n={len(far_vals)}\")\n",
+ "print(f\"Means: Near={near_vals.mean():.0f}, Far={far_vals.mean():.0f}\")\n",
+ "print(f\"t={t:.3f}, two-sided p={p:.4g}\")\n",
+ "print(\"Decision:\", \"Significant difference\" if p < alpha else \"No significant difference\")"
+ ]
}
],
"metadata": {
"kernelspec": {
- "display_name": "Python 3",
+ "display_name": "base",
"language": "python",
"name": "python3"
},
@@ -512,7 +711,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.10.9"
+ "version": "3.12.7"
}
},
"nbformat": 4,