From 6b867be73990df5e5ad2331371d5fc71f8cbcc8c Mon Sep 17 00:00:00 2001 From: Gabychytaa Date: Mon, 25 Aug 2025 17:05:51 +0200 Subject: [PATCH] Solved lab --- lab-hypothesis-testing.ipynb | 227 ++++++++++++++++++++++++++++++++--- 1 file changed, 213 insertions(+), 14 deletions(-) diff --git a/lab-hypothesis-testing.ipynb b/lab-hypothesis-testing.ipynb index 0cc26d5..f359ea8 100644 --- a/lab-hypothesis-testing.ipynb +++ b/lab-hypothesis-testing.ipynb @@ -51,7 +51,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 2, "metadata": {}, "outputs": [ { @@ -278,7 +278,7 @@ "[800 rows x 11 columns]" ] }, - "execution_count": 3, + "execution_count": 2, "metadata": {}, "output_type": "execute_result" } @@ -297,11 +297,58 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import scipy.stats as st\n", + "import pandas as pd" + ] + }, + { + "cell_type": "code", + "execution_count": null, "metadata": {}, "outputs": [], "source": [ - "#code here" + "# Two-sample t-test on HP. H0: mean(Dragon) <= mean(Grass); H1: mean(Dragon) > mean(Grass)\n", + "\n", + "def is_type(df, t):  # boolean mask: rows whose 'Type 1' or 'Type 2' equals t\n", + " return (df['Type 1'].eq(t)) | (df['Type 2'].eq(t))\n", + "\n", + "dragons = df.loc[is_type(df, 'Dragon'), 'HP'].dropna().to_numpy()\n", + "grass = df.loc[is_type(df, 'Grass'), 'HP'].dropna().to_numpy()" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "t=4.499, one-sided p=0.000007\n", + "Decision: Reject H0 (Dragon > Grass)\n" + ] + } + ], + "source": [ + "tstat, p_two = st.ttest_ind(dragons, grass, equal_var=True)  # pooled-variance t-test; p_two is the two-sided p-value\n", + "\n", + "p_one = p_two/2 if tstat > 0 else 1 - p_two/2  # one-sided p for H1: Dragon > Grass\n", + "alpha = 0.05\n", + "print(f\"t={tstat:.3f}, one-sided p={p_one:.6f}\")\n", + "print(\"Decision:\", \"Reject H0 (Dragon > Grass)\" if p_one < alpha else \"Fail to reject H0\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Reject $H_0$: with t ≈ 4.50 and a one-sided p ≈ 0.000007 (< 0.05), the data provide significant evidence that Dragon-type Pokémon have a higher mean HP than Grass-type Pokémon at the 5% level." ] }, { @@ -313,11 +360,125 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 12, "metadata": {}, "outputs": [], "source": [ - "#code here" + "stats_cols = ['HP','Attack','Defense','Sp. Atk','Sp. Def','Speed']\n", + "leg = df[df['Legendary'] == True]\n", + "non = df[df['Legendary'] == False]" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
stattp_two_sidedsig_5%
3Sp. Atk14.1914066.314916e-41True
4Sp. Def11.0377511.843981e-26True
1Attack10.3973217.827253e-24True
5Speed9.7652342.354075e-21True
0HP8.0361243.330648e-15True
2Defense7.1812401.584223e-12True
\n", + "
" + ], + "text/plain": [ + " stat t p_two_sided sig_5%\n", + "3 Sp. Atk 14.191406 6.314916e-41 True\n", + "4 Sp. Def 11.037751 1.843981e-26 True\n", + "1 Attack 10.397321 7.827253e-24 True\n", + "5 Speed 9.765234 2.354075e-21 True\n", + "0 HP 8.036124 3.330648e-15 True\n", + "2 Defense 7.181240 1.584223e-12 True" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "rows = []\n", + "for col in stats_cols:\n", + " x = leg[col].dropna().to_numpy()\n", + " y = non[col].dropna().to_numpy()\n", + " t, p = st.ttest_ind(x, y, equal_var=True)\n", + " rows.append({\"stat\": col, \"t\": t, \"p_two_sided\": p, \"sig_5%\": p < 0.05})\n", + "\n", + "pd.DataFrame(rows).sort_values(\"p_two_sided\")\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "At the 5% level, Legendary Pokémon have significantly higher mean values than Non-Legendary Pokémon in all six stats (Sp. Atk, Sp. Def, Attack, Speed, HP and Defense): every t statistic is positive and every two-sided p-value is far below 0.05 (the largest, for Defense, is about 1.6e-12)."
] }, { @@ -337,7 +498,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 14, "metadata": {}, "outputs": [ { @@ -453,7 +614,7 @@ "4 624.0 262.0 1.9250 65500.0 " ] }, - "execution_count": 5, + "execution_count": 14, "metadata": {}, "output_type": "execute_result" } @@ -483,22 +644,60 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 15, "metadata": {}, "outputs": [], - "source": [] + "source": [ + "school_lon, school_lat = -118.0, 34.0\n", + "hospital_lon, hospital_lat = -122.0, 37.0" + ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 16, "metadata": {}, "outputs": [], - "source": [] + "source": [ + "def euclidean(lon, lat, ref_lon, ref_lat):\n", + " return np.sqrt((lon - ref_lon)**2 + (lat - ref_lat)**2)\n", + "\n", + "df['dist_school'] = euclidean(df['longitude'], df['latitude'], school_lon, school_lat)\n", + "df['dist_hospital'] = euclidean(df['longitude'], df['latitude'], hospital_lon, hospital_lat)\n", + "df['near_either'] = (df['dist_school'] < 0.50) | (df['dist_hospital'] < 0.50)" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Near n=6829, Far n=10171\n", + "Means: Near=246952, Far=180678\n", + "t=38.046, two-sided p=4.818e-304\n", + "Decision: Significant difference\n" + ] + } + ], + "source": [ + "near_vals = df.loc[df['near_either'], 'median_house_value'].dropna().to_numpy()\n", + "far_vals = df.loc[~df['near_either'], 'median_house_value'].dropna().to_numpy()\n", + "\n", + "t, p = st.ttest_ind(near_vals, far_vals, equal_var=True) # pooled, as in lesson\n", + "alpha = 0.05\n", + "print(f\"Near n={len(near_vals)}, Far n={len(far_vals)}\")\n", + "print(f\"Means: Near={near_vals.mean():.0f}, Far={far_vals.mean():.0f}\")\n", + "print(f\"t={t:.3f}, two-sided p={p:.4g}\")\n", + "print(\"Decision:\", \"Significant difference\" if p < alpha else \"No significant difference\")" 
+ ] } ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "base", "language": "python", "name": "python3" }, @@ -512,7 +711,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.9" + "version": "3.12.7" } }, "nbformat": 4,