diff --git a/lab-hypothesis-testing.ipynb b/lab-hypothesis-testing.ipynb index 0cc26d5..112139f 100644 --- a/lab-hypothesis-testing.ipynb +++ b/lab-hypothesis-testing.ipynb @@ -40,6 +40,43 @@ "cell_type": "code", "execution_count": 1, "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Collecting pandas\n", + " Using cached pandas-2.3.2-cp313-cp313-macosx_11_0_arm64.whl.metadata (91 kB)\n", + "Collecting scipy\n", + " Using cached scipy-1.16.1-cp313-cp313-macosx_14_0_arm64.whl.metadata (61 kB)\n", + "Collecting numpy\n", + " Using cached numpy-2.3.2-cp313-cp313-macosx_14_0_arm64.whl.metadata (62 kB)\n", + "Requirement already satisfied: python-dateutil>=2.8.2 in ./.venv/lib/python3.13/site-packages (from pandas) (2.9.0.post0)\n", + "Collecting pytz>=2020.1 (from pandas)\n", + " Using cached pytz-2025.2-py2.py3-none-any.whl.metadata (22 kB)\n", + "Collecting tzdata>=2022.7 (from pandas)\n", + " Using cached tzdata-2025.2-py2.py3-none-any.whl.metadata (1.4 kB)\n", + "Requirement already satisfied: six>=1.5 in ./.venv/lib/python3.13/site-packages (from python-dateutil>=2.8.2->pandas) (1.17.0)\n", + "Using cached pandas-2.3.2-cp313-cp313-macosx_11_0_arm64.whl (10.7 MB)\n", + "Using cached scipy-1.16.1-cp313-cp313-macosx_14_0_arm64.whl (20.8 MB)\n", + "Using cached numpy-2.3.2-cp313-cp313-macosx_14_0_arm64.whl (5.1 MB)\n", + "Using cached pytz-2025.2-py2.py3-none-any.whl (509 kB)\n", + "Using cached tzdata-2025.2-py2.py3-none-any.whl (347 kB)\n", + "Installing collected packages: pytz, tzdata, numpy, scipy, pandas\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5/5\u001b[0m [pandas]2m4/5\u001b[0m [pandas]\n", + "\u001b[1A\u001b[2KSuccessfully installed numpy-2.3.2 pandas-2.3.2 pytz-2025.2 scipy-1.16.1 tzdata-2025.2\n", + "Note: you may need to restart the kernel to use updated packages.\n" + ] + } + ], + "source": [ + "%pip install pandas scipy numpy" + ] + }, + { + "cell_type": "code", 
+ "execution_count": 2, + "metadata": {}, "outputs": [], "source": [ "#libraries\n", @@ -51,7 +88,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 5, "metadata": {}, "outputs": [ { @@ -278,7 +315,7 @@ "[800 rows x 11 columns]" ] }, - "execution_count": 3, + "execution_count": 5, "metadata": {}, "output_type": "execute_result" } @@ -297,11 +334,57 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 10, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "32" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(df[df['Type 1'] == 'Dragon']['HP'])" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "TtestResult(statistic=np.float64(2.7001863481114143), pvalue=np.float64(0.9954662680969679), df=np.float64(58.0))" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "#code here" + "# H0: Dragon mu HP >= Grass mu HP\n", + "# H1: Dragon mu HP < Grass mu HP\n", + "# We consider a sample of 30 Pokemons from each type\n", + "# Significance level = 0.05\n", + "\n", + "dragon = df[df['Type 1'] == 'Dragon']['HP'].sample(30)\n", + "grass = df[df['Type 1'] == 'Grass']['HP'].sample(30)\n", + "\n", + "st.ttest_ind(dragon, grass, alternative='less')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We fail to reject H0: the p-value is above the 0.05 significance level, so there is no significant evidence that Dragon mean HP is lower than Grass mean HP" ] }, { @@ -313,11 +396,40 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 22, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "TtestResult(statistic=array([-6.16755672, -6.16500997, -4.60731302, -7.41991116, -7.3658484 ,\n", + " -5.41183674]), pvalue=array([2.87308611e-08, 2.90433066e-08, 1.56437783e-05, 1.23935302e-10,\n", + " 1.57405763e-10, 6.65564558e-07]), 
df=array([78., 78., 78., 78., 78., 78.]))" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "#code here" + "# H0: Legendary mu stats = Non Legendary mu stats\n", + "# H1: Legendary mu stats != Non Legendary mu stats\n", + "# We consider a sample of 40 Pokemons from each group\n", + "# Significance level = 0.05\n", + "\n", + "non_legendary = df.loc[df['Legendary'] == False, ['HP', 'Attack', 'Defense', 'Sp. Atk', 'Sp. Def', 'Speed']].sample(40)\n", + "\n", + "legendary = df.loc[df['Legendary'] == True, ['HP', 'Attack', 'Defense', 'Sp. Atk', 'Sp. Def', 'Speed']].sample(40)\n", + "\n", + "st.ttest_ind(non_legendary, legendary, alternative='two-sided')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We reject the null hypothesis because all the p-values are lower than the significance level" ] }, { @@ -337,7 +449,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 23, "metadata": {}, "outputs": [ { @@ -453,7 +565,7 @@ "4 624.0 262.0 1.9250 65500.0 " ] }, - "execution_count": 5, + "execution_count": 23, "metadata": {}, "output_type": "execute_result" } @@ -483,22 +595,329 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 28, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " | longitude | \n", + "latitude | \n", + "housing_median_age | \n", + "total_rooms | \n", + "total_bedrooms | \n", + "population | \n", + "households | \n", + "median_income | \n", + "median_house_value | \n", + "distance_to_hospital | \n", + "distance_to_school | \n", + "close_to_services | \n", + "
---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", + "-114.31 | \n", + "34.19 | \n", + "15.0 | \n", + "5612.0 | \n", + "1283.0 | \n", + "1015.0 | \n", + "472.0 | \n", + "1.4936 | \n", + "66900.0 | \n", + "8.187319 | \n", + "3.694888 | \n", + "False | \n", + "
1 | \n", + "-114.47 | \n", + "34.40 | \n", + "19.0 | \n", + "7650.0 | \n", + "1901.0 | \n", + "1129.0 | \n", + "463.0 | \n", + "1.8200 | \n", + "80100.0 | \n", + "7.966235 | \n", + "3.552591 | \n", + "False | \n", + "
2 | \n", + "-114.56 | \n", + "33.69 | \n", + "17.0 | \n", + "720.0 | \n", + "174.0 | \n", + "333.0 | \n", + "117.0 | \n", + "1.6509 | \n", + "85700.0 | \n", + "8.143077 | \n", + "3.453940 | \n", + "False | \n", + "
3 | \n", + "-114.57 | \n", + "33.64 | \n", + "14.0 | \n", + "1501.0 | \n", + "337.0 | \n", + "515.0 | \n", + "226.0 | \n", + "3.1917 | \n", + "73400.0 | \n", + "8.154416 | \n", + "3.448840 | \n", + "False | \n", + "
4 | \n", + "-114.57 | \n", + "33.57 | \n", + "20.0 | \n", + "1454.0 | \n", + "326.0 | \n", + "624.0 | \n", + "262.0 | \n", + "1.9250 | \n", + "65500.0 | \n", + "8.183508 | \n", + "3.456848 | \n", + "False | \n", + "
... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "
16995 | \n", + "-124.26 | \n", + "40.58 | \n", + "52.0 | \n", + "2217.0 | \n", + "394.0 | \n", + "907.0 | \n", + "369.0 | \n", + "2.3571 | \n", + "111400.0 | \n", + "4.233675 | \n", + "9.082070 | \n", + "False | \n", + "
16996 | \n", + "-124.27 | \n", + "40.69 | \n", + "36.0 | \n", + "2349.0 | \n", + "528.0 | \n", + "1194.0 | \n", + "465.0 | \n", + "2.5179 | \n", + "79000.0 | \n", + "4.332320 | \n", + "9.168915 | \n", + "False | \n", + "
16997 | \n", + "-124.30 | \n", + "41.84 | \n", + "17.0 | \n", + "2677.0 | \n", + "531.0 | \n", + "1244.0 | \n", + "456.0 | \n", + "3.0313 | \n", + "103600.0 | \n", + "5.358694 | \n", + "10.057614 | \n", + "False | \n", + "
16998 | \n", + "-124.30 | \n", + "41.80 | \n", + "19.0 | \n", + "2672.0 | \n", + "552.0 | \n", + "1298.0 | \n", + "478.0 | \n", + "1.9797 | \n", + "85800.0 | \n", + "5.322593 | \n", + "10.026465 | \n", + "False | \n", + "
16999 | \n", + "-124.35 | \n", + "40.54 | \n", + "52.0 | \n", + "1820.0 | \n", + "300.0 | \n", + "806.0 | \n", + "270.0 | \n", + "3.0147 | \n", + "94600.0 | \n", + "4.249012 | \n", + "9.115597 | \n", + "False | \n", + "
17000 rows × 12 columns
\n", + "