diff --git a/lab-hypothesis-testing.ipynb b/lab-hypothesis-testing.ipynb index 0cc26d5..17249ca 100644 --- a/lab-hypothesis-testing.ipynb +++ b/lab-hypothesis-testing.ipynb @@ -38,7 +38,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 42, "metadata": {}, "outputs": [], "source": [ @@ -46,12 +46,13 @@ "import pandas as pd\n", "import scipy.stats as st\n", "import numpy as np\n", - "\n" + "from scipy.stats import ttest_ind\n", + "from statsmodels.multivariate.manova import MANOVA\n" ] }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 43, "metadata": {}, "outputs": [ { @@ -278,7 +279,7 @@ "[800 rows x 11 columns]" ] }, - "execution_count": 3, + "execution_count": 43, "metadata": {}, "output_type": "execute_result" } @@ -297,11 +298,32 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 44, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "T-statistic: 3.3350\n", + "P-value: 0.0008\n", + "✅ Dragon Pokémons have significantly higher HP than Grass Pokémons.\n", + "\n" + ] + } + ], "source": [ - "#code here" + "dragon_hp = df[df['Type 1'] == 'Dragon']['HP']\n", + "grass_hp = df[df['Type 1'] == 'Grass']['HP']\n", + "\n", + "t_stat_1, p_val_1 = ttest_ind(dragon_hp, grass_hp, equal_var=False, alternative='greater')\n", + "\n", + "print(f\"T-statistic: {t_stat_1:.4f}\")\n", + "print(f\"P-value: {p_val_1:.4f}\")\n", + "if p_val_1 < 0.05:\n", + " print(\"✅ Dragon Pokémons have significantly higher HP than Grass Pokémons.\\n\")\n", + "else:\n", + " print(\"❌ No significant difference in HP between Dragon and Grass Pokémons.\\n\")" ] }, { @@ -313,11 +335,55 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 45, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " Multivariate linear model\n", + "================================================================\n", + " \n", + "----------------------------------------------------------------\n", + " Intercept Value Num DF Den DF F Value Pr > F\n", + "----------------------------------------------------------------\n", + " Wilks' lambda 0.0592 6.0000 793.0000 2100.8338 0.0000\n", + " Pillai's trace 0.9408 6.0000 793.0000 2100.8338 0.0000\n", + " Hotelling-Lawley trace 15.8953 6.0000 793.0000 2100.8338 0.0000\n", + " Roy's greatest root 15.8953 6.0000 793.0000 2100.8338 0.0000\n", + "----------------------------------------------------------------\n", + " \n", + "----------------------------------------------------------------\n", + " Legendary Value Num DF Den DF F Value Pr > F\n", + "----------------------------------------------------------------\n", + " Wilks' lambda 0.7331 6.0000 793.0000 48.1098 0.0000\n", + " Pillai's trace 0.2669 6.0000 793.0000 48.1098 0.0000\n", + " Hotelling-Lawley trace 0.3640 6.0000 793.0000 48.1098 0.0000\n", + " Roy's greatest root 0.3640 6.0000 793.0000 48.1098 0.0000\n", + "================================================================\n", + "\n" + ] + } + ], + "source": [ + "stats_cols = ['HP', 'Attack', 'Defense', 'Sp. Atk', 'Sp. Def', 'Speed']\n", + "\n", + "df = df.rename(columns={'Sp. Atk':'Sp_Atk', 'Sp. Def':'Sp_Def'})\n", + "stats_cols = ['HP', 'Attack', 'Defense', 'Sp_Atk', 'Sp_Def', 'Speed']\n", + "\n", + "manova = MANOVA.from_formula('HP + Attack + Defense + Sp_Atk + Sp_Def + Speed ~ Legendary', data=df)\n", + "manova_result = manova.mv_test()\n", + "\n", + "print(manova_result)" + ] + }, + { + "cell_type": "markdown", "metadata": {}, - "outputs": [], "source": [ - "#code here" + "- The MANOVA test shows that Legendary and Non-Legendary Pokémons differ significantly across the combination of stats (HP, Attack, Defense, Sp_Atk, Sp_Def, Speed).\n", + "- Considering all stats together, Legendary Pokémons have a different profile than Non-Legendary Pokémons." ] }, { @@ -337,7 +403,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 46, "metadata": {}, "outputs": [ { @@ -453,7 +519,7 @@ "4 624.0 262.0 1.9250 65500.0 " ] }, - "execution_count": 5, + "execution_count": 46, "metadata": {}, "output_type": "execute_result" } @@ -483,22 +549,45 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, + "execution_count": 47, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "T-statistic: 37.9923, P-value: 0.0000\n", + "✅ Houses close to schools or hospitals are significantly more expensive.\n", + "\n" + ] + } + ], + "source": [ + "def euclidean_dist(lon1, lat1, lon2, lat2):\n", + " return np.sqrt((lon1 - lon2)**2 + (lat1 - lat2)**2)\n", + "\n", + "dist_school = euclidean_dist(df['longitude'], df['latitude'], *school)\n", + "dist_hospital = euclidean_dist(df['longitude'], df['latitude'], *hospital)\n", + "\n", + "close = (dist_school < 0.5) | (dist_hospital < 0.5)\n", + "df['close_to_school_or_hospital'] = close\n", + "\n", + "close_prices = df[df['close_to_school_or_hospital']]['median_house_value']\n", + "far_prices = df[~df['close_to_school_or_hospital']]['median_house_value']\n", + "\n", + "t_stat, p_val = ttest_ind(close_prices, far_prices, equal_var=False)\n", + "\n", + "print(f\"T-statistic: {t_stat:.4f}, P-value: {p_val:.4f}\")\n", + "if p_val < 0.05:\n", + " print(\"✅ Houses close to schools or hospitals are significantly more expensive.\\n\")\n", + "else:\n", + " print(\"❌ No significant price difference based on proximity.\\n\")" + ] } ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -512,9 +601,9 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.9" + "version": "3.13.3" } }, "nbformat": 4, - "nbformat_minor": 2 + "nbformat_minor": 4 }