From c5d980ab13c3105fe642196e87d831046c10beae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pedro=20Domingos=20Gon=C3=A7alves?= Date: Tue, 9 Sep 2025 17:58:52 +0100 Subject: [PATCH] lab-hypothesis-testing solved --- lab-hypothesis-testing.ipynb | 206 ++++++++++++++++++++++++++++++++--- 1 file changed, 188 insertions(+), 18 deletions(-) diff --git a/lab-hypothesis-testing.ipynb b/lab-hypothesis-testing.ipynb index 0cc26d5..39ffa54 100644 --- a/lab-hypothesis-testing.ipynb +++ b/lab-hypothesis-testing.ipynb @@ -51,7 +51,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 2, "metadata": {}, "outputs": [ { @@ -278,7 +278,7 @@ "[800 rows x 11 columns]" ] }, - "execution_count": 3, + "execution_count": 2, "metadata": {}, "output_type": "execute_result" } @@ -297,11 +297,46 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 3, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "T-statistic: 3.335, One-tailed p-value: 0.0008\n", + "Reject H0: Dragon-type Pokémon have significantly higher HP than Grass-type Pokémon.\n", + "Mean HP - Dragon: 83.31, Grass: 67.27\n" + ] + } + ], "source": [ - "#code here" + "# Filter HP stats by type\n", + "dragon_hp = df[df['Type 1'] == 'Dragon']['HP']\n", + "grass_hp = df[df['Type 1'] == 'Grass']['HP']\n", + "\n", + "# Welch's two-sample t-test (equal_var=False); ttest_ind returns a two-tailed p-value here\n", + "t_stat, p_value_two_tailed = st.ttest_ind(dragon_hp, grass_hp, equal_var=False)\n", + "\n", + "# Convert to one-tailed p-value for Dragon > Grass\n", + "if t_stat > 0:\n", + " p_value_one_tailed = p_value_two_tailed / 2\n", + "else:\n", + " p_value_one_tailed = 1 - p_value_two_tailed / 2\n", + "\n", + "print(f\"T-statistic: {t_stat:.3f}, One-tailed p-value: {p_value_one_tailed:.4f}\")\n", + "\n", + "# Significance check\n", + "alpha = 0.05\n", + "if p_value_one_tailed < alpha:\n", + " print(\"Reject H0: Dragon-type Pokémon have significantly higher HP than 
Grass-type Pokémon.\")\n", + "else:\n", + " print(\"Fail to reject H0: No significant difference in HP between Dragon and Grass Pokémon.\")\n", + "\n", + "# Optional: mean HP for both types\n", + "mean_dragon = np.mean(dragon_hp)\n", + "mean_grass = np.mean(grass_hp)\n", + "print(f\"Mean HP - Dragon: {mean_dragon:.2f}, Grass: {mean_grass:.2f}\")" ] }, { @@ -313,11 +348,84 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 6, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Stat: HP\n", + "T-statistic: 8.981, Two-tailed p-value: 0.0000\n", + "Reject H0: Legendary and Non-Legendary Pokémon have significantly different HP.\n", + "\n", + "Stat: Attack\n", + "T-statistic: 10.438, Two-tailed p-value: 0.0000\n", + "Reject H0: Legendary and Non-Legendary Pokémon have significantly different Attack.\n", + "\n", + "Stat: Defense\n", + "T-statistic: 7.637, Two-tailed p-value: 0.0000\n", + "Reject H0: Legendary and Non-Legendary Pokémon have significantly different Defense.\n", + "\n", + "Stat: Sp. Atk\n", + "T-statistic: 13.417, Two-tailed p-value: 0.0000\n", + "Reject H0: Legendary and Non-Legendary Pokémon have significantly different Sp. Atk.\n", + "\n", + "Stat: Sp. Def\n", + "T-statistic: 10.016, Two-tailed p-value: 0.0000\n", + "Reject H0: Legendary and Non-Legendary Pokémon have significantly different Sp. Def.\n", + "\n", + "Stat: Speed\n", + "T-statistic: 11.475, Two-tailed p-value: 0.0000\n", + "Reject H0: Legendary and Non-Legendary Pokémon have significantly different Speed.\n", + "\n", + "Mean stats for Legendary Pokémon:\n", + "HP 92.738462\n", + "Attack 116.676923\n", + "Defense 99.661538\n", + "Sp. Atk 122.184615\n", + "Sp. Def 105.938462\n", + "Speed 100.184615\n", + "dtype: float64\n", + "\n", + "Mean stats for Non-Legendary Pokémon:\n", + "HP 67.182313\n", + "Attack 75.669388\n", + "Defense 71.559184\n", + "Sp. Atk 68.454422\n", + "Sp. 
Def 68.892517\n", + "Speed 65.455782\n", + "dtype: float64\n" + ] + } + ], "source": [ - "#code here" + "# Columns to test\n", + "stats_cols = ['HP', 'Attack', 'Defense', 'Sp. Atk', 'Sp. Def', 'Speed']\n", + "\n", + "# Split data\n", + "legendary = df[df['Legendary'] == True]\n", + "non_legendary = df[df['Legendary'] == False]\n", + "\n", + "# Significance level\n", + "alpha = 0.05\n", + "\n", + "# Welch's independent two-tailed t-test (equal_var=False) for each stat column\n", + "for col in stats_cols:\n", + " t_stat, p_value_two_tailed = st.ttest_ind(legendary[col], non_legendary[col], equal_var=False)\n", + " print(f\"\\nStat: {col}\")\n", + " print(f\"T-statistic: {t_stat:.3f}, Two-tailed p-value: {p_value_two_tailed:.4f}\")\n", + " if p_value_two_tailed < alpha:\n", + " print(f\"Reject H0: Legendary and Non-Legendary Pokémon have significantly different {col}.\")\n", + " else:\n", + " print(f\"Fail to reject H0: No significant difference in {col} between Legendary and Non-Legendary Pokémon.\")\n", + "\n", + "# Optional: print mean stats for each group\n", + "print(\"\\nMean stats for Legendary Pokémon:\")\n", + "print(legendary[stats_cols].mean())\n", + "print(\"\\nMean stats for Non-Legendary Pokémon:\")\n", + "print(non_legendary[stats_cols].mean())" ] }, { @@ -337,7 +445,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 17, "metadata": {}, "outputs": [ { @@ -453,7 +561,7 @@ "4 624.0 262.0 1.9250 65500.0 " ] }, - "execution_count": 5, + "execution_count": 17, "metadata": {}, "output_type": "execute_result" } @@ -483,22 +591,84 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 18, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['longitude', 'latitude', 'housing_median_age', 'total_rooms', 'total_bedrooms', 'population', 'households', 'median_income', 'median_house_value']\n" + ] + } + ], + "source": [ + "print(df.columns.tolist())" + ] }, { "cell_type": "code", 
- "execution_count": null, + "execution_count": 24, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "T-statistic: 37.992\n", + "One-tailed p-value: 0.0000\n", + "Reject H0: Houses close to a school or hospital are significantly more expensive.\n", + "Mean house value (close): 246951.98\n", + "Mean house value (far): 180678.44\n" + ] + } + ], + "source": [ + "# Function to calculate Euclidean distance\n", + "def euclidean_distance(lon1, lat1, lon2, lat2):\n", + " return np.sqrt((lon1 - lon2)**2 + (lat1 - lat2)**2)\n", + "\n", + "# Calculate distance from each house to school and hospital\n", + "df['dist_school'] = euclidean_distance(df['longitude'], df['latitude'], school_coord[0], school_coord[1])\n", + "df['dist_hospital'] = euclidean_distance(df['longitude'], df['latitude'], hospital_coord[0], hospital_coord[1])\n", + "\n", + "# Minimum distance to either school or hospital\n", + "df['min_dist'] = df[['dist_school', 'dist_hospital']].min(axis=1)\n", + "\n", + "# Split dataset into houses close and far (distance < 0.50 is close)\n", + "close_houses = df[df['min_dist'] < 0.50]['median_house_value']\n", + "far_houses = df[df['min_dist'] >= 0.50]['median_house_value']\n", + "\n", + "# Welch's two-sample t-test (unequal variance); the returned p-value is two-tailed\n", + "t_stat, p_value_two_tailed = st.ttest_ind(close_houses, far_houses, equal_var=False)\n", + "\n", + "# Convert to one-tailed p-value for \"close houses > far houses\"\n", + "if t_stat > 0:\n", + " p_value_one_tailed = p_value_two_tailed / 2\n", + "else:\n", + " p_value_one_tailed = 1 - p_value_two_tailed / 2\n", + "\n", + "# Significance level\n", + "alpha = 0.05\n", + "\n", + "# Print results\n", + "print(f\"T-statistic: {t_stat:.3f}\")\n", + "print(f\"One-tailed p-value: {p_value_one_tailed:.4f}\")\n", + "\n", + "if p_value_one_tailed < alpha:\n", + " print(\"Reject H0: Houses close to a school or hospital are significantly more expensive.\")\n", + "else:\n", + 
" print(\"Fail to reject H0: No significant difference in house value based on proximity.\")\n", + "\n", + "# Optional: Mean house values for interpretation\n", + "print(f\"Mean house value (close): {close_houses.mean():.2f}\")\n", + "print(f\"Mean house value (far): {far_houses.mean():.2f}\")" + ] } ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "tfdf_env", "language": "python", "name": "python3" }, @@ -512,7 +682,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.9" + "version": "3.11.13" } }, "nbformat": 4,