diff --git a/Image20250607120457.jpg b/Image20250607120457.jpg new file mode 100644 index 0000000..a2f0fe9 Binary files /dev/null and b/Image20250607120457.jpg differ diff --git a/Image20250607120510.jpg b/Image20250607120510.jpg new file mode 100644 index 0000000..52b250d Binary files /dev/null and b/Image20250607120510.jpg differ diff --git a/lab-hypothesis-testing.ipynb b/lab-hypothesis-testing.ipynb index 0cc26d5..f49e3bb 100644 --- a/lab-hypothesis-testing.ipynb +++ b/lab-hypothesis-testing.ipynb @@ -51,7 +51,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 2, "metadata": {}, "outputs": [ { @@ -278,7 +278,7 @@ "[800 rows x 11 columns]" ] }, - "execution_count": 3, + "execution_count": 2, "metadata": {}, "output_type": "execute_result" } @@ -297,11 +297,39 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 4, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "T-statistic: 4.4991348531252635\n", + "P-value: 7.0097137393088205e-06\n", + "Reject the null hypothesis: Dragon-type Pokémon have significantly higher average HP than Grass-type Pokémon.\n" + ] + } + ], "source": [ - "#code here" + "# MOD: considerar Type 1 y Type 2 para Dragon/Grass (pequeña mejora)\n", + "dragon_mask = (df['Type 1'] == 'Dragon') | ((\"Type 2\" in df.columns) & (df['Type 2'] == 'Dragon'))\n", + "grass_mask = (df['Type 1'] == 'Grass') | ((\"Type 2\" in df.columns) & (df['Type 2'] == 'Grass'))\n", + "dragon_hp = df.loc[dragon_mask, 'HP'].dropna()\n", + "grass_hp = df.loc[grass_mask, 'HP'].dropna()\n", + "# Filter HP values for Dragon and Grass types\n", + "\n", + "# Perform a one-tailed t-test\n", + "t_stat, p_value = st.ttest_ind(dragon_hp, grass_hp, alternative='greater')\n", + "\n", + "# Print the results\n", + "print(f\"T-statistic: {t_stat}\")\n", + "print(f\"P-value: {p_value}\")\n", + "\n", + "# Conclusion\n", + "if p_value < 0.05:\n", + " print(\"Reject the null hypothesis: Dragon-type Pokémon have significantly higher average HP than Grass-type Pokémon.\")\n", + "else:\n", + " print(\"Fail to reject the null hypothesis: No significant difference in average HP between Dragon and Grass types.\")" ] }, { @@ -313,11 +341,139 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 4, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "HP: t-statistic = 8.98, p-value = 0.0000\n", + " -> Significant difference in HP between Legendary and Non-Legendary Pokémon.\n", + "Attack: t-statistic = 10.44, p-value = 0.0000\n", + " -> Significant difference in Attack between Legendary and Non-Legendary Pokémon.\n", + "Defense: t-statistic = 7.64, p-value = 0.0000\n", + " -> Significant difference in Defense between Legendary and Non-Legendary Pokémon.\n", + "Sp. Atk: t-statistic = 13.42, p-value = 0.0000\n", + " -> Significant difference in Sp. Atk between Legendary and Non-Legendary Pokémon.\n", + "Sp. Def: t-statistic = 10.02, p-value = 0.0000\n", + " -> Significant difference in Sp. Def between Legendary and Non-Legendary Pokémon.\n", + "Speed: t-statistic = 11.48, p-value = 0.0000\n", + " -> Significant difference in Speed between Legendary and Non-Legendary Pokémon.\n" + ] + } + ], "source": [ - "#code here" + "# Separate Legendary and Non-Legendary Pokémon\n", + "legendary = df[df['Legendary'] == True]\n", + "non_legendary = df[df['Legendary'] == False]\n", + "\n", + "# List of stats to compare\n", + "stats = ['HP', 'Attack', 'Defense', 'Sp. Atk', 'Sp. Def', 'Speed']\n", + "\n", + "# Perform t-tests for each stat\n", + "results = {}\n", + "for stat in stats:\n", + " t_stat, p_value = st.ttest_ind(legendary[stat], non_legendary[stat], equal_var=False) # Welch's t-test\n", + " results[stat] = {'t_stat': t_stat, 'p_value': p_value}\n", + "\n", + "# Print the results\n", + "for stat, result in results.items():\n", + " print(f\"{stat}: t-statistic = {result['t_stat']:.2f}, p-value = {result['p_value']:.4f}\")\n", + " if result['p_value'] < 0.05:\n", + " print(f\" -> Significant difference in {stat} between Legendary and Non-Legendary Pokémon.\")\n", + " else:\n", + " print(f\" -> No significant difference in {stat} between Legendary and Non-Legendary Pokémon.\")" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
LegendaryNon-LegendaryLegendary
HP67.18231392.738462
Attack75.669388116.676923
Defense71.55918499.661538
Sp. Atk68.454422122.184615
Sp. Def68.892517105.938462
Speed65.455782100.184615
\n", + "
" + ], + "text/plain": [ + "Legendary Non-Legendary Legendary\n", + "HP 67.182313 92.738462\n", + "Attack 75.669388 116.676923\n", + "Defense 71.559184 99.661538\n", + "Sp. Atk 68.454422 122.184615\n", + "Sp. Def 68.892517 105.938462\n", + "Speed 65.455782 100.184615" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# MOD: mostrar también medias por grupo para contextualizar las diferencias\n", + "import pandas as pd\n", + "stats_cols = [c for c in ['HP','Attack','Defense','Sp. Atk','Sp. Def','Speed'] if c in df.columns]\n", + "if stats_cols:\n", + " means_table = df.groupby('Legendary')[stats_cols].mean().rename(index={True:'Legendary', False:'Non-Legendary'})\n", + " display(means_table.T)\n", + "else:\n", + " print('No se encontraron columnas de estadísticas estándar para mostrar medias.')" ] }, { @@ -483,10 +639,52 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "T-statistic: 37.992330214201516\n", + "P-value: 3.0064957768592614e-301\n", + "Reject the null hypothesis: Houses close to a school or hospital are significantly more expensive.\n" + ] + } + ], + "source": [ + "# School and hospital coordinates\n", + "school_coords = (-118, 34)\n", + "hospital_coords = (-122, 37)\n", + "\n", + "# Function to calculate Euclidean distance\n", + "def calculate_distance(lat, lon, coords):\n", + " return np.sqrt((lat - coords[1])**2 + (lon - coords[0])**2)\n", + "\n", + "# Calculate distances to school and hospital\n", + "df['distance_to_school'] = calculate_distance(df['latitude'], df['longitude'], school_coords)\n", + "df['distance_to_hospital'] = calculate_distance(df['latitude'], df['longitude'], hospital_coords)\n", + "\n", + "# Determine if a house is close to either a school or hospital\n", + "df['close_to_school_or_hospital'] = (df['distance_to_school'] < 0.50) | (df['distance_to_hospital'] < 0.50)\n", + "\n", + "# Divide the dataset into two groups\n", + "close_group = df[df['close_to_school_or_hospital'] == True]['median_house_value']\n", + "far_group = df[df['close_to_school_or_hospital'] == False]['median_house_value']\n", + "\n", + "# Perform a two-sample t-test\n", + "t_stat, p_value = st.ttest_ind(close_group, far_group, equal_var=False) # Welch's t-test\n", + "\n", + "# Print the results\n", + "print(f\"T-statistic: {t_stat}\")\n", + "print(f\"P-value: {p_value}\")\n", + "\n", + "# Conclusion\n", + "if p_value < 0.05:\n", + " print(\"Reject the null hypothesis: Houses close to a school or hospital are significantly more expensive.\")\n", + "else:\n", + " print(\"Fail to reject the null hypothesis: No significant difference in house prices.\")" + ] }, { "cell_type": "code", @@ -498,7 +696,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "venv", "language": "python", "name": "python3" }, @@ -512,7 +710,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.9" + "version": "3.12.2" } }, "nbformat": 4,