Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file added Image20250607120457.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added Image20250607120510.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
224 changes: 211 additions & 13 deletions lab-hypothesis-testing.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 2,
"metadata": {},
"outputs": [
{
Expand Down Expand Up @@ -278,7 +278,7 @@
"[800 rows x 11 columns]"
]
},
"execution_count": 3,
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -297,11 +297,39 @@
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": 4,
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"T-statistic: 4.4991348531252635\n",
"P-value: 7.0097137393088205e-06\n",
"Reject the null hypothesis: Dragon-type Pokémon have significantly higher average HP than Grass-type Pokémon.\n"
]
}
],
"source": [
"#code here"
"# Compare HP of Dragon-type vs Grass-type Pokémon, matching on either type slot.\n",
"# 'Type 2' is only consulted when the column exists: the previous\n",
"# `(\"Type 2\" in df.columns) & (df['Type 2'] == ...)` guard still evaluated\n",
"# df['Type 2'] eagerly and raised KeyError when the column was missing.\n",
"type_cols = ['Type 1'] + (['Type 2'] if 'Type 2' in df.columns else [])\n",
"dragon_mask = df[type_cols].eq('Dragon').any(axis=1)\n",
"grass_mask = df[type_cols].eq('Grass').any(axis=1)\n",
"# NOTE(review): the two masks are not mutually exclusive, so a Pokémon typed both\n",
"# Dragon and Grass is counted in both samples; confirm this is acceptable for an\n",
"# independent two-sample test.\n",
"\n",
"# Filter HP values for Dragon and Grass types, dropping missing values\n",
"dragon_hp = df.loc[dragon_mask, 'HP'].dropna()\n",
"grass_hp = df.loc[grass_mask, 'HP'].dropna()\n",
"\n",
"# Perform a one-tailed t-test (H1: mean Dragon HP > mean Grass HP)\n",
"t_stat, p_value = st.ttest_ind(dragon_hp, grass_hp, alternative='greater')\n",
"\n",
"# Print the results\n",
"print(f\"T-statistic: {t_stat}\")\n",
"print(f\"P-value: {p_value}\")\n",
"\n",
"# Conclusion at the 5% significance level. The test is one-sided, so failing to\n",
"# reject only means there is no evidence that Dragon HP is higher.\n",
"if p_value < 0.05:\n",
"    print(\"Reject the null hypothesis: Dragon-type Pokémon have significantly higher average HP than Grass-type Pokémon.\")\n",
"else:\n",
"    print(\"Fail to reject the null hypothesis: No evidence that Dragon-type Pokémon have higher average HP than Grass-type Pokémon.\")"
]
},
{
Expand All @@ -313,11 +341,139 @@
},
{
"cell_type": "code",
"execution_count": 18,
"execution_count": 4,
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"HP: t-statistic = 8.98, p-value = 0.0000\n",
" -> Significant difference in HP between Legendary and Non-Legendary Pokémon.\n",
"Attack: t-statistic = 10.44, p-value = 0.0000\n",
" -> Significant difference in Attack between Legendary and Non-Legendary Pokémon.\n",
"Defense: t-statistic = 7.64, p-value = 0.0000\n",
" -> Significant difference in Defense between Legendary and Non-Legendary Pokémon.\n",
"Sp. Atk: t-statistic = 13.42, p-value = 0.0000\n",
" -> Significant difference in Sp. Atk between Legendary and Non-Legendary Pokémon.\n",
"Sp. Def: t-statistic = 10.02, p-value = 0.0000\n",
" -> Significant difference in Sp. Def between Legendary and Non-Legendary Pokémon.\n",
"Speed: t-statistic = 11.48, p-value = 0.0000\n",
" -> Significant difference in Speed between Legendary and Non-Legendary Pokémon.\n"
]
}
],
"source": [
"#code here"
"# Separate Legendary and Non-Legendary Pokémon\n",
"legendary = df[df['Legendary'] == True]\n",
"non_legendary = df[df['Legendary'] == False]\n",
"\n",
"# List of stats to compare\n",
"stats = ['HP', 'Attack', 'Defense', 'Sp. Atk', 'Sp. Def', 'Speed']\n",
"\n",
"# Perform a two-sided Welch's t-test (equal_var=False) for each stat.\n",
"# NOTE(review): six tests are each judged at alpha = 0.05 with no\n",
"# multiple-comparison correction; confirm that is intended.\n",
"results = {}\n",
"for stat in stats:\n",
"    t_stat, p_value = st.ttest_ind(legendary[stat], non_legendary[stat], equal_var=False)\n",
"    results[stat] = {'t_stat': t_stat, 'p_value': p_value}\n",
"\n",
"# Print the results. p-values are shown with 3 significant digits: a fixed\n",
"# 4-decimal format rendered every very small p-value as 0.0000, hiding its magnitude.\n",
"for stat, result in results.items():\n",
"    print(f\"{stat}: t-statistic = {result['t_stat']:.2f}, p-value = {result['p_value']:.3g}\")\n",
"    if result['p_value'] < 0.05:\n",
"        print(f\" -> Significant difference in {stat} between Legendary and Non-Legendary Pokémon.\")\n",
"    else:\n",
"        print(f\" -> No significant difference in {stat} between Legendary and Non-Legendary Pokémon.\")"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th>Legendary</th>\n",
" <th>Non-Legendary</th>\n",
" <th>Legendary</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>HP</th>\n",
" <td>67.182313</td>\n",
" <td>92.738462</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Attack</th>\n",
" <td>75.669388</td>\n",
" <td>116.676923</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Defense</th>\n",
" <td>71.559184</td>\n",
" <td>99.661538</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Sp. Atk</th>\n",
" <td>68.454422</td>\n",
" <td>122.184615</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Sp. Def</th>\n",
" <td>68.892517</td>\n",
" <td>105.938462</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Speed</th>\n",
" <td>65.455782</td>\n",
" <td>100.184615</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
"Legendary Non-Legendary Legendary\n",
"HP 67.182313 92.738462\n",
"Attack 75.669388 116.676923\n",
"Defense 71.559184 99.661538\n",
"Sp. Atk 68.454422 122.184615\n",
"Sp. Def 68.892517 105.938462\n",
"Speed 65.455782 100.184615"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# MOD: also show the per-group means to put the differences in context\n",
"import pandas as pd\n",
"candidate_cols = ['HP','Attack','Defense','Sp. Atk','Sp. Def','Speed']\n",
"# Keep only the stat columns that are present in the DataFrame\n",
"stats_cols = [c for c in candidate_cols if c in df.columns]\n",
"if not stats_cols:\n",
"    print('No se encontraron columnas de estadísticas estándar para mostrar medias.')\n",
"else:\n",
"    # Mean of each stat per Legendary flag, with readable group labels\n",
"    group_labels = {True:'Legendary', False:'Non-Legendary'}\n",
"    means_table = df.groupby('Legendary')[stats_cols].mean().rename(index=group_labels)\n",
"    # Transpose so stats are rows and groups are columns\n",
"    display(means_table.T)"
]
},
{
Expand Down Expand Up @@ -483,10 +639,52 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": []
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"T-statistic: 37.992330214201516\n",
"P-value: 3.0064957768592614e-301\n",
"Reject the null hypothesis: Houses close to a school or hospital are significantly more expensive.\n"
]
}
],
"source": [
"# School and hospital coordinates, stored as (longitude, latitude)\n",
"school_coords = (-118, 34)\n",
"hospital_coords = (-122, 37)\n",
"\n",
"# Maximum distance for a house to count as \"close\", in the same units as the\n",
"# latitude/longitude columns\n",
"CLOSE_THRESHOLD = 0.50\n",
"\n",
"# Function to calculate Euclidean distance\n",
"def calculate_distance(lat, lon, coords):\n",
"    \"\"\"Return the Euclidean distance between (lat, lon) and a reference point.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    lat, lon : latitude and longitude of the house(s); scalars or array-likes\n",
"        that support arithmetic (e.g. pandas Series).\n",
"    coords : (longitude, latitude) pair for the reference point.\n",
"\n",
"    Returns\n",
"    -------\n",
"    The straight-line distance in coordinate units, elementwise for array input.\n",
"    \"\"\"\n",
"    return np.sqrt((lat - coords[1])**2 + (lon - coords[0])**2)\n",
"\n",
"# Calculate distances to school and hospital\n",
"df['distance_to_school'] = calculate_distance(df['latitude'], df['longitude'], school_coords)\n",
"df['distance_to_hospital'] = calculate_distance(df['latitude'], df['longitude'], hospital_coords)\n",
"\n",
"# Determine if a house is close to either a school or hospital\n",
"df['close_to_school_or_hospital'] = (df['distance_to_school'] < CLOSE_THRESHOLD) | (df['distance_to_hospital'] < CLOSE_THRESHOLD)\n",
"\n",
"# Divide the dataset into two groups using the boolean flag directly\n",
"close_group = df.loc[df['close_to_school_or_hospital'], 'median_house_value']\n",
"far_group = df.loc[~df['close_to_school_or_hospital'], 'median_house_value']\n",
"\n",
"# Perform a one-sided Welch's t-test (H1: close houses have a higher mean value).\n",
"# The conclusion below is directional, so a two-sided test would also report\n",
"# \"more expensive\" when close houses were significantly cheaper.\n",
"t_stat, p_value = st.ttest_ind(close_group, far_group, equal_var=False, alternative='greater')\n",
"\n",
"# Print the results\n",
"print(f\"T-statistic: {t_stat}\")\n",
"print(f\"P-value: {p_value}\")\n",
"\n",
"# Conclusion at the 5% significance level\n",
"if p_value < 0.05:\n",
"    print(\"Reject the null hypothesis: Houses close to a school or hospital are significantly more expensive.\")\n",
"else:\n",
"    print(\"Fail to reject the null hypothesis: No evidence that houses close to a school or hospital are more expensive.\")"
]
},
{
"cell_type": "code",
Expand All @@ -498,7 +696,7 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"display_name": "venv",
"language": "python",
"name": "python3"
},
Expand All @@ -512,7 +710,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.9"
"version": "3.12.2"
}
},
"nbformat": 4,
Expand Down