Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
206 changes: 188 additions & 18 deletions lab-hypothesis-testing.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 2,
"metadata": {},
"outputs": [
{
Expand Down Expand Up @@ -278,7 +278,7 @@
"[800 rows x 11 columns]"
]
},
"execution_count": 3,
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -297,11 +297,46 @@
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": 3,
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"T-statistic: 3.335, One-tailed p-value: 0.0008\n",
"Reject H0: Dragon-type Pokémon have significantly higher HP than Grass-type Pokémon.\n",
"Mean HP - Dragon: 83.31, Grass: 67.27\n"
]
}
],
"source": [
"#code here"
"# H0: mean HP of Dragon-type Pokémon <= mean HP of Grass-type Pokémon.\n",
"# H1: mean HP of Dragon-type Pokémon >  mean HP of Grass-type Pokémon.\n",
"# Groups are selected on the primary type column ('Type 1') only.\n",
"dragon_hp = df.loc[df['Type 1'] == 'Dragon', 'HP']\n",
"grass_hp = df.loc[df['Type 1'] == 'Grass', 'HP']\n",
"\n",
"# Welch's t-test (equal_var=False). alternative='greater' makes SciPy return the\n",
"# one-sided p-value for \"first sample mean > second sample mean\" directly, so no\n",
"# manual halving of a two-sided p-value (and no sign branch) is needed.\n",
"t_stat, p_value_one_tailed = st.ttest_ind(\n",
"    dragon_hp, grass_hp, equal_var=False, alternative='greater'\n",
")\n",
"\n",
"print(f\"T-statistic: {t_stat:.3f}, One-tailed p-value: {p_value_one_tailed:.4f}\")\n",
"\n",
"# Significance check\n",
"alpha = 0.05\n",
"if p_value_one_tailed < alpha:\n",
"    print(\"Reject H0: Dragon-type Pokémon have significantly higher HP than Grass-type Pokémon.\")\n",
"else:\n",
"    # A one-sided test cannot conclude \"no difference\"; it only fails to show Dragon > Grass.\n",
"    print(\"Fail to reject H0: No evidence that Dragon-type Pokémon have higher HP than Grass-type Pokémon.\")\n",
"\n",
"# Group means, to show the direction and size of the difference\n",
"mean_dragon = dragon_hp.mean()\n",
"mean_grass = grass_hp.mean()\n",
"print(f\"Mean HP - Dragon: {mean_dragon:.2f}, Grass: {mean_grass:.2f}\")"
]
},
{
Expand All @@ -313,11 +348,84 @@
},
{
"cell_type": "code",
"execution_count": 18,
"execution_count": 6,
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"Stat: HP\n",
"T-statistic: 8.981, Two-tailed p-value: 0.0000\n",
"Reject H0: Legendary and Non-Legendary Pokémon have significantly different HP.\n",
"\n",
"Stat: Attack\n",
"T-statistic: 10.438, Two-tailed p-value: 0.0000\n",
"Reject H0: Legendary and Non-Legendary Pokémon have significantly different Attack.\n",
"\n",
"Stat: Defense\n",
"T-statistic: 7.637, Two-tailed p-value: 0.0000\n",
"Reject H0: Legendary and Non-Legendary Pokémon have significantly different Defense.\n",
"\n",
"Stat: Sp. Atk\n",
"T-statistic: 13.417, Two-tailed p-value: 0.0000\n",
"Reject H0: Legendary and Non-Legendary Pokémon have significantly different Sp. Atk.\n",
"\n",
"Stat: Sp. Def\n",
"T-statistic: 10.016, Two-tailed p-value: 0.0000\n",
"Reject H0: Legendary and Non-Legendary Pokémon have significantly different Sp. Def.\n",
"\n",
"Stat: Speed\n",
"T-statistic: 11.475, Two-tailed p-value: 0.0000\n",
"Reject H0: Legendary and Non-Legendary Pokémon have significantly different Speed.\n",
"\n",
"Mean stats for Legendary Pokémon:\n",
"HP 92.738462\n",
"Attack 116.676923\n",
"Defense 99.661538\n",
"Sp. Atk 122.184615\n",
"Sp. Def 105.938462\n",
"Speed 100.184615\n",
"dtype: float64\n",
"\n",
"Mean stats for Non-Legendary Pokémon:\n",
"HP 67.182313\n",
"Attack 75.669388\n",
"Defense 71.559184\n",
"Sp. Atk 68.454422\n",
"Sp. Def 68.892517\n",
"Speed 65.455782\n",
"dtype: float64\n"
]
}
],
"source": [
"#code here"
"# Base stats compared between the two groups\n",
"stats_cols = ['HP', 'Attack', 'Defense', 'Sp. Atk', 'Sp. Def', 'Speed']\n",
"\n",
"# Significance level\n",
"alpha = 0.05\n",
"\n",
"# Partition the rows on the 'Legendary' flag\n",
"legendary = df.loc[df['Legendary'].eq(True)]\n",
"non_legendary = df.loc[df['Legendary'].eq(False)]\n",
"\n",
"# Welch's two-sided t-test for each stat; the verdict is printed after the numbers\n",
"for col in stats_cols:\n",
"    t_stat, p_value_two_tailed = st.ttest_ind(\n",
"        legendary[col], non_legendary[col], equal_var=False\n",
"    )\n",
"    verdict = (\n",
"        f\"Reject H0: Legendary and Non-Legendary Pokémon have significantly different {col}.\"\n",
"        if p_value_two_tailed < alpha\n",
"        else f\"Fail to reject H0: No significant difference in {col} between Legendary and Non-Legendary Pokémon.\"\n",
"    )\n",
"    print(f\"\\nStat: {col}\")\n",
"    print(f\"T-statistic: {t_stat:.3f}, Two-tailed p-value: {p_value_two_tailed:.4f}\")\n",
"    print(verdict)\n",
"\n",
"# Per-group means, to show the direction of each difference\n",
"for label, group in (('Legendary', legendary), ('Non-Legendary', non_legendary)):\n",
"    print(f\"\\nMean stats for {label} Pokémon:\")\n",
"    print(group[stats_cols].mean())"
]
},
{
Expand All @@ -337,7 +445,7 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 17,
"metadata": {},
"outputs": [
{
Expand Down Expand Up @@ -453,7 +561,7 @@
"4 624.0 262.0 1.9250 65500.0 "
]
},
"execution_count": 5,
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
Expand Down Expand Up @@ -483,22 +591,84 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 18,
"metadata": {},
"outputs": [],
"source": []
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"['longitude', 'latitude', 'housing_median_age', 'total_rooms', 'total_bedrooms', 'population', 'households', 'median_income', 'median_house_value']\n"
]
}
],
"source": [
"# Show the available column names as a plain Python list\n",
"print(list(df.columns))"
]
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 24,
"metadata": {},
"outputs": [],
"source": []
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"T-statistic: 37.992\n",
"One-tailed p-value: 0.0000\n",
"Reject H0: Houses close to a school or hospital are significantly more expensive.\n",
"Mean house value (close): 246951.98\n",
"Mean house value (far): 180678.44\n"
]
}
],
"source": [
"# Cutoff below which a house counts as \"close\"; expressed in the same units as\n",
"# the longitude/latitude columns because the distance is computed on them directly.\n",
"CLOSE_THRESHOLD = 0.50\n",
"\n",
"# Significance level\n",
"alpha = 0.05\n",
"\n",
"\n",
"def euclidean_distance(lon1, lat1, lon2, lat2):\n",
"    \"\"\"Return the planar Euclidean distance between (lon1, lat1) and (lon2, lat2).\n",
"\n",
"    The coordinates are treated as plain x/y values: no geodesic correction is\n",
"    applied. Arguments may be scalars or array-likes; NumPy broadcasting applies.\n",
"    \"\"\"\n",
"    return np.sqrt((lon1 - lon2)**2 + (lat1 - lat2)**2)\n",
"\n",
"\n",
"# Distance from each house to the school and to the hospital.\n",
"# school_coord / hospital_coord are indexed as (longitude, latitude).\n",
"df['dist_school'] = euclidean_distance(df['longitude'], df['latitude'], school_coord[0], school_coord[1])\n",
"df['dist_hospital'] = euclidean_distance(df['longitude'], df['latitude'], hospital_coord[0], hospital_coord[1])\n",
"\n",
"# A house is judged by whichever of the two facilities is nearer\n",
"df['min_dist'] = df[['dist_school', 'dist_hospital']].min(axis=1)\n",
"\n",
"# Split house values into close (< threshold) and far (>= threshold) groups\n",
"is_close = df['min_dist'] < CLOSE_THRESHOLD\n",
"close_houses = df.loc[is_close, 'median_house_value']\n",
"far_houses = df.loc[df['min_dist'] >= CLOSE_THRESHOLD, 'median_house_value']\n",
"\n",
"# H0: mean value of close houses <= mean value of far houses.\n",
"# H1: mean value of close houses >  mean value of far houses.\n",
"# Welch's t-test via the same `st` alias the other cells use; alternative='greater'\n",
"# returns the one-sided p-value directly instead of halving a two-sided one by hand.\n",
"t_stat, p_value_one_tailed = st.ttest_ind(\n",
"    close_houses, far_houses, equal_var=False, alternative='greater'\n",
")\n",
"\n",
"# Print results\n",
"print(f\"T-statistic: {t_stat:.3f}\")\n",
"print(f\"One-tailed p-value: {p_value_one_tailed:.4f}\")\n",
"\n",
"if p_value_one_tailed < alpha:\n",
"    print(\"Reject H0: Houses close to a school or hospital are significantly more expensive.\")\n",
"else:\n",
"    # A one-sided test cannot conclude \"no difference\"; it only fails to show close > far.\n",
"    print(\"Fail to reject H0: No evidence that houses close to a school or hospital are more expensive.\")\n",
"\n",
"# Group means, to show the direction and size of the difference\n",
"print(f\"Mean house value (close): {close_houses.mean():.2f}\")\n",
"print(f\"Mean house value (far): {far_houses.mean():.2f}\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"display_name": "tfdf_env",
"language": "python",
"name": "python3"
},
Expand All @@ -512,7 +682,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.9"
"version": "3.11.13"
}
},
"nbformat": 4,
Expand Down