Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
170 changes: 155 additions & 15 deletions lab-hypothesis-testing.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": null,
"metadata": {},
"outputs": [
{
Expand Down Expand Up @@ -278,7 +278,7 @@
"[800 rows x 11 columns]"
]
},
"execution_count": 3,
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -297,11 +297,65 @@
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"#code here"
"# H0 = there is no difference\n",
"# H1 = Dragons have higher avg HP\n"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Reject H0: Dragon-type Pokémon have significantly higher HP than Grass-type Pokémon.\n"
]
}
],
"source": [
"from scipy.stats import ttest_ind\n",
"\n",
"grass_hp = df[df['Type 1'] == 'Grass']['HP']\n",
"dragon_hp = df[df['Type 1'] == 'Dragon']['HP']\n",
"\n",
"# t-test\n",
"t_stat, p_value_two_tailed = ttest_ind(dragon_hp, grass_hp, equal_var=False)\n",
"\n",
"# Convert to one-tailed p-value (Dragon > Grass)\n",
"p_value_one_tailed = p_value_two_tailed / 2 if t_stat > 0 else 1 - (p_value_two_tailed / 2)\n",
"\n",
"t_stat, p_value_one_tailed\n",
"\n",
"if p_value_one_tailed < 0.05:\n",
" print(\"Reject H0: Dragon-type Pokémon have significantly higher HP than Grass-type Pokémon.\")\n",
"else:\n",
" print(\"Fail to reject H0: No significant evidence that Dragon-type Pokémon have higher HP.\")"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(3.3349632905124063, 0.0007993609745420598)"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"t_stat, p_value_one_tailed"
]
},
{
Expand All @@ -313,11 +367,57 @@
},
{
"cell_type": "code",
"execution_count": 18,
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"#code here"
"# H0 = No difference in stats\n",
"# H1 = Different stats"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" Multivariate linear model\n",
"================================================================\n",
" \n",
"----------------------------------------------------------------\n",
" Intercept Value Num DF Den DF F Value Pr > F\n",
"----------------------------------------------------------------\n",
" Wilks' lambda 0.0592 6.0000 793.0000 2100.8338 0.0000\n",
" Pillai's trace 0.9408 6.0000 793.0000 2100.8338 0.0000\n",
" Hotelling-Lawley trace 15.8953 6.0000 793.0000 2100.8338 0.0000\n",
" Roy's greatest root 15.8953 6.0000 793.0000 2100.8338 0.0000\n",
"----------------------------------------------------------------\n",
" \n",
"----------------------------------------------------------------\n",
" Legendary Value Num DF Den DF F Value Pr > F\n",
"----------------------------------------------------------------\n",
" Wilks' lambda 0.7331 6.0000 793.0000 48.1098 0.0000\n",
" Pillai's trace 0.2669 6.0000 793.0000 48.1098 0.0000\n",
" Hotelling-Lawley trace 0.3640 6.0000 793.0000 48.1098 0.0000\n",
" Roy's greatest root 0.3640 6.0000 793.0000 48.1098 0.0000\n",
"================================================================\n",
"\n"
]
}
],
"source": [
"from statsmodels.multivariate.manova import MANOVA\n",
"\n",
"stats_cols = ['HP', 'Attack', 'Defense', 'Sp. Atk', 'Sp. Def', 'Speed']\n",
"df_manova = df[stats_cols + ['Legendary']]\n",
"\n",
"\n",
"maov = MANOVA.from_formula('HP + Attack + Defense + Q(\"Sp. Atk\") + Q(\"Sp. Def\") + Speed ~ Legendary', data=df_manova)\n",
"result = maov.mv_test()\n",
"print(result)"
]
},
{
Expand All @@ -337,7 +437,7 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 14,
"metadata": {},
"outputs": [
{
Expand Down Expand Up @@ -453,7 +553,7 @@
"4 624.0 262.0 1.9250 65500.0 "
]
},
"execution_count": 5,
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
Expand Down Expand Up @@ -483,22 +583,62 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 15,
"metadata": {},
"outputs": [],
"source": []
"source": [
"from scipy.stats import ttest_ind\n",
"\n",
"school_coords = (-118, 34)\n",
"hospital_coords = (-122, 37)\n",
"\n",
"def euclidean_distance(lat1, lon1, lat2, lon2):\n",
" return np.sqrt((lat1 - lat2)**2 + (lon1 - lon2)**2)"
]
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 16,
"metadata": {},
"outputs": [],
"source": []
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Close to facility house values: [124700. 137500. 169100. ... 277700. 319400. 286100.]\n",
"Far from facility house values: [ 66900. 80100. 85700. ... 103600. 85800. 94600.]\n",
"t-statistic: 37.992, p-value: 0.000\n",
"Reject H0: Houses close to a school or hospital are significantly more expensive.\n"
]
}
],
"source": [
"df['dist_school'] = euclidean_distance(df['latitude'], df['longitude'], school_coords[1], school_coords[0])\n",
"df['dist_hospital'] = euclidean_distance(df['latitude'], df['longitude'], hospital_coords[1], hospital_coords[0])\n",
"\n",
"df['close_to_facility'] = (df['dist_school'] < 0.50) | (df['dist_hospital'] < 0.50)\n",
"\n",
"close_prices = df[df['close_to_facility']]['median_house_value']\n",
"far_prices = df[~df['close_to_facility']]['median_house_value']\n",
"\n",
"t_stat, p_value_two_tailed = ttest_ind(close_prices, far_prices, equal_var=False)\n",
"p_value_one_tailed = p_value_two_tailed / 2 if t_stat > 0 else 1 - (p_value_two_tailed / 2)\n",
"\n",
"\n",
"print(\"Close to facility house values:\", close_prices.values)\n",
"print(\"Far from facility house values:\", far_prices.values)\n",
"print(f\"t-statistic: {t_stat:.3f}, p-value: {p_value_one_tailed:.3f}\")\n",
"\n",
"if p_value_one_tailed < 0.05:\n",
" print(\"Reject H0: Houses close to a school or hospital are significantly more expensive.\")\n",
"else:\n",
" print(\"Fail to reject H0: No significant evidence that proximity to a school or hospital increases house prices.\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"display_name": "base",
"language": "python",
"name": "python3"
},
Expand All @@ -512,7 +652,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.9"
"version": "3.11.7"
}
},
"nbformat": 4,
Expand Down