Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
139 changes: 114 additions & 25 deletions lab-hypothesis-testing.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -38,20 +38,21 @@
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": 42,
"metadata": {},
"outputs": [],
"source": [
"#libraries\n",
"import pandas as pd\n",
"import scipy.stats as st\n",
"import numpy as np\n",
"\n"
"from scipy.stats import ttest_ind\n",
"from statsmodels.multivariate.manova import MANOVA\n"
]
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 43,
"metadata": {},
"outputs": [
{
Expand Down Expand Up @@ -278,7 +279,7 @@
"[800 rows x 11 columns]"
]
},
"execution_count": 3,
"execution_count": 43,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -297,11 +298,32 @@
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": 44,
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"T-statistic: 3.3350\n",
"P-value: 0.0008\n",
"✅ Dragon Pokémons have significantly higher HP than Grass Pokémons.\n",
"\n"
]
}
],
"source": [
"#code here"
"dragon_hp = df[df['Type 1'] == 'Dragon']['HP']\n",
"grass_hp = df[df['Type 1'] == 'Grass']['HP']\n",
"\n",
"t_stat_1, p_val_1 = ttest_ind(dragon_hp, grass_hp, equal_var=False, alternative='greater')\n",
"\n",
"print(f\"T-statistic: {t_stat_1:.4f}\")\n",
"print(f\"P-value: {p_val_1:.4f}\")\n",
"if p_val_1 < 0.05:\n",
" print(\"✅ Dragon Pokémons have significantly higher HP than Grass Pokémons.\\n\")\n",
"else:\n",
" print(\"❌ No significant difference in HP between Dragon and Grass Pokémons.\\n\")"
]
},
{
Expand All @@ -313,11 +335,55 @@
},
{
"cell_type": "code",
"execution_count": 18,
"execution_count": 45,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" Multivariate linear model\n",
"================================================================\n",
" \n",
"----------------------------------------------------------------\n",
" Intercept Value Num DF Den DF F Value Pr > F\n",
"----------------------------------------------------------------\n",
" Wilks' lambda 0.0592 6.0000 793.0000 2100.8338 0.0000\n",
" Pillai's trace 0.9408 6.0000 793.0000 2100.8338 0.0000\n",
" Hotelling-Lawley trace 15.8953 6.0000 793.0000 2100.8338 0.0000\n",
" Roy's greatest root 15.8953 6.0000 793.0000 2100.8338 0.0000\n",
"----------------------------------------------------------------\n",
" \n",
"----------------------------------------------------------------\n",
" Legendary Value Num DF Den DF F Value Pr > F\n",
"----------------------------------------------------------------\n",
" Wilks' lambda 0.7331 6.0000 793.0000 48.1098 0.0000\n",
" Pillai's trace 0.2669 6.0000 793.0000 48.1098 0.0000\n",
" Hotelling-Lawley trace 0.3640 6.0000 793.0000 48.1098 0.0000\n",
" Roy's greatest root 0.3640 6.0000 793.0000 48.1098 0.0000\n",
"================================================================\n",
"\n"
]
}
],
"source": [
"stats_cols = ['HP', 'Attack', 'Defense', 'Sp. Atk', 'Sp. Def', 'Speed']\n",
"\n",
"df = df.rename(columns={'Sp. Atk':'Sp_Atk', 'Sp. Def':'Sp_Def'})\n",
"stats_cols = ['HP', 'Attack', 'Defense', 'Sp_Atk', 'Sp_Def', 'Speed']\n",
"\n",
"manova = MANOVA.from_formula('HP + Attack + Defense + Sp_Atk + Sp_Def + Speed ~ Legendary', data=df)\n",
"manova_result = manova.mv_test()\n",
"\n",
"print(manova_result)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"outputs": [],
"source": [
"#code here"
"- The MANOVA test shows that Legendary and Non-Legendary Pokémons differ significantly across the combination of stats (HP, Attack, Defense, Sp_Atk, Sp_Def, Speed).\n",
"- Considering all stats together, Legendary Pokémons have a different profile than Non-Legendary Pokémons."
]
},
{
Expand All @@ -337,7 +403,7 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 46,
"metadata": {},
"outputs": [
{
Expand Down Expand Up @@ -453,7 +519,7 @@
"4 624.0 262.0 1.9250 65500.0 "
]
},
"execution_count": 5,
"execution_count": 46,
"metadata": {},
"output_type": "execute_result"
}
Expand Down Expand Up @@ -483,22 +549,45 @@
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 47,
"metadata": {},
"outputs": [],
"source": []
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"T-statistic: 37.9923, P-value: 0.0000\n",
"✅ Houses close to schools or hospitals are significantly more expensive.\n",
"\n"
]
}
],
"source": [
"def euclidean_dist(lon1, lat1, lon2, lat2):\n",
" return np.sqrt((lon1 - lon2)**2 + (lat1 - lat2)**2)\n",
"\n",
"dist_school = euclidean_dist(df['longitude'], df['latitude'], *school)\n",
"dist_hospital = euclidean_dist(df['longitude'], df['latitude'], *hospital)\n",
"\n",
"close = (dist_school < 0.5) | (dist_hospital < 0.5)\n",
"df['close_to_school_or_hospital'] = close\n",
"\n",
"close_prices = df[df['close_to_school_or_hospital']]['median_house_value']\n",
"far_prices = df[~df['close_to_school_or_hospital']]['median_house_value']\n",
"\n",
"t_stat, p_val = ttest_ind(close_prices, far_prices, equal_var=False)\n",
"\n",
"print(f\"T-statistic: {t_stat:.4f}, P-value: {p_val:.4f}\")\n",
"if p_val < 0.05:\n",
" print(\"✅ Houses close to schools or hospitals are significantly more expensive.\\n\")\n",
"else:\n",
" print(\"❌ No significant price difference based on proximity.\\n\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
Expand All @@ -512,9 +601,9 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.9"
"version": "3.13.3"
}
},
"nbformat": 4,
"nbformat_minor": 2
"nbformat_minor": 4
}