Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
288 changes: 161 additions & 127 deletions lab-hypothesis-testing.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -38,14 +38,13 @@
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"#libraries\n",
"import pandas as pd\n",
"import scipy.stats as st\n",
"import numpy as np\n",
"from scipy.stats import ttest_ind\n",
"\n"
]
},
Expand Down Expand Up @@ -159,123 +158,24 @@
" <td>1</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>795</th>\n",
" <td>Diancie</td>\n",
" <td>Rock</td>\n",
" <td>Fairy</td>\n",
" <td>50</td>\n",
" <td>100</td>\n",
" <td>150</td>\n",
" <td>100</td>\n",
" <td>150</td>\n",
" <td>50</td>\n",
" <td>6</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>796</th>\n",
" <td>Mega Diancie</td>\n",
" <td>Rock</td>\n",
" <td>Fairy</td>\n",
" <td>50</td>\n",
" <td>160</td>\n",
" <td>110</td>\n",
" <td>160</td>\n",
" <td>110</td>\n",
" <td>110</td>\n",
" <td>6</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>797</th>\n",
" <td>Hoopa Confined</td>\n",
" <td>Psychic</td>\n",
" <td>Ghost</td>\n",
" <td>80</td>\n",
" <td>110</td>\n",
" <td>60</td>\n",
" <td>150</td>\n",
" <td>130</td>\n",
" <td>70</td>\n",
" <td>6</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>798</th>\n",
" <td>Hoopa Unbound</td>\n",
" <td>Psychic</td>\n",
" <td>Dark</td>\n",
" <td>80</td>\n",
" <td>160</td>\n",
" <td>60</td>\n",
" <td>170</td>\n",
" <td>130</td>\n",
" <td>80</td>\n",
" <td>6</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>799</th>\n",
" <td>Volcanion</td>\n",
" <td>Fire</td>\n",
" <td>Water</td>\n",
" <td>80</td>\n",
" <td>110</td>\n",
" <td>120</td>\n",
" <td>130</td>\n",
" <td>90</td>\n",
" <td>70</td>\n",
" <td>6</td>\n",
" <td>True</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>800 rows × 11 columns</p>\n",
"</div>"
],
"text/plain": [
" Name Type 1 Type 2 HP Attack Defense Sp. Atk Sp. Def \\\n",
"0 Bulbasaur Grass Poison 45 49 49 65 65 \n",
"1 Ivysaur Grass Poison 60 62 63 80 80 \n",
"2 Venusaur Grass Poison 80 82 83 100 100 \n",
"3 Mega Venusaur Grass Poison 80 100 123 122 120 \n",
"4 Charmander Fire NaN 39 52 43 60 50 \n",
".. ... ... ... .. ... ... ... ... \n",
"795 Diancie Rock Fairy 50 100 150 100 150 \n",
"796 Mega Diancie Rock Fairy 50 160 110 160 110 \n",
"797 Hoopa Confined Psychic Ghost 80 110 60 150 130 \n",
"798 Hoopa Unbound Psychic Dark 80 160 60 170 130 \n",
"799 Volcanion Fire Water 80 110 120 130 90 \n",
" Name Type 1 Type 2 HP Attack Defense Sp. Atk Sp. Def Speed \\\n",
"0 Bulbasaur Grass Poison 45 49 49 65 65 45 \n",
"1 Ivysaur Grass Poison 60 62 63 80 80 60 \n",
"2 Venusaur Grass Poison 80 82 83 100 100 80 \n",
"3 Mega Venusaur Grass Poison 80 100 123 122 120 80 \n",
"4 Charmander Fire NaN 39 52 43 60 50 65 \n",
"\n",
" Speed Generation Legendary \n",
"0 45 1 False \n",
"1 60 1 False \n",
"2 80 1 False \n",
"3 80 1 False \n",
"4 65 1 False \n",
".. ... ... ... \n",
"795 50 6 True \n",
"796 110 6 True \n",
"797 70 6 True \n",
"798 80 6 True \n",
"799 70 6 True \n",
"\n",
"[800 rows x 11 columns]"
" Generation Legendary \n",
"0 1 False \n",
"1 1 False \n",
"2 1 False \n",
"3 1 False \n",
"4 1 False "
]
},
"execution_count": 3,
Expand All @@ -285,7 +185,7 @@
],
"source": [
"df = pd.read_csv(\"https://raw.githubusercontent.com/data-bootcamp-v4/data/main/pokemon.csv\")\n",
"df"
"df.head()\n"
]
},
{
Expand All @@ -297,11 +197,39 @@
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": 4,
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"The T-statistic: 3.590444254130357\n",
"The p_value : 0.0002567969150153481\n",
"The p value is less than a ! We reject the H0, theat mean there is a different and Dragon does have more HP\n"
]
}
],
"source": [
"#code here"
"# H0: there is no difference -- the mean HP of Dragon equals the mean HP of Grass\n",
"# H1: Dragon has a higher mean HP than Grass\n",
"\n",
"dragon_hp = df[df[\"Type 1\"] == \"Dragon\"][\"HP\"]\n",
"grass_hp = df[df[\"Type 1\"] == \"Grass\"][\"HP\"]\n",
"\n",
"t_stat, p_val = ttest_ind(dragon_hp, grass_hp, alternative=\"greater\")\n",
"\n",
"print(\"The T-statistic:\", t_stat)\n",
"print(\"The p_value :\", p_val)\n",
"\n",
"a = 0.05\n",
"\n",
"alpha = 0.05\n",
"\n",
"if p_val > a :\n",
" print(\"The p value is greater than a ! We fail to reject the H0, that mean there is no different between their HP \")\n",
"else:\n",
" print(\"The p value is less than a ! We reject the H0, theat mean there is a different and Dragon does have more HP\")\n"
]
},
{
Expand All @@ -313,11 +241,69 @@
},
{
"cell_type": "code",
"execution_count": 18,
"execution_count": 13,
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
" HP\n",
"T-statistic: 8.036124405043928\n",
"The p value: 3.3306476848461913e-15\n",
" We reject the H0, that means there is a different between them\n",
"\n",
" Attack\n",
"T-statistic: 10.397321023700622\n",
"The p value: 7.827253003205333e-24\n",
" We reject the H0, that means there is a different between them\n",
"\n",
" Defense\n",
"T-statistic: 7.181240122992339\n",
"The p value: 1.5842226094427255e-12\n",
" We reject the H0, that means there is a different between them\n",
"\n",
" Sp. Atk\n",
"T-statistic: 14.191406210846289\n",
"The p value: 6.314915770427265e-41\n",
" We reject the H0, that means there is a different between them\n",
"\n",
" Sp. Def\n",
"T-statistic: 11.03775106120522\n",
"The p value: 1.8439809580409597e-26\n",
" We reject the H0, that means there is a different between them\n",
"\n",
" Speed\n",
"T-statistic: 9.765234331931898\n",
"The p value: 2.3540754436898437e-21\n",
" We reject the H0, that means there is a different between them\n"
]
}
],
"source": [
"#code here"
"# H0: Legendary and non-Legendary Pokemon have the same mean for each stat\n",
"# H1: the means differ (two-sided test, matching the alternative used below)\n",
"stats_cols = ['HP', 'Attack', 'Defense', 'Sp. Atk',\n",
" 'Sp. Def', 'Speed']\n",
"\n",
"legendary_df = df[df[\"Legendary\"] ==True]\n",
"none_legendary = df[df[\"Legendary\"] ==False]\n",
"\n",
"alpha = 0.05\n",
"\n",
"for col in stats_cols:\n",
" t_stat, p_val = ttest_ind(legendary_df[col], none_legendary[col], alternative=\"two-sided\")\n",
" print(f\"\\n {col}\")\n",
" print(f\"T-statistic:\", t_stat)\n",
" print(f\"The p value:\", p_val)\n",
"\n",
" if p_val > alpha:\n",
" print(\" We fail to reject H0, that means the is no diffenrent\")\n",
"\n",
" else:\n",
" print(\" We reject the H0, that means there is a different between them\")\n",
" "
]
},
{
Expand All @@ -337,7 +323,7 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 11,
"metadata": {},
"outputs": [
{
Expand Down Expand Up @@ -453,14 +439,14 @@
"4 624.0 262.0 1.9250 65500.0 "
]
},
"execution_count": 5,
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df = pd.read_csv(\"https://raw.githubusercontent.com/data-bootcamp-v4/data/main/california_housing.csv\")\n",
"df.head()"
"housing = pd.read_csv('https://raw.githubusercontent.com/data-bootcamp-v4/data/main/california_housing.csv')\n",
"housing.head()\n"
]
},
{
Expand All @@ -481,6 +467,54 @@
" "
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"T-statistic = 38.04632342033554\n",
"P-value = 2.408917945663922e-304\n",
"We reject the H0, houses that are colser to school are more expensive\n"
]
}
],
"source": [
"school_coords = (-118, 34)\n",
"hospital_coords = (-122, 37)\n",
"\n",
"def euclidean_distance(lon1, lat1, lon2, lat2):\n",
" return np.sqrt((lon1 - lon2)**2 + (lat1 - lat2)**2)\n",
"\n",
"housing[\"dist_school\"] = euclidean_distance(housing[\"longitude\"], housing[\"latitude\"], school_coords[0], school_coords[1])\n",
"housing[\"dist_hospital\"] = euclidean_distance(housing[\"longitude\"], housing[\"latitude\"], hospital_coords[0], hospital_coords[1])\n",
"\n",
"\n",
"# a house counts as close when its Euclidean distance (in longitude/latitude units) to the school OR the hospital is below 0.50\n",
"housing[\"is_close\"] = ((housing[\"dist_school\"] < 0.50) | (housing[\"dist_hospital\"] < 0.50))\n",
"\n",
"# split the median house values into the close group and the far group\n",
"close_houses = housing[housing[\"is_close\"] == True][\"median_house_value\"]\n",
"far_houses = housing[housing[\"is_close\"] == False][\"median_house_value\"]\n",
"\n",
"# one-sided two-sample t-test: is the mean value of close houses greater than that of far houses?\n",
"t_stat, p_val = ttest_ind(close_houses, far_houses, alternative=\"greater\")\n",
"\n",
"print(f\"T-statistic = {t_stat}\")\n",
"print(f\"P-value = {p_val}\")\n",
"\n",
"alpha = 0.05\n",
"\n",
"if p_val < alpha:\n",
" print(\"We reject the H0, houses that are colser to school are more expensive\")\n",
"else:\n",
" print(\"We fail to reject the H0, houses that are closer are not more expensive\")\n",
" \n"
]
},
{
"cell_type": "code",
"execution_count": null,
Expand All @@ -498,7 +532,7 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"display_name": "base",
"language": "python",
"name": "python3"
},
Expand All @@ -512,7 +546,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.9"
"version": "3.12.2"
}
},
"nbformat": 4,
Expand Down