diff --git a/lab-hypothesis-testing.ipynb b/lab-hypothesis-testing.ipynb
index 0cc26d5..d047a06 100644
--- a/lab-hypothesis-testing.ipynb
+++ b/lab-hypothesis-testing.ipynb
@@ -38,7 +38,7 @@
},
{
"cell_type": "code",
- "execution_count": 1,
+ "execution_count": 165,
"metadata": {},
"outputs": [],
"source": [
@@ -51,7 +51,7 @@
},
{
"cell_type": "code",
- "execution_count": 3,
+ "execution_count": 166,
"metadata": {},
"outputs": [
{
@@ -278,7 +278,7 @@
"[800 rows x 11 columns]"
]
},
- "execution_count": 3,
+ "execution_count": 166,
"metadata": {},
"output_type": "execute_result"
}
@@ -288,56 +288,1433 @@
"df"
]
},
+ {
+ "cell_type": "code",
+ "execution_count": 167,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " name | \n",
+ " type1 | \n",
+ " type2 | \n",
+ " hp | \n",
+ " attack | \n",
+ " defense | \n",
+ " sp_atk | \n",
+ " sp_def | \n",
+ " speed | \n",
+ " generation | \n",
+ " legendary | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " Bulbasaur | \n",
+ " Grass | \n",
+ " Poison | \n",
+ " 45 | \n",
+ " 49 | \n",
+ " 49 | \n",
+ " 65 | \n",
+ " 65 | \n",
+ " 45 | \n",
+ " 1 | \n",
+ " False | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " Ivysaur | \n",
+ " Grass | \n",
+ " Poison | \n",
+ " 60 | \n",
+ " 62 | \n",
+ " 63 | \n",
+ " 80 | \n",
+ " 80 | \n",
+ " 60 | \n",
+ " 1 | \n",
+ " False | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " Venusaur | \n",
+ " Grass | \n",
+ " Poison | \n",
+ " 80 | \n",
+ " 82 | \n",
+ " 83 | \n",
+ " 100 | \n",
+ " 100 | \n",
+ " 80 | \n",
+ " 1 | \n",
+ " False | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " Mega Venusaur | \n",
+ " Grass | \n",
+ " Poison | \n",
+ " 80 | \n",
+ " 100 | \n",
+ " 123 | \n",
+ " 122 | \n",
+ " 120 | \n",
+ " 80 | \n",
+ " 1 | \n",
+ " False | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " Charmander | \n",
+ " Fire | \n",
+ " NaN | \n",
+ " 39 | \n",
+ " 52 | \n",
+ " 43 | \n",
+ " 60 | \n",
+ " 50 | \n",
+ " 65 | \n",
+ " 1 | \n",
+ " False | \n",
+ "
\n",
+ " \n",
+ " | ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " | 795 | \n",
+ " Diancie | \n",
+ " Rock | \n",
+ " Fairy | \n",
+ " 50 | \n",
+ " 100 | \n",
+ " 150 | \n",
+ " 100 | \n",
+ " 150 | \n",
+ " 50 | \n",
+ " 6 | \n",
+ " True | \n",
+ "
\n",
+ " \n",
+ " | 796 | \n",
+ " Mega Diancie | \n",
+ " Rock | \n",
+ " Fairy | \n",
+ " 50 | \n",
+ " 160 | \n",
+ " 110 | \n",
+ " 160 | \n",
+ " 110 | \n",
+ " 110 | \n",
+ " 6 | \n",
+ " True | \n",
+ "
\n",
+ " \n",
+ " | 797 | \n",
+ " Hoopa Confined | \n",
+ " Psychic | \n",
+ " Ghost | \n",
+ " 80 | \n",
+ " 110 | \n",
+ " 60 | \n",
+ " 150 | \n",
+ " 130 | \n",
+ " 70 | \n",
+ " 6 | \n",
+ " True | \n",
+ "
\n",
+ " \n",
+ " | 798 | \n",
+ " Hoopa Unbound | \n",
+ " Psychic | \n",
+ " Dark | \n",
+ " 80 | \n",
+ " 160 | \n",
+ " 60 | \n",
+ " 170 | \n",
+ " 130 | \n",
+ " 80 | \n",
+ " 6 | \n",
+ " True | \n",
+ "
\n",
+ " \n",
+ " | 799 | \n",
+ " Volcanion | \n",
+ " Fire | \n",
+ " Water | \n",
+ " 80 | \n",
+ " 110 | \n",
+ " 120 | \n",
+ " 130 | \n",
+ " 90 | \n",
+ " 70 | \n",
+ " 6 | \n",
+ " True | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
800 rows × 11 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " name type1 type2 hp attack defense sp_atk sp_def \\\n",
+ "0 Bulbasaur Grass Poison 45 49 49 65 65 \n",
+ "1 Ivysaur Grass Poison 60 62 63 80 80 \n",
+ "2 Venusaur Grass Poison 80 82 83 100 100 \n",
+ "3 Mega Venusaur Grass Poison 80 100 123 122 120 \n",
+ "4 Charmander Fire NaN 39 52 43 60 50 \n",
+ ".. ... ... ... .. ... ... ... ... \n",
+ "795 Diancie Rock Fairy 50 100 150 100 150 \n",
+ "796 Mega Diancie Rock Fairy 50 160 110 160 110 \n",
+ "797 Hoopa Confined Psychic Ghost 80 110 60 150 130 \n",
+ "798 Hoopa Unbound Psychic Dark 80 160 60 170 130 \n",
+ "799 Volcanion Fire Water 80 110 120 130 90 \n",
+ "\n",
+ " speed generation legendary \n",
+ "0 45 1 False \n",
+ "1 60 1 False \n",
+ "2 80 1 False \n",
+ "3 80 1 False \n",
+ "4 65 1 False \n",
+ ".. ... ... ... \n",
+ "795 50 6 True \n",
+ "796 110 6 True \n",
+ "797 70 6 True \n",
+ "798 80 6 True \n",
+ "799 70 6 True \n",
+ "\n",
+ "[800 rows x 11 columns]"
+ ]
+ },
+ "execution_count": 167,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df.columns=df.columns.str.strip().str.lower().str.replace('.', '_').str.replace(' ', '')\n",
+ "df"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 168,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df.isna().sum()\n",
+ "df=df.dropna().reset_index(drop=True)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 169,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " name | \n",
+ " type1 | \n",
+ " type2 | \n",
+ " hp | \n",
+ " attack | \n",
+ " defense | \n",
+ " sp_atk | \n",
+ " sp_def | \n",
+ " speed | \n",
+ " generation | \n",
+ " legendary | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " Bulbasaur | \n",
+ " Grass | \n",
+ " Poison | \n",
+ " 45 | \n",
+ " 49 | \n",
+ " 49 | \n",
+ " 65 | \n",
+ " 65 | \n",
+ " 45 | \n",
+ " 1 | \n",
+ " False | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " Ivysaur | \n",
+ " Grass | \n",
+ " Poison | \n",
+ " 60 | \n",
+ " 62 | \n",
+ " 63 | \n",
+ " 80 | \n",
+ " 80 | \n",
+ " 60 | \n",
+ " 1 | \n",
+ " False | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " Venusaur | \n",
+ " Grass | \n",
+ " Poison | \n",
+ " 80 | \n",
+ " 82 | \n",
+ " 83 | \n",
+ " 100 | \n",
+ " 100 | \n",
+ " 80 | \n",
+ " 1 | \n",
+ " False | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " Mega Venusaur | \n",
+ " Grass | \n",
+ " Poison | \n",
+ " 80 | \n",
+ " 100 | \n",
+ " 123 | \n",
+ " 122 | \n",
+ " 120 | \n",
+ " 80 | \n",
+ " 1 | \n",
+ " False | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " Charizard | \n",
+ " Fire | \n",
+ " Flying | \n",
+ " 78 | \n",
+ " 84 | \n",
+ " 78 | \n",
+ " 109 | \n",
+ " 85 | \n",
+ " 100 | \n",
+ " 1 | \n",
+ " False | \n",
+ "
\n",
+ " \n",
+ " | ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " | 409 | \n",
+ " Diancie | \n",
+ " Rock | \n",
+ " Fairy | \n",
+ " 50 | \n",
+ " 100 | \n",
+ " 150 | \n",
+ " 100 | \n",
+ " 150 | \n",
+ " 50 | \n",
+ " 6 | \n",
+ " True | \n",
+ "
\n",
+ " \n",
+ " | 410 | \n",
+ " Mega Diancie | \n",
+ " Rock | \n",
+ " Fairy | \n",
+ " 50 | \n",
+ " 160 | \n",
+ " 110 | \n",
+ " 160 | \n",
+ " 110 | \n",
+ " 110 | \n",
+ " 6 | \n",
+ " True | \n",
+ "
\n",
+ " \n",
+ " | 411 | \n",
+ " Hoopa Confined | \n",
+ " Psychic | \n",
+ " Ghost | \n",
+ " 80 | \n",
+ " 110 | \n",
+ " 60 | \n",
+ " 150 | \n",
+ " 130 | \n",
+ " 70 | \n",
+ " 6 | \n",
+ " True | \n",
+ "
\n",
+ " \n",
+ " | 412 | \n",
+ " Hoopa Unbound | \n",
+ " Psychic | \n",
+ " Dark | \n",
+ " 80 | \n",
+ " 160 | \n",
+ " 60 | \n",
+ " 170 | \n",
+ " 130 | \n",
+ " 80 | \n",
+ " 6 | \n",
+ " True | \n",
+ "
\n",
+ " \n",
+ " | 413 | \n",
+ " Volcanion | \n",
+ " Fire | \n",
+ " Water | \n",
+ " 80 | \n",
+ " 110 | \n",
+ " 120 | \n",
+ " 130 | \n",
+ " 90 | \n",
+ " 70 | \n",
+ " 6 | \n",
+ " True | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
414 rows × 11 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " name type1 type2 hp attack defense sp_atk sp_def \\\n",
+ "0 Bulbasaur Grass Poison 45 49 49 65 65 \n",
+ "1 Ivysaur Grass Poison 60 62 63 80 80 \n",
+ "2 Venusaur Grass Poison 80 82 83 100 100 \n",
+ "3 Mega Venusaur Grass Poison 80 100 123 122 120 \n",
+ "4 Charizard Fire Flying 78 84 78 109 85 \n",
+ ".. ... ... ... .. ... ... ... ... \n",
+ "409 Diancie Rock Fairy 50 100 150 100 150 \n",
+ "410 Mega Diancie Rock Fairy 50 160 110 160 110 \n",
+ "411 Hoopa Confined Psychic Ghost 80 110 60 150 130 \n",
+ "412 Hoopa Unbound Psychic Dark 80 160 60 170 130 \n",
+ "413 Volcanion Fire Water 80 110 120 130 90 \n",
+ "\n",
+ " speed generation legendary \n",
+ "0 45 1 False \n",
+ "1 60 1 False \n",
+ "2 80 1 False \n",
+ "3 80 1 False \n",
+ "4 100 1 False \n",
+ ".. ... ... ... \n",
+ "409 50 6 True \n",
+ "410 110 6 True \n",
+ "411 70 6 True \n",
+ "412 80 6 True \n",
+ "413 70 6 True \n",
+ "\n",
+ "[414 rows x 11 columns]"
+ ]
+ },
+ "execution_count": 169,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "- We posit that Dragon-type Pokemon have, on average, higher HP than Grass-type Pokemon. Choose the proper test and, at a 5% significance level, comment on your findings."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# H0: Grp1_mean <= Grp2_mean\n",
+ "# H1: Grp1_mean > Grp2_mean"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 170,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "0 45\n",
+ "1 60\n",
+ "2 80\n",
+ "3 80\n",
+ "24 45\n",
+ "25 60\n",
+ "26 75\n",
+ "32 50\n",
+ "33 65\n",
+ "34 80\n",
+ "55 60\n",
+ "56 95\n",
+ "94 35\n",
+ "95 55\n",
+ "96 75\n",
+ "133 70\n",
+ "145 70\n",
+ "146 90\n",
+ "157 60\n",
+ "172 50\n",
+ "181 70\n",
+ "196 99\n",
+ "215 95\n",
+ "223 40\n",
+ "224 60\n",
+ "256 60\n",
+ "257 90\n",
+ "258 90\n",
+ "281 100\n",
+ "300 40\n",
+ "301 60\n",
+ "321 69\n",
+ "322 114\n",
+ "327 44\n",
+ "328 74\n",
+ "349 91\n",
+ "364 88\n",
+ "Name: hp, dtype: int64"
+ ]
+ },
+ "execution_count": 170,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "grp_B=df[df[\"type1\"]==\"Grass\"][\"hp\"]\n",
+ "grp_B"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 171,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "76 91\n",
+ "183 75\n",
+ "184 75\n",
+ "201 95\n",
+ "202 95\n",
+ "207 80\n",
+ "208 80\n",
+ "209 80\n",
+ "210 80\n",
+ "212 105\n",
+ "213 105\n",
+ "245 58\n",
+ "246 68\n",
+ "247 108\n",
+ "248 108\n",
+ "352 100\n",
+ "353 100\n",
+ "356 125\n",
+ "357 125\n",
+ "358 125\n",
+ "408 108\n",
+ "Name: hp, dtype: int64"
+ ]
+ },
+ "execution_count": 171,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "grp_A=df[df[\"type1\"]==\"Dragon\"][\"hp\"]\n",
+ "grp_A"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 172,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "TtestResult(statistic=4.649881427485321, pvalue=2.068191387085888e-05, df=56.0)"
+ ]
+ },
+ "execution_count": 172,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "from scipy import stats\n",
+ "# This returns the t-statistic and the p-value\n",
+ "stats.ttest_ind(grp_A, grp_B)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 173,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# H0: Grp1_mean <= Grp2_mean\n",
+ "# H1: Grp1_mean > Grp2_mean\n",
+ "## The p-value (about 2.07e-05, two-sided) is lower than the significance level of 0.05, so we reject H0: the data support that Grp1 (Dragon) has a higher mean HP than Grp2 (Grass)."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "- We posit that Legendary Pokemon have different stats (HP, Attack, Defense, Sp.Atk, Sp.Def, Speed) compared with Non-Legendary Pokemon. Choose the proper test and, at a 5% significance level, comment on your findings.\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# H0: Legendary Pokemons have same stats (HP, Attack, Defense, Sp.Atk, Sp.Def, Speed) as Non-legendary\n",
+ "# H1: Legendary Pokemons have different stats (HP, Attack, Defense, Sp.Atk, Sp.Def, Speed) compared to Non-legendary"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 174,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Separate the groups based on the 'legendary' column\n",
+ "stats=[\"hp\", \"attack\", \"defense\", \"sp_atk\", \"sp_def\", \"speed\",\"legendary\"]\n",
+ "df_stats = df[stats]\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 175,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " hp | \n",
+ " attack | \n",
+ " defense | \n",
+ " sp_atk | \n",
+ " sp_def | \n",
+ " speed | \n",
+ " legendary | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 45 | \n",
+ " 49 | \n",
+ " 49 | \n",
+ " 65 | \n",
+ " 65 | \n",
+ " 45 | \n",
+ " False | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 60 | \n",
+ " 62 | \n",
+ " 63 | \n",
+ " 80 | \n",
+ " 80 | \n",
+ " 60 | \n",
+ " False | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 80 | \n",
+ " 82 | \n",
+ " 83 | \n",
+ " 100 | \n",
+ " 100 | \n",
+ " 80 | \n",
+ " False | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 80 | \n",
+ " 100 | \n",
+ " 123 | \n",
+ " 122 | \n",
+ " 120 | \n",
+ " 80 | \n",
+ " False | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 78 | \n",
+ " 84 | \n",
+ " 78 | \n",
+ " 109 | \n",
+ " 85 | \n",
+ " 100 | \n",
+ " False | \n",
+ "
\n",
+ " \n",
+ " | ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " | 409 | \n",
+ " 50 | \n",
+ " 100 | \n",
+ " 150 | \n",
+ " 100 | \n",
+ " 150 | \n",
+ " 50 | \n",
+ " True | \n",
+ "
\n",
+ " \n",
+ " | 410 | \n",
+ " 50 | \n",
+ " 160 | \n",
+ " 110 | \n",
+ " 160 | \n",
+ " 110 | \n",
+ " 110 | \n",
+ " True | \n",
+ "
\n",
+ " \n",
+ " | 411 | \n",
+ " 80 | \n",
+ " 110 | \n",
+ " 60 | \n",
+ " 150 | \n",
+ " 130 | \n",
+ " 70 | \n",
+ " True | \n",
+ "
\n",
+ " \n",
+ " | 412 | \n",
+ " 80 | \n",
+ " 160 | \n",
+ " 60 | \n",
+ " 170 | \n",
+ " 130 | \n",
+ " 80 | \n",
+ " True | \n",
+ "
\n",
+ " \n",
+ " | 413 | \n",
+ " 80 | \n",
+ " 110 | \n",
+ " 120 | \n",
+ " 130 | \n",
+ " 90 | \n",
+ " 70 | \n",
+ " True | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
414 rows × 7 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " hp attack defense sp_atk sp_def speed legendary\n",
+ "0 45 49 49 65 65 45 False\n",
+ "1 60 62 63 80 80 60 False\n",
+ "2 80 82 83 100 100 80 False\n",
+ "3 80 100 123 122 120 80 False\n",
+ "4 78 84 78 109 85 100 False\n",
+ ".. .. ... ... ... ... ... ...\n",
+ "409 50 100 150 100 150 50 True\n",
+ "410 50 160 110 160 110 110 True\n",
+ "411 80 110 60 150 130 70 True\n",
+ "412 80 160 60 170 130 80 True\n",
+ "413 80 110 120 130 90 70 True\n",
+ "\n",
+ "[414 rows x 7 columns]"
+ ]
+ },
+ "execution_count": 175,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df_stats"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 176,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from statsmodels.multivariate.manova import MANOVA"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 180,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " Multivariate linear model\n",
+ "================================================================\n",
+ " \n",
+ "----------------------------------------------------------------\n",
+ " Intercept Value Num DF Den DF F Value Pr > F\n",
+ "----------------------------------------------------------------\n",
+ " Wilks' lambda 0.0530 6.0000 407.0000 1211.8954 0.0000\n",
+ " Pillai's trace 0.9470 6.0000 407.0000 1211.8954 0.0000\n",
+ " Hotelling-Lawley trace 17.8658 6.0000 407.0000 1211.8954 0.0000\n",
+ " Roy's greatest root 17.8658 6.0000 407.0000 1211.8954 0.0000\n",
+ "----------------------------------------------------------------\n",
+ " \n",
+ "----------------------------------------------------------------\n",
+ " legendary Value Num DF Den DF F Value Pr > F\n",
+ "----------------------------------------------------------------\n",
+ " Wilks' lambda 0.7117 6.0000 407.0000 27.4810 0.0000\n",
+ " Pillai's trace 0.2883 6.0000 407.0000 27.4810 0.0000\n",
+ " Hotelling-Lawley trace 0.4051 6.0000 407.0000 27.4810 0.0000\n",
+ " Roy's greatest root 0.4051 6.0000 407.0000 27.4810 0.0000\n",
+ "================================================================\n",
+ "\n"
+ ]
+ }
+ ],
+ "source": [
+ "\n",
+ "# Define the formula: all six stats explained by the 'legendary' status\n",
+ "formula = 'hp + attack + defense + sp_atk + sp_def + speed ~ legendary'\n",
+ "\n",
+ "# Run the MANOVA model\n",
+ "manova = MANOVA.from_formula(formula, data=df_stats)\n",
+ "\n",
+ "# Get the multivariate test results (Focus on Wilks' lambda)\n",
+ "manova_results = manova.mv_test()\n",
+ "print(manova_results)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# The p-values of all four tests are less than 0.05, so we reject the null hypothesis\n",
+ "# Obtained result supports the claim that Legendary Pokemons have different stats (HP, Attack, Defense, Sp.Atk, Sp.Def, Speed) compared to Non-legendary "
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "**Challenge 2**"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "In this challenge, we will be working with california-housing data. The data can be found here:\n",
+ "- https://raw.githubusercontent.com/data-bootcamp-v4/data/main/california_housing.csv"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 181,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " longitude | \n",
+ " latitude | \n",
+ " housing_median_age | \n",
+ " total_rooms | \n",
+ " total_bedrooms | \n",
+ " population | \n",
+ " households | \n",
+ " median_income | \n",
+ " median_house_value | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " -114.31 | \n",
+ " 34.19 | \n",
+ " 15.0 | \n",
+ " 5612.0 | \n",
+ " 1283.0 | \n",
+ " 1015.0 | \n",
+ " 472.0 | \n",
+ " 1.4936 | \n",
+ " 66900.0 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " -114.47 | \n",
+ " 34.40 | \n",
+ " 19.0 | \n",
+ " 7650.0 | \n",
+ " 1901.0 | \n",
+ " 1129.0 | \n",
+ " 463.0 | \n",
+ " 1.8200 | \n",
+ " 80100.0 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " -114.56 | \n",
+ " 33.69 | \n",
+ " 17.0 | \n",
+ " 720.0 | \n",
+ " 174.0 | \n",
+ " 333.0 | \n",
+ " 117.0 | \n",
+ " 1.6509 | \n",
+ " 85700.0 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " -114.57 | \n",
+ " 33.64 | \n",
+ " 14.0 | \n",
+ " 1501.0 | \n",
+ " 337.0 | \n",
+ " 515.0 | \n",
+ " 226.0 | \n",
+ " 3.1917 | \n",
+ " 73400.0 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " -114.57 | \n",
+ " 33.57 | \n",
+ " 20.0 | \n",
+ " 1454.0 | \n",
+ " 326.0 | \n",
+ " 624.0 | \n",
+ " 262.0 | \n",
+ " 1.9250 | \n",
+ " 65500.0 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " longitude latitude housing_median_age total_rooms total_bedrooms \\\n",
+ "0 -114.31 34.19 15.0 5612.0 1283.0 \n",
+ "1 -114.47 34.40 19.0 7650.0 1901.0 \n",
+ "2 -114.56 33.69 17.0 720.0 174.0 \n",
+ "3 -114.57 33.64 14.0 1501.0 337.0 \n",
+ "4 -114.57 33.57 20.0 1454.0 326.0 \n",
+ "\n",
+ " population households median_income median_house_value \n",
+ "0 1015.0 472.0 1.4936 66900.0 \n",
+ "1 1129.0 463.0 1.8200 80100.0 \n",
+ "2 333.0 117.0 1.6509 85700.0 \n",
+ "3 515.0 226.0 3.1917 73400.0 \n",
+ "4 624.0 262.0 1.9250 65500.0 "
+ ]
+ },
+ "execution_count": 181,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df = pd.read_csv(\"https://raw.githubusercontent.com/data-bootcamp-v4/data/main/california_housing.csv\")\n",
+ "df.head()"
+ ]
+ },
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "- We posit that Pokemons of type Dragon have, on average, more HP stats than Grass. Choose the propper test and, with 5% significance, comment your findings."
+ "**We posit that houses close to either a school or a hospital are more expensive.**\n",
+ "\n",
+ "- School coordinates (-118, 34)\n",
+ "- Hospital coordinates (-122, 37)\n",
+ "\n",
+ "We consider a house (neighborhood) to be close to a school or hospital if the distance is lower than 0.50.\n",
+ "\n",
+ "Hint:\n",
+ "- Write a function to calculate euclidean distance from each house (neighborhood) to the school and to the hospital.\n",
+ "- Divide your dataset into houses close and far from either a hospital or school.\n",
+ "- Choose the proper test and, at a 5% significance level, comment on your findings.\n",
+ " "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 182,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Requirement already satisfied: geopy in c:\\users\\hp\\anaconda3\\lib\\site-packages (2.4.1)\n",
+ "Requirement already satisfied: pandas in c:\\users\\hp\\anaconda3\\lib\\site-packages (2.3.3)\n",
+ "Requirement already satisfied: geographiclib<3,>=1.52 in c:\\users\\hp\\anaconda3\\lib\\site-packages (from geopy) (2.1)\n",
+ "Requirement already satisfied: numpy>=1.26.0 in c:\\users\\hp\\anaconda3\\lib\\site-packages (from pandas) (1.26.4)\n",
+ "Requirement already satisfied: python-dateutil>=2.8.2 in c:\\users\\hp\\anaconda3\\lib\\site-packages (from pandas) (2.9.0.post0)\n",
+ "Requirement already satisfied: pytz>=2020.1 in c:\\users\\hp\\anaconda3\\lib\\site-packages (from pandas) (2024.1)\n",
+ "Requirement already satisfied: tzdata>=2022.7 in c:\\users\\hp\\anaconda3\\lib\\site-packages (from pandas) (2023.3)\n",
+ "Requirement already satisfied: six>=1.5 in c:\\users\\hp\\anaconda3\\lib\\site-packages (from python-dateutil>=2.8.2->pandas) (1.16.0)\n",
+ "Note: you may need to restart the kernel to use updated packages.\n"
+ ]
+ }
+ ],
+ "source": [
+ "pip install geopy pandas"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 183,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# H0: mean house value of houses close to a school or hospital <= mean house value of houses far from both.\n",
+ "# H1: mean house value of houses close to a school or hospital > mean house value of houses far from both (the claim under test)."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 184,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from geopy.distance import great_circle"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 185,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "\n",
+ "def calculate_great_circle_distance(row,target_point):\n",
+ " \n",
+ " # Point 1 (Start) must be (latitude, longitude)\n",
+ " start_point = (row['latitude'], row['longitude'])\n",
+ " \n",
+ " # Point 2 (End) must be (latitude, longitude)\n",
+ " end_point = target_point\n",
+ " \n",
+ " # The great_circle function returns a distance object; we extract miles/km\n",
+ " # Returning distance in Kilometers (you can use .miles, .meters, etc.)\n",
+ " distance_km = great_circle(start_point, end_point).km\n",
+ " return distance_km"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 186,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "school_point = (34.00, -118.00) \n",
+ "hospital_point = (37.00, -122.00) "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 187,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df[\"distance_school\"]=df.apply(calculate_great_circle_distance,axis=1, target_point=school_point)\n",
+ "df[\"distance_hospital\"]=df.apply(calculate_great_circle_distance,axis=1, target_point=hospital_point)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 188,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " longitude | \n",
+ " latitude | \n",
+ " housing_median_age | \n",
+ " total_rooms | \n",
+ " total_bedrooms | \n",
+ " population | \n",
+ " households | \n",
+ " median_income | \n",
+ " median_house_value | \n",
+ " distance_school | \n",
+ " distance_hospital | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " -114.31 | \n",
+ " 34.19 | \n",
+ " 15.0 | \n",
+ " 5612.0 | \n",
+ " 1283.0 | \n",
+ " 1015.0 | \n",
+ " 472.0 | \n",
+ " 1.4936 | \n",
+ " 66900.0 | \n",
+ " 340.418792 | \n",
+ " 761.974459 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " -114.47 | \n",
+ " 34.40 | \n",
+ " 19.0 | \n",
+ " 7650.0 | \n",
+ " 1901.0 | \n",
+ " 1129.0 | \n",
+ " 463.0 | \n",
+ " 1.8200 | \n",
+ " 80100.0 | \n",
+ " 327.659611 | \n",
+ " 738.576332 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " -114.56 | \n",
+ " 33.69 | \n",
+ " 17.0 | \n",
+ " 720.0 | \n",
+ " 174.0 | \n",
+ " 333.0 | \n",
+ " 117.0 | \n",
+ " 1.6509 | \n",
+ " 85700.0 | \n",
+ " 319.542519 | \n",
+ " 768.308921 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " -114.57 | \n",
+ " 33.64 | \n",
+ " 14.0 | \n",
+ " 1501.0 | \n",
+ " 337.0 | \n",
+ " 515.0 | \n",
+ " 226.0 | \n",
+ " 3.1917 | \n",
+ " 73400.0 | \n",
+ " 319.365481 | \n",
+ " 770.371666 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " -114.57 | \n",
+ " 33.57 | \n",
+ " 20.0 | \n",
+ " 1454.0 | \n",
+ " 326.0 | \n",
+ " 624.0 | \n",
+ " 262.0 | \n",
+ " 1.9250 | \n",
+ " 65500.0 | \n",
+ " 320.561844 | \n",
+ " 774.421877 | \n",
+ "
\n",
+ " \n",
+ " | ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " | 16995 | \n",
+ " -124.26 | \n",
+ " 40.58 | \n",
+ " 52.0 | \n",
+ " 2217.0 | \n",
+ " 394.0 | \n",
+ " 907.0 | \n",
+ " 369.0 | \n",
+ " 2.3571 | \n",
+ " 111400.0 | \n",
+ " 917.047590 | \n",
+ " 443.615947 | \n",
+ "
\n",
+ " \n",
+ " | 16996 | \n",
+ " -124.27 | \n",
+ " 40.69 | \n",
+ " 36.0 | \n",
+ " 2349.0 | \n",
+ " 528.0 | \n",
+ " 1194.0 | \n",
+ " 465.0 | \n",
+ " 2.5179 | \n",
+ " 79000.0 | \n",
+ " 927.102523 | \n",
+ " 454.929344 | \n",
+ "
\n",
+ " \n",
+ " | 16997 | \n",
+ " -124.30 | \n",
+ " 41.84 | \n",
+ " 17.0 | \n",
+ " 2677.0 | \n",
+ " 531.0 | \n",
+ " 1244.0 | \n",
+ " 456.0 | \n",
+ " 3.0313 | \n",
+ " 103600.0 | \n",
+ " 1031.462839 | \n",
+ " 573.239315 | \n",
+ "
\n",
+ " \n",
+ " | 16998 | \n",
+ " -124.30 | \n",
+ " 41.80 | \n",
+ " 19.0 | \n",
+ " 2672.0 | \n",
+ " 552.0 | \n",
+ " 1298.0 | \n",
+ " 478.0 | \n",
+ " 1.9797 | \n",
+ " 85800.0 | \n",
+ " 1027.794143 | \n",
+ " 569.086288 | \n",
+ "
\n",
+ " \n",
+ " | 16999 | \n",
+ " -124.35 | \n",
+ " 40.54 | \n",
+ " 52.0 | \n",
+ " 1820.0 | \n",
+ " 300.0 | \n",
+ " 806.0 | \n",
+ " 270.0 | \n",
+ " 3.0147 | \n",
+ " 94600.0 | \n",
+ " 918.431001 | \n",
+ " 443.183095 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
17000 rows × 11 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " longitude latitude housing_median_age total_rooms total_bedrooms \\\n",
+ "0 -114.31 34.19 15.0 5612.0 1283.0 \n",
+ "1 -114.47 34.40 19.0 7650.0 1901.0 \n",
+ "2 -114.56 33.69 17.0 720.0 174.0 \n",
+ "3 -114.57 33.64 14.0 1501.0 337.0 \n",
+ "4 -114.57 33.57 20.0 1454.0 326.0 \n",
+ "... ... ... ... ... ... \n",
+ "16995 -124.26 40.58 52.0 2217.0 394.0 \n",
+ "16996 -124.27 40.69 36.0 2349.0 528.0 \n",
+ "16997 -124.30 41.84 17.0 2677.0 531.0 \n",
+ "16998 -124.30 41.80 19.0 2672.0 552.0 \n",
+ "16999 -124.35 40.54 52.0 1820.0 300.0 \n",
+ "\n",
+ " population households median_income median_house_value \\\n",
+ "0 1015.0 472.0 1.4936 66900.0 \n",
+ "1 1129.0 463.0 1.8200 80100.0 \n",
+ "2 333.0 117.0 1.6509 85700.0 \n",
+ "3 515.0 226.0 3.1917 73400.0 \n",
+ "4 624.0 262.0 1.9250 65500.0 \n",
+ "... ... ... ... ... \n",
+ "16995 907.0 369.0 2.3571 111400.0 \n",
+ "16996 1194.0 465.0 2.5179 79000.0 \n",
+ "16997 1244.0 456.0 3.0313 103600.0 \n",
+ "16998 1298.0 478.0 1.9797 85800.0 \n",
+ "16999 806.0 270.0 3.0147 94600.0 \n",
+ "\n",
+ " distance_school distance_hospital \n",
+ "0 340.418792 761.974459 \n",
+ "1 327.659611 738.576332 \n",
+ "2 319.542519 768.308921 \n",
+ "3 319.365481 770.371666 \n",
+ "4 320.561844 774.421877 \n",
+ "... ... ... \n",
+ "16995 917.047590 443.615947 \n",
+ "16996 927.102523 454.929344 \n",
+ "16997 1031.462839 573.239315 \n",
+ "16998 1027.794143 569.086288 \n",
+ "16999 918.431001 443.183095 \n",
+ "\n",
+ "[17000 rows x 11 columns]"
+ ]
+ },
+ "execution_count": 188,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df"
]
},
{
"cell_type": "code",
- "execution_count": 1,
+ "execution_count": 189,
"metadata": {},
"outputs": [],
"source": [
- "#code here"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "- We posit that Legendary Pokemons have different stats (HP, Attack, Defense, Sp.Atk, Sp.Def, Speed) when comparing with Non-Legendary. Choose the propper test and, with 5% significance, comment your findings.\n"
+ "cond1=df[\"distance_school\"]<0.5\n",
+ "cond2=df[\"distance_hospital\"]<0.5"
]
},
{
"cell_type": "code",
- "execution_count": 18,
+ "execution_count": 190,
"metadata": {},
"outputs": [],
"source": [
- "#code here"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "**Challenge 2**"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "In this challenge, we will be working with california-housing data. The data can be found here:\n",
- "- https://raw.githubusercontent.com/data-bootcamp-v4/data/main/california_housing.csv"
+ "df[\"distance_category\"] = np.where(\n",
+ " cond1 | cond2,\"close\", \"far\" )"
]
},
{
"cell_type": "code",
- "execution_count": 5,
+ "execution_count": 191,
"metadata": {},
"outputs": [
{
@@ -370,6 +1747,9 @@
" households | \n",
" median_income | \n",
" median_house_value | \n",
+ " distance_school | \n",
+ " distance_hospital | \n",
+ " distance_category | \n",
" \n",
" \n",
" \n",
@@ -384,6 +1764,9 @@
" 472.0 | \n",
" 1.4936 | \n",
" 66900.0 | \n",
+ " 340.418792 | \n",
+ " 761.974459 | \n",
+ " far | \n",
" \n",
" \n",
" | 1 | \n",
@@ -396,6 +1779,9 @@
" 463.0 | \n",
" 1.8200 | \n",
" 80100.0 | \n",
+ " 327.659611 | \n",
+ " 738.576332 | \n",
+ " far | \n",
"
\n",
" \n",
" | 2 | \n",
@@ -408,6 +1794,9 @@
" 117.0 | \n",
" 1.6509 | \n",
" 85700.0 | \n",
+ " 319.542519 | \n",
+ " 768.308921 | \n",
+ " far | \n",
"
\n",
" \n",
" | 3 | \n",
@@ -420,6 +1809,9 @@
" 226.0 | \n",
" 3.1917 | \n",
" 73400.0 | \n",
+ " 319.365481 | \n",
+ " 770.371666 | \n",
+ " far | \n",
"
\n",
" \n",
" | 4 | \n",
@@ -432,53 +1824,237 @@
" 262.0 | \n",
" 1.9250 | \n",
" 65500.0 | \n",
+ " 320.561844 | \n",
+ " 774.421877 | \n",
+ " far | \n",
+ "
\n",
+ " \n",
+ " | ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " | 16995 | \n",
+ " -124.26 | \n",
+ " 40.58 | \n",
+ " 52.0 | \n",
+ " 2217.0 | \n",
+ " 394.0 | \n",
+ " 907.0 | \n",
+ " 369.0 | \n",
+ " 2.3571 | \n",
+ " 111400.0 | \n",
+ " 917.047590 | \n",
+ " 443.615947 | \n",
+ " far | \n",
+ "
\n",
+ " \n",
+ " | 16996 | \n",
+ " -124.27 | \n",
+ " 40.69 | \n",
+ " 36.0 | \n",
+ " 2349.0 | \n",
+ " 528.0 | \n",
+ " 1194.0 | \n",
+ " 465.0 | \n",
+ " 2.5179 | \n",
+ " 79000.0 | \n",
+ " 927.102523 | \n",
+ " 454.929344 | \n",
+ " far | \n",
+ "
\n",
+ " \n",
+ " | 16997 | \n",
+ " -124.30 | \n",
+ " 41.84 | \n",
+ " 17.0 | \n",
+ " 2677.0 | \n",
+ " 531.0 | \n",
+ " 1244.0 | \n",
+ " 456.0 | \n",
+ " 3.0313 | \n",
+ " 103600.0 | \n",
+ " 1031.462839 | \n",
+ " 573.239315 | \n",
+ " far | \n",
+ "
\n",
+ " \n",
+ " | 16998 | \n",
+ " -124.30 | \n",
+ " 41.80 | \n",
+ " 19.0 | \n",
+ " 2672.0 | \n",
+ " 552.0 | \n",
+ " 1298.0 | \n",
+ " 478.0 | \n",
+ " 1.9797 | \n",
+ " 85800.0 | \n",
+ " 1027.794143 | \n",
+ " 569.086288 | \n",
+ " far | \n",
+ "
\n",
+ " \n",
+ " | 16999 | \n",
+ " -124.35 | \n",
+ " 40.54 | \n",
+ " 52.0 | \n",
+ " 1820.0 | \n",
+ " 300.0 | \n",
+ " 806.0 | \n",
+ " 270.0 | \n",
+ " 3.0147 | \n",
+ " 94600.0 | \n",
+ " 918.431001 | \n",
+ " 443.183095 | \n",
+ " far | \n",
"
\n",
" \n",
"\n",
+ "17000 rows × 12 columns
\n",
""
],
"text/plain": [
- " longitude latitude housing_median_age total_rooms total_bedrooms \\\n",
- "0 -114.31 34.19 15.0 5612.0 1283.0 \n",
- "1 -114.47 34.40 19.0 7650.0 1901.0 \n",
- "2 -114.56 33.69 17.0 720.0 174.0 \n",
- "3 -114.57 33.64 14.0 1501.0 337.0 \n",
- "4 -114.57 33.57 20.0 1454.0 326.0 \n",
+ " longitude latitude housing_median_age total_rooms total_bedrooms \\\n",
+ "0 -114.31 34.19 15.0 5612.0 1283.0 \n",
+ "1 -114.47 34.40 19.0 7650.0 1901.0 \n",
+ "2 -114.56 33.69 17.0 720.0 174.0 \n",
+ "3 -114.57 33.64 14.0 1501.0 337.0 \n",
+ "4 -114.57 33.57 20.0 1454.0 326.0 \n",
+ "... ... ... ... ... ... \n",
+ "16995 -124.26 40.58 52.0 2217.0 394.0 \n",
+ "16996 -124.27 40.69 36.0 2349.0 528.0 \n",
+ "16997 -124.30 41.84 17.0 2677.0 531.0 \n",
+ "16998 -124.30 41.80 19.0 2672.0 552.0 \n",
+ "16999 -124.35 40.54 52.0 1820.0 300.0 \n",
"\n",
- " population households median_income median_house_value \n",
- "0 1015.0 472.0 1.4936 66900.0 \n",
- "1 1129.0 463.0 1.8200 80100.0 \n",
- "2 333.0 117.0 1.6509 85700.0 \n",
- "3 515.0 226.0 3.1917 73400.0 \n",
- "4 624.0 262.0 1.9250 65500.0 "
+ " population households median_income median_house_value \\\n",
+ "0 1015.0 472.0 1.4936 66900.0 \n",
+ "1 1129.0 463.0 1.8200 80100.0 \n",
+ "2 333.0 117.0 1.6509 85700.0 \n",
+ "3 515.0 226.0 3.1917 73400.0 \n",
+ "4 624.0 262.0 1.9250 65500.0 \n",
+ "... ... ... ... ... \n",
+ "16995 907.0 369.0 2.3571 111400.0 \n",
+ "16996 1194.0 465.0 2.5179 79000.0 \n",
+ "16997 1244.0 456.0 3.0313 103600.0 \n",
+ "16998 1298.0 478.0 1.9797 85800.0 \n",
+ "16999 806.0 270.0 3.0147 94600.0 \n",
+ "\n",
+ " distance_school distance_hospital distance_category \n",
+ "0 340.418792 761.974459 far \n",
+ "1 327.659611 738.576332 far \n",
+ "2 319.542519 768.308921 far \n",
+ "3 319.365481 770.371666 far \n",
+ "4 320.561844 774.421877 far \n",
+ "... ... ... ... \n",
+ "16995 917.047590 443.615947 far \n",
+ "16996 927.102523 454.929344 far \n",
+ "16997 1031.462839 573.239315 far \n",
+ "16998 1027.794143 569.086288 far \n",
+ "16999 918.431001 443.183095 far \n",
+ "\n",
+ "[17000 rows x 12 columns]"
]
},
- "execution_count": 5,
+ "execution_count": 191,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
- "df = pd.read_csv(\"https://raw.githubusercontent.com/data-bootcamp-v4/data/main/california_housing.csv\")\n",
- "df.head()"
+ "df"
]
},
{
- "cell_type": "markdown",
+ "cell_type": "code",
+ "execution_count": 192,
"metadata": {},
+ "outputs": [],
"source": [
- "**We posit that houses close to either a school or a hospital are more expensive.**\n",
- "\n",
- "- School coordinates (-118, 34)\n",
- "- Hospital coordinates (-122, 37)\n",
- "\n",
- "We consider a house (neighborhood) to be close to a school or hospital if the distance is lower than 0.50.\n",
- "\n",
- "Hint:\n",
- "- Write a function to calculate euclidean distance from each house (neighborhood) to the school and to the hospital.\n",
- "- Divide your dataset into houses close and far from either a hospital or school.\n",
- "- Choose the propper test and, with 5% significance, comment your findings.\n",
- " "
+ "# H0: avg_price_closer <= avg_price_far.\n",
+ "# H1: avg_price_closer > avg_price_far."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 193,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "## If the mean house value of the close group (near a school or a hospital) is significantly higher than that of the far group, the data support the claim that houses close to these places are more expensive."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 194,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df_close=df[df[\"distance_category\"]==\"close\"][\"median_house_value\"]\n",
+ "df_far=df[df[\"distance_category\"]==\"far\"][\"median_house_value\"]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 195,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "13747 137500.0\n",
+ "Name: median_house_value, dtype: float64"
+ ]
+ },
+ "execution_count": 195,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df_close"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 196,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "TtestResult(statistic=-0.6018226451778325, pvalue=0.7263498868187508, df=16998.0)"
+ ]
+ },
+ "execution_count": 196,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "from scipy import stats\n",
+ "# This returns the t-statistic and the p-value\n",
+ "stats.ttest_ind(df_close, df_far,alternative=\"greater\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 197,
+ "metadata": {},
+ "outputs": [],
+ "source": [
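+ "## The p-value (0.726) is higher than 0.05, so we fail to reject H0: there is not enough evidence that houses close to a school or a hospital are more expensive.\n",
+ "## Caveat: the close group contains a single observation (see df_close above), so this test has almost no power; the 0.5 threshold should be checked against the units of the distance columns."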
]
},
{
@@ -488,6 +2064,13 @@
"outputs": [],
"source": []
},
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
{
"cell_type": "code",
"execution_count": null,
@@ -498,7 +2081,7 @@
],
"metadata": {
"kernelspec": {
- "display_name": "Python 3",
+ "display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
@@ -512,9 +2095,9 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.10.9"
+ "version": "3.12.7"
}
},
"nbformat": 4,
- "nbformat_minor": 2
+ "nbformat_minor": 4
}