From 32139c589c19194c602c150518aafd209ab00693 Mon Sep 17 00:00:00 2001 From: mariamnez Date: Mon, 25 Aug 2025 14:14:50 +0200 Subject: [PATCH] Add files via upload --- lab-hypothesis-testing-SOLVED.ipynb | 616 ++++++++++++++++++++++++++++ 1 file changed, 616 insertions(+) create mode 100644 lab-hypothesis-testing-SOLVED.ipynb diff --git a/lab-hypothesis-testing-SOLVED.ipynb b/lab-hypothesis-testing-SOLVED.ipynb new file mode 100644 index 0000000..35693d5 --- /dev/null +++ b/lab-hypothesis-testing-SOLVED.ipynb @@ -0,0 +1,616 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Lab | Hypothesis Testing" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Objective**\n", + "\n", + "Welcome to the Hypothesis Testing Lab, where we embark on an enlightening journey through the realm of statistical decision-making! In this laboratory, we delve into various scenarios, applying the powerful tools of hypothesis testing to scrutinize and interpret data.\n", + "\n", + "From testing the mean of a single sample (One Sample T-Test), to investigating differences between independent groups (Two Sample T-Test), and exploring relationships within dependent samples (Paired Sample T-Test), our exploration knows no bounds. Furthermore, we'll venture into the realm of Analysis of Variance (ANOVA), unraveling the complexities of comparing means across multiple groups.\n", + "\n", + "So, grab your statistical tools, prepare your hypotheses, and let's embark on this fascinating journey of exploration and discovery in the world of hypothesis testing!" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Challenge 1**" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In this challenge, we will be working with pokemon data. 
The data can be found here:\n", + "\n", + "- https://raw.githubusercontent.com/data-bootcamp-v4/data/main/pokemon.csv" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "#libraries\n", + "import pandas as pd\n", + "import scipy.stats as st\n", + "import numpy as np\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
NameType 1Type 2HPAttackDefenseSp. AtkSp. DefSpeedGenerationLegendary
0BulbasaurGrassPoison4549496565451False
1IvysaurGrassPoison6062638080601False
2VenusaurGrassPoison808283100100801False
3Mega VenusaurGrassPoison80100123122120801False
4CharmanderFireNaN3952436050651False
....................................
795DiancieRockFairy50100150100150506True
796Mega DiancieRockFairy501601101601101106True
797Hoopa ConfinedPsychicGhost8011060150130706True
798Hoopa UnboundPsychicDark8016060170130806True
799VolcanionFireWater8011012013090706True
\n", + "

800 rows × 11 columns

\n", + "
" + ], + "text/plain": [ + " Name Type 1 Type 2 HP Attack Defense Sp. Atk Sp. Def \\\n", + "0 Bulbasaur Grass Poison 45 49 49 65 65 \n", + "1 Ivysaur Grass Poison 60 62 63 80 80 \n", + "2 Venusaur Grass Poison 80 82 83 100 100 \n", + "3 Mega Venusaur Grass Poison 80 100 123 122 120 \n", + "4 Charmander Fire NaN 39 52 43 60 50 \n", + ".. ... ... ... .. ... ... ... ... \n", + "795 Diancie Rock Fairy 50 100 150 100 150 \n", + "796 Mega Diancie Rock Fairy 50 160 110 160 110 \n", + "797 Hoopa Confined Psychic Ghost 80 110 60 150 130 \n", + "798 Hoopa Unbound Psychic Dark 80 160 60 170 130 \n", + "799 Volcanion Fire Water 80 110 120 130 90 \n", + "\n", + " Speed Generation Legendary \n", + "0 45 1 False \n", + "1 60 1 False \n", + "2 80 1 False \n", + "3 80 1 False \n", + "4 65 1 False \n", + ".. ... ... ... \n", + "795 50 6 True \n", + "796 110 6 True \n", + "797 70 6 True \n", + "798 80 6 True \n", + "799 70 6 True \n", + "\n", + "[800 rows x 11 columns]" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Load Pokémon into its own variable to avoid collisions with later datasets\n", + "df_poke = pd.read_csv(\"https://raw.githubusercontent.com/data-bootcamp-v4/data/main/pokemon.csv\")\n", + "df_poke.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "- We posit that Pokemons of type Dragon have, on average, more HP stats than Grass. Choose the propper test and, with 5% significance, comment your findings." 
# Challenge 1a — "Dragon-type Pokémon have, on average, more HP than Grass-type."
#   H0: mu_Dragon <= mu_Grass      H1: mu_Dragon > mu_Grass   (one-sided, alpha = 0.05)
#
# FIX: the original cell compared Dragons against *all* other Pokémon, which is
# not the hypothesis stated in the prompt; here Dragon is compared with Grass.
# Groups are built from the primary type ('Type 1') so the two samples are
# disjoint and independent.
dragon_hp = df_poke.loc[df_poke['Type 1'] == 'Dragon', 'HP'].dropna()
grass_hp = df_poke.loc[df_poke['Type 1'] == 'Grass', 'HP'].dropna()

# Welch's t-test: independent samples, no equal-variance assumption.
# (SciPy >= 1.6 could pass alternative='greater' directly; the manual halving
# below keeps the cell compatible with older SciPy versions.)
t_stat, p_two_sided = st.ttest_ind(dragon_hp, grass_hp, equal_var=False)

# Convert the two-sided p-value to one-sided for H1: Dragon > Grass.
p_one_sided = p_two_sided / 2 if t_stat > 0 else 1 - p_two_sided / 2

alpha = 0.05
print(f"n_dragon={len(dragon_hp)}, n_grass={len(grass_hp)}")
print(f"mean_dragon_HP={dragon_hp.mean():.2f}, mean_grass_HP={grass_hp.mean():.2f}")
print(f"Welch t={t_stat:.3f}, one-sided p={p_one_sided:.4g}")
print("Decision (α=0.05):",
      "Reject H0 (Dragons have higher HP than Grass)" if p_one_sided < alpha
      else "Fail to reject H0")
# Hypothesis: Legendary Pokémon have different stats than non-Legendary ones.
# Two-sided Welch t-tests per stat, with Bonferroni-adjusted p-values to
# account for the multiple comparisons.

# Locate the Legendary flag column regardless of its exact spelling
# (e.g. 'Legendary', 'is_legendary', 'Legendary?').
def _norm(name):
    for ch in (" ", ".", "_", "?"):
        name = name.replace(ch, "")
    return name.lower()

_matches = [c for c in df_poke.columns if _norm(c) in ("legendary", "islegendary")]
if not _matches:
    raise KeyError("Legendary / is_legendary column not found in Pokémon dataset.")
legend_col = _matches[0]

# Coerce to a boolean mask when the column is not already boolean.
is_legendary = df_poke[legend_col]
if is_legendary.dtype != bool:
    is_legendary = is_legendary.astype(str).str.lower().isin(['true','1','yes','t','legendary'])

# Keep only the stat columns that actually exist in this frame.
stats_cols = [c for c in ['HP','Attack','Defense','Sp. Atk','Sp. Def','Speed'] if c in df_poke.columns]

records = []
for stat_name in stats_cols:
    leg = df_poke.loc[is_legendary, stat_name].dropna()
    non = df_poke.loc[~is_legendary, stat_name].dropna()
    t_val, p_val = st.ttest_ind(leg, non, equal_var=False, nan_policy='omit')
    records.append({
        'stat': stat_name,
        'n_legendary': len(leg),
        'n_nonlegendary': len(non),
        'mean_legendary': float(leg.mean()) if len(leg) else np.nan,
        'mean_nonlegendary': float(non.mean()) if len(non) else np.nan,
        't': float(t_val),
        'p_two_sided': float(p_val),
    })

res_df = pd.DataFrame(records)
# Bonferroni: multiply each raw p-value by the number of tests, capped at 1.
res_df['p_bonferroni'] = (res_df['p_two_sided'] * len(res_df)).clip(upper=1.0)
res_df = res_df.sort_values('p_two_sided').reset_index(drop=True)
res_df
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
longitudelatitudehousing_median_agetotal_roomstotal_bedroomspopulationhouseholdsmedian_incomemedian_house_value
0-114.3134.1915.05612.01283.01015.0472.01.493666900.0
1-114.4734.4019.07650.01901.01129.0463.01.820080100.0
2-114.5633.6917.0720.0174.0333.0117.01.650985700.0
3-114.5733.6414.01501.0337.0515.0226.03.191773400.0
4-114.5733.5720.01454.0326.0624.0262.01.925065500.0
\n", + "
" + ], + "text/plain": [ + " longitude latitude housing_median_age total_rooms total_bedrooms \\\n", + "0 -114.31 34.19 15.0 5612.0 1283.0 \n", + "1 -114.47 34.40 19.0 7650.0 1901.0 \n", + "2 -114.56 33.69 17.0 720.0 174.0 \n", + "3 -114.57 33.64 14.0 1501.0 337.0 \n", + "4 -114.57 33.57 20.0 1454.0 326.0 \n", + "\n", + " population households median_income median_house_value \n", + "0 1015.0 472.0 1.4936 66900.0 \n", + "1 1129.0 463.0 1.8200 80100.0 \n", + "2 333.0 117.0 1.6509 85700.0 \n", + "3 515.0 226.0 3.1917 73400.0 \n", + "4 624.0 262.0 1.9250 65500.0 " + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = pd.read_csv(\"https://raw.githubusercontent.com/data-bootcamp-v4/data/main/california_housing.csv\")\n", + "df.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**We posit that houses close to either a school or a hospital are more expensive.**\n", + "\n", + "- School coordinates (-118, 34)\n", + "- Hospital coordinates (-122, 37)\n", + "\n", + "We consider a house (neighborhood) to be close to a school or hospital if the distance is lower than 0.50.\n", + "\n", + "Hint:\n", + "- Write a function to calculate euclidean distance from each house (neighborhood) to the school and to the hospital.\n", + "- Divide your dataset into houses close and far from either a hospital or school.\n", + "- Choose the propper test and, with 5% significance, comment your findings.\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Compute Euclidean distances to the given school and hospital, then flag 'near' vs 'far'\n", + "import numpy as np\n", + "\n", + "def euclidean_distance(lon, lat, lon0, lat0):\n", + " return np.sqrt((lon - lon0)**2 + (lat - lat0)**2)\n", + "\n", + "school = (-118.0, 34.0)\n", + "hospital = (-122.0, 37.0)\n", + "\n", + "df['dist_school'] = euclidean_distance(df['longitude'], 
df['latitude'], school[0], school[1])\n", + "df['dist_hospital'] = euclidean_distance(df['longitude'], df['latitude'], hospital[0], hospital[1])\n", + "df['dist_min'] = df[['dist_school','dist_hospital']].min(axis=1)\n", + "\n", + "threshold = 0.50 # as specified\n", + "df['near'] = df['dist_min'] < threshold\n", + "\n", + "df[['longitude','latitude','dist_school','dist_hospital','dist_min','near']].head()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Hypothesis: Houses near (within 0.50 of a school or hospital) are more expensive.\n", + "# H0: μ_near ≤ μ_far vs H1: μ_near > μ_far (one-sided)\n", + "\n", + "near_vals = df.loc[df['near'], 'median_house_value'].dropna()\n", + "far_vals = df.loc[~df['near'], 'median_house_value'].dropna()\n", + "\n", + "t_stat, p_two_sided = st.ttest_ind(near_vals, far_vals, equal_var=False, nan_policy='omit')\n", + "p_one_sided = p_two_sided/2 if t_stat > 0 else 1 - p_two_sided/2\n", + "\n", + "alpha = 0.05\n", + "print(f\"n_near={len(near_vals)}, n_far={len(far_vals)}\")\n", + "print(f\"mean_near={near_vals.mean():.2f}, mean_far={far_vals.mean():.2f}\")\n", + "print(f\"Welch t={t_stat:.3f}, one-sided p={p_one_sided:.4g}\")\n", + "print(\"Decision (α=0.05):\", \"Reject H0 (near are pricier)\" if p_one_sided < alpha else \"Fail to reject H0\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.9" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}