data-bootcamp-v4 · fran-eliot · Sep 15, 2025 · Sep 15, 2025
diff --git a/Image20250607120457.jpg b/Image20250607120457.jpg
diff --git a/Image20250607120510.jpg b/Image20250607120510.jpg
diff --git a/lab-hypothesis-testing.ipynb b/lab-hypothesis-testing.ipynb
@@ -51,7 +51,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 2,
    "metadata": {},
    "outputs": [
     {
@@ -278,7 +278,7 @@
        "[800 rows x 11 columns]"
       ]
      },
-     "execution_count": 3,
+     "execution_count": 2,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -297,11 +297,39 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": 4,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "T-statistic: 4.4991348531252635\n",
+      "P-value: 7.0097137393088205e-06\n",
+      "Reject the null hypothesis: Dragon-type Pokémon have significantly higher average HP than Grass-type Pokémon.\n"
+     ]
+    }
+   ],
    "source": [
-    "#code here"
+    "# MOD: match Dragon/Grass on Type 1 or Type 2; a Pokémon with both types lands in both samples\n",
+    "type_cols = ['Type 1'] + [c for c in ['Type 2'] if c in df.columns]  # Type 2 is optional\n",
+    "dragon_mask = df[type_cols].eq('Dragon').any(axis=1)\n",
+    "grass_mask  = df[type_cols].eq('Grass').any(axis=1)\n",
+    "# Filter HP values for Dragon and Grass types\n",
+    "dragon_hp = df.loc[dragon_mask, 'HP'].dropna()\n",
+    "grass_hp  = df.loc[grass_mask,  'HP'].dropna()\n",
+    "\n",
+    "# One-tailed t-test: the alternative is mean HP(Dragon) > mean HP(Grass)\n",
+    "t_stat, p_value = st.ttest_ind(dragon_hp, grass_hp, alternative='greater')\n",
+    "\n",
+    "print(f\"T-statistic: {t_stat}\")\n",
+    "print(f\"P-value: {p_value}\")\n",
+    "\n",
+    "# Conclusion at the 5% significance level\n",
+    "if p_value < 0.05:\n",
+    "    print(\"Reject the null hypothesis: Dragon-type Pokémon have significantly higher average HP than Grass-type Pokémon.\")\n",
+    "else:\n",
+    "    print(\"Fail to reject the null hypothesis: No significant difference in average HP between Dragon and Grass types.\")"
    ]
   },
   {
@@ -313,11 +341,139 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 18,
+   "execution_count": 4,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "HP: t-statistic = 8.98, p-value = 0.0000\n",
+      "  -> Significant difference in HP between Legendary and Non-Legendary Pokémon.\n",
+      "Attack: t-statistic = 10.44, p-value = 0.0000\n",
+      "  -> Significant difference in Attack between Legendary and Non-Legendary Pokémon.\n",
+      "Defense: t-statistic = 7.64, p-value = 0.0000\n",
+      "  -> Significant difference in Defense between Legendary and Non-Legendary Pokémon.\n",
+      "Sp. Atk: t-statistic = 13.42, p-value = 0.0000\n",
+      "  -> Significant difference in Sp. Atk between Legendary and Non-Legendary Pokémon.\n",
+      "Sp. Def: t-statistic = 10.02, p-value = 0.0000\n",
+      "  -> Significant difference in Sp. Def between Legendary and Non-Legendary Pokémon.\n",
+      "Speed: t-statistic = 11.48, p-value = 0.0000\n",
+      "  -> Significant difference in Speed between Legendary and Non-Legendary Pokémon.\n"
+     ]
+    }
+   ],
    "source": [
-    "#code here"
+    "# Split the rows on the Legendary flag\n",
+    "legendary = df.loc[df['Legendary'] == True]\n",
+    "non_legendary = df.loc[df['Legendary'] == False]\n",
+    "\n",
+    "# Stats to compare between the two groups\n",
+    "stats = ['HP', 'Attack', 'Defense', 'Sp. Atk', 'Sp. Def', 'Speed']\n",
+    "\n",
+    "# Welch's t-test (equal_var=False) for every stat, stored under the stat's name\n",
+    "results = {}\n",
+    "for stat in stats:\n",
+    "    t_stat, p_value = st.ttest_ind(legendary[stat], non_legendary[stat], equal_var=False)\n",
+    "    results[stat] = dict(t_stat=t_stat, p_value=p_value)\n",
+    "\n",
+    "# Report each test and its outcome at the 0.05 significance level\n",
+    "for stat, result in results.items():\n",
+    "    print(f\"{stat}: t-statistic = {result['t_stat']:.2f}, p-value = {result['p_value']:.4f}\")\n",
+    "    if result['p_value'] < 0.05:\n",
+    "        print(f\"  -> Significant difference in {stat} between Legendary and Non-Legendary Pokémon.\")\n",
+    "    else:\n",
+    "        print(f\"  -> No significant difference in {stat} between Legendary and Non-Legendary Pokémon.\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th>Legendary</th>\n",
+       "      <th>Non-Legendary</th>\n",
+       "      <th>Legendary</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>HP</th>\n",
+       "      <td>67.182313</td>\n",
+       "      <td>92.738462</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>Attack</th>\n",
+       "      <td>75.669388</td>\n",
+       "      <td>116.676923</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>Defense</th>\n",
+       "      <td>71.559184</td>\n",
+       "      <td>99.661538</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>Sp. Atk</th>\n",
+       "      <td>68.454422</td>\n",
+       "      <td>122.184615</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>Sp. Def</th>\n",
+       "      <td>68.892517</td>\n",
+       "      <td>105.938462</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>Speed</th>\n",
+       "      <td>65.455782</td>\n",
+       "      <td>100.184615</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "Legendary  Non-Legendary   Legendary\n",
+       "HP             67.182313   92.738462\n",
+       "Attack         75.669388  116.676923\n",
+       "Defense        71.559184   99.661538\n",
+       "Sp. Atk        68.454422  122.184615\n",
+       "Sp. Def        68.892517  105.938462\n",
+       "Speed          65.455782  100.184615"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "# MOD: also show the per-group means to put the differences in context\n",
+    "import pandas as pd\n",
+    "stats_cols = [col for col in ('HP', 'Attack', 'Defense', 'Sp. Atk', 'Sp. Def', 'Speed') if col in df.columns]\n",
+    "if not stats_cols:\n",
+    "    print('No se encontraron columnas de estadísticas estándar para mostrar medias.')\n",
+    "else:\n",
+    "    means_table = df.groupby('Legendary')[stats_cols].mean().rename(index={True: 'Legendary', False: 'Non-Legendary'})\n",
+    "    display(means_table.transpose())"
    ]
   },
   {
@@ -483,10 +639,52 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 6,
    "metadata": {},
-   "outputs": [],
-   "source": []
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "T-statistic: 37.992330214201516\n",
+      "P-value: 3.0064957768592614e-301\n",
+      "Reject the null hypothesis: Houses close to a school or hospital are significantly more expensive.\n"
+     ]
+    }
+   ],
+   "source": [
+    "# School and hospital coordinates, stored as (longitude, latitude)\n",
+    "school_coords = (-118, 34)\n",
+    "hospital_coords = (-122, 37)\n",
+    "\n",
+    "def calculate_distance(lat, lon, coords):\n",
+    "    \"\"\"Return the Euclidean distance from (lat, lon) to coords, where coords is (lon, lat).\"\"\"\n",
+    "    return np.sqrt((lat - coords[1])**2 + (lon - coords[0])**2)\n",
+    "\n",
+    "# Distances are in raw coordinate units, not kilometres\n",
+    "df['distance_to_school'] = calculate_distance(df['latitude'], df['longitude'], school_coords)\n",
+    "df['distance_to_hospital'] = calculate_distance(df['latitude'], df['longitude'], hospital_coords)\n",
+    "\n",
+    "# A house counts as close when it is within 0.50 units of either site\n",
+    "df['close_to_school_or_hospital'] = (df['distance_to_school'] < 0.50) | (df['distance_to_hospital'] < 0.50)\n",
+    "\n",
+    "# Split prices on the flag; it is already boolean, so it can be used (and negated) directly\n",
+    "close_group = df.loc[df['close_to_school_or_hospital'], 'median_house_value']\n",
+    "far_group = df.loc[~df['close_to_school_or_hospital'], 'median_house_value']\n",
+    "\n",
+    "# Welch's two-sample t-test (two-sided); the sign of t_stat tells which group is pricier\n",
+    "t_stat, p_value = st.ttest_ind(close_group, far_group, equal_var=False)\n",
+    "print(f\"T-statistic: {t_stat}\")\n",
+    "print(f\"P-value: {p_value}\")\n",
+    "\n",
+    "# Conclusion: only claim a direction that the sign of the statistic supports\n",
+    "if p_value < 0.05 and t_stat > 0:\n",
+    "    print(\"Reject the null hypothesis: Houses close to a school or hospital are significantly more expensive.\")\n",
+    "elif p_value < 0.05:\n",
+    "    print(\"Reject the null hypothesis: Houses close to a school or hospital are significantly less expensive.\")\n",
+    "else:\n",
+    "    print(\"Fail to reject the null hypothesis: No significant difference in house prices.\")"
+   ]
   },
   {
    "cell_type": "code",
@@ -498,7 +696,7 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python 3",
+   "display_name": "venv",
    "language": "python",
    "name": "python3"
   },
@@ -512,7 +710,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.10.9"
+   "version": "3.12.2"
   }
  },
  "nbformat": 4,