data-bootcamp-v4 · PedroGoncalves84 · Sep 9, 2025
diff --git a/lab-hypothesis-testing.ipynb b/lab-hypothesis-testing.ipynb
@@ -51,7 +51,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 2,
    "metadata": {},
    "outputs": [
     {
@@ -278,7 +278,7 @@
        "[800 rows x 11 columns]"
       ]
      },
-     "execution_count": 3,
+     "execution_count": 2,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -297,11 +297,46 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": 3,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "T-statistic: 3.335, One-tailed p-value: 0.0008\n",
+      "Reject H0: Dragon-type Pokémon have significantly higher HP than Grass-type Pokémon.\n",
+      "Mean HP - Dragon: 83.31, Grass: 67.27\n"
+     ]
+    }
+   ],
    "source": [
-    "#code here"
+    "# H1 under test: Dragon-type HP is higher than Grass-type HP (one-tailed).\n",
+    "# Pull the HP column for each 'Type 1' group with a single .loc lookup.\n",
+    "dragon_hp = df.loc[df['Type 1'] == 'Dragon', 'HP']\n",
+    "grass_hp = df.loc[df['Type 1'] == 'Grass', 'HP']\n",
+    "\n",
+    "# Two-sample t-test without the equal-variance assumption (equal_var=False).\n",
+    "# alternative='greater' asks SciPy for the one-tailed p-value of\n",
+    "# mean(dragon_hp) > mean(grass_hp) directly, so no manual halving of a\n",
+    "# two-tailed p-value is needed. Requires SciPy >= 1.6.\n",
+    "t_stat, p_value_one_tailed = st.ttest_ind(\n",
+    "    dragon_hp, grass_hp, equal_var=False, alternative='greater'\n",
+    ")\n",
+    "\n",
+    "print(f\"T-statistic: {t_stat:.3f}, One-tailed p-value: {p_value_one_tailed:.4f}\")\n",
+    "\n",
+    "# Significance check at the 5% level\n",
+    "alpha = 0.05\n",
+    "if p_value_one_tailed < alpha:\n",
+    "    print(\"Reject H0: Dragon-type Pokémon have significantly higher HP than Grass-type Pokémon.\")\n",
+    "else:\n",
+    "    print(\"Fail to reject H0: No significant difference in HP between Dragon and Grass Pokémon.\")\n",
+    "\n",
+    "# Group means, printed to show the direction of the difference\n",
+    "mean_dragon = dragon_hp.mean()\n",
+    "mean_grass = grass_hp.mean()\n",
+    "print(f\"Mean HP - Dragon: {mean_dragon:.2f}, Grass: {mean_grass:.2f}\")"
    ]
   },
   {
@@ -313,11 +348,84 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 18,
+   "execution_count": 6,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n",
+      "Stat: HP\n",
+      "T-statistic: 8.981, Two-tailed p-value: 0.0000\n",
+      "Reject H0: Legendary and Non-Legendary Pokémon have significantly different HP.\n",
+      "\n",
+      "Stat: Attack\n",
+      "T-statistic: 10.438, Two-tailed p-value: 0.0000\n",
+      "Reject H0: Legendary and Non-Legendary Pokémon have significantly different Attack.\n",
+      "\n",
+      "Stat: Defense\n",
+      "T-statistic: 7.637, Two-tailed p-value: 0.0000\n",
+      "Reject H0: Legendary and Non-Legendary Pokémon have significantly different Defense.\n",
+      "\n",
+      "Stat: Sp. Atk\n",
+      "T-statistic: 13.417, Two-tailed p-value: 0.0000\n",
+      "Reject H0: Legendary and Non-Legendary Pokémon have significantly different Sp. Atk.\n",
+      "\n",
+      "Stat: Sp. Def\n",
+      "T-statistic: 10.016, Two-tailed p-value: 0.0000\n",
+      "Reject H0: Legendary and Non-Legendary Pokémon have significantly different Sp. Def.\n",
+      "\n",
+      "Stat: Speed\n",
+      "T-statistic: 11.475, Two-tailed p-value: 0.0000\n",
+      "Reject H0: Legendary and Non-Legendary Pokémon have significantly different Speed.\n",
+      "\n",
+      "Mean stats for Legendary Pokémon:\n",
+      "HP          92.738462\n",
+      "Attack     116.676923\n",
+      "Defense     99.661538\n",
+      "Sp. Atk    122.184615\n",
+      "Sp. Def    105.938462\n",
+      "Speed      100.184615\n",
+      "dtype: float64\n",
+      "\n",
+      "Mean stats for Non-Legendary Pokémon:\n",
+      "HP         67.182313\n",
+      "Attack     75.669388\n",
+      "Defense    71.559184\n",
+      "Sp. Atk    68.454422\n",
+      "Sp. Def    68.892517\n",
+      "Speed      65.455782\n",
+      "dtype: float64\n"
+     ]
+    }
+   ],
    "source": [
-    "#code here"
+    "# Significance level and the stat columns compared between the two groups\n",
+    "alpha = 0.05\n",
+    "stats_cols = ['HP', 'Attack', 'Defense', 'Sp. Atk', 'Sp. Def', 'Speed']\n",
+    "\n",
+    "# Partition the rows on the 'Legendary' flag\n",
+    "legendary = df.loc[df['Legendary'].eq(True)]\n",
+    "non_legendary = df.loc[df['Legendary'].eq(False)]\n",
+    "\n",
+    "# One two-tailed t-test per stat, without the equal-variance assumption;\n",
+    "# each result is compared to alpha on its own (no multiple-comparison correction).\n",
+    "for col in stats_cols:\n",
+    "    t_stat, p_value_two_tailed = st.ttest_ind(legendary[col], non_legendary[col], equal_var=False)\n",
+    "    verdict = (\n",
+    "        f\"Reject H0: Legendary and Non-Legendary Pokémon have significantly different {col}.\"\n",
+    "        if p_value_two_tailed < alpha\n",
+    "        else f\"Fail to reject H0: No significant difference in {col} between Legendary and Non-Legendary Pokémon.\"\n",
+    "    )\n",
+    "    print(f\"\\nStat: {col}\")\n",
+    "    print(f\"T-statistic: {t_stat:.3f}, Two-tailed p-value: {p_value_two_tailed:.4f}\")\n",
+    "    print(verdict)\n",
+    "\n",
+    "# Per-group means of the same columns, printed for interpretation\n",
+    "for header, group in ((\"\\nMean stats for Legendary Pokémon:\", legendary), (\"\\nMean stats for Non-Legendary Pokémon:\", non_legendary)):\n",
+    "    print(header)\n",
+    "    print(group[stats_cols].mean())"
    ]
   },
   {
@@ -337,7 +445,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 17,
    "metadata": {},
    "outputs": [
     {
@@ -453,7 +561,7 @@
        "4       624.0       262.0         1.9250             65500.0  "
       ]
      },
-     "execution_count": 5,
+     "execution_count": 17,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -483,22 +591,84 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 18,
    "metadata": {},
-   "outputs": [],
-   "source": []
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "['longitude', 'latitude', 'housing_median_age', 'total_rooms', 'total_bedrooms', 'population', 'households', 'median_income', 'median_house_value']\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(df.columns.to_list())  # list the column labels of the DataFrame currently bound to df"
+   ]
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 24,
    "metadata": {},
-   "outputs": [],
-   "source": []
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "T-statistic: 37.992\n",
+      "One-tailed p-value: 0.0000\n",
+      "Reject H0: Houses close to a school or hospital are significantly more expensive.\n",
+      "Mean house value (close): 246951.98\n",
+      "Mean house value (far): 180678.44\n"
+     ]
+    }
+   ],
+   "source": [
+    "def euclidean_distance(lon1, lat1, lon2, lat2):\n",
+    "    \"\"\"Return the Euclidean distance between (lon1, lat1) and (lon2, lat2).\"\"\"\n",
+    "    return np.sqrt((lon1 - lon2)**2 + (lat1 - lat2)**2)\n",
+    "\n",
+    "# NOTE(review): school_coord and hospital_coord are read below as (longitude, latitude)\n",
+    "# pairs but are not defined in this cell; define them before running it — TODO confirm.\n",
+    "# A house counts as \"close\" when its nearest-point distance is below this threshold.\n",
+    "CLOSE_THRESHOLD = 0.50\n",
+    "\n",
+    "# Distance from each house to the school and to the hospital\n",
+    "df['dist_school'] = euclidean_distance(df['longitude'], df['latitude'], school_coord[0], school_coord[1])\n",
+    "df['dist_hospital'] = euclidean_distance(df['longitude'], df['latitude'], hospital_coord[0], hospital_coord[1])\n",
+    "\n",
+    "# Distance to whichever of the two is nearer\n",
+    "df['min_dist'] = df[['dist_school', 'dist_hospital']].min(axis=1)\n",
+    "\n",
+    "# Split house values into close / far groups with one .loc lookup each\n",
+    "close_houses = df.loc[df['min_dist'] < CLOSE_THRESHOLD, 'median_house_value']\n",
+    "far_houses = df.loc[df['min_dist'] >= CLOSE_THRESHOLD, 'median_house_value']\n",
+    "\n",
+    "# Two-sample t-test without the equal-variance assumption, called through the `st`\n",
+    "# alias the earlier t-test cells already use. alternative='greater' returns the\n",
+    "# one-tailed p-value for mean(close) > mean(far) directly. Requires SciPy >= 1.6.\n",
+    "t_stat, p_value_one_tailed = st.ttest_ind(close_houses, far_houses, equal_var=False, alternative='greater')\n",
+    "\n",
+    "# Significance level\n",
+    "alpha = 0.05\n",
+    "\n",
+    "print(f\"T-statistic: {t_stat:.3f}\")\n",
+    "print(f\"One-tailed p-value: {p_value_one_tailed:.4f}\")\n",
+    "\n",
+    "if p_value_one_tailed < alpha:\n",
+    "    print(\"Reject H0: Houses close to a school or hospital are significantly more expensive.\")\n",
+    "else:\n",
+    "    print(\"Fail to reject H0: No significant difference in house value based on proximity.\")\n",
+    "\n",
+    "# Group means for interpretation\n",
+    "print(f\"Mean house value (close): {close_houses.mean():.2f}\")\n",
+    "print(f\"Mean house value (far): {far_houses.mean():.2f}\")"
+   ]
   }
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python 3",
+   "display_name": "tfdf_env",
    "language": "python",
    "name": "python3"
   },
@@ -512,7 +682,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.10.9"
+   "version": "3.11.13"
   }
  },
  "nbformat": 4,