data-bootcamp-v4 · TeresaSilva7361 · Aug 31, 2025
diff --git a/lab-hypothesis-testing.ipynb b/lab-hypothesis-testing.ipynb
@@ -38,20 +38,21 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": 42,
    "metadata": {},
    "outputs": [],
    "source": [
     "#libraries\n",
     "import pandas as pd\n",
     "import scipy.stats as st\n",
     "import numpy as np\n",
-    "\n"
+    "from scipy.stats import ttest_ind\n",
+    "from statsmodels.multivariate.manova import MANOVA\n"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 43,
    "metadata": {},
    "outputs": [
     {
@@ -278,7 +279,7 @@
        "[800 rows x 11 columns]"
       ]
      },
-     "execution_count": 3,
+     "execution_count": 43,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -297,11 +298,32 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": 44,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "T-statistic: 3.3350\n",
+      "P-value: 0.0008\n",
+      "✅ Dragon Pokémons have significantly higher HP than Grass Pokémons.\n",
+      "\n"
+     ]
+    }
+   ],
    "source": [
-    "#code here"
+    "# H1: mean HP(Dragon) > mean HP(Grass). Select HP by primary type with a single .loc call.\n",
+    "dragon_hp = df.loc[df['Type 1'] == 'Dragon', 'HP']\n",
+    "grass_hp = df.loc[df['Type 1'] == 'Grass', 'HP']\n",
+    "# Welch's t-test (equal_var=False), one-sided to match the directional hypothesis.\n",
+    "t_stat_1, p_val_1 = ttest_ind(dragon_hp, grass_hp, equal_var=False, alternative='greater')\n",
+    "print(f\"T-statistic: {t_stat_1:.4f}\")\n",
+    "print(f\"P-value: {p_val_1:.4f}\")\n",
+    "if p_val_1 < 0.05:  # 5% significance level\n",
+    "    print(\"✅ Dragon Pokémons have significantly higher HP than Grass Pokémons.\\n\")\n",
+    "else:  # failing to reject a one-sided H0 is not evidence that the two means are equal\n",
+    "    print(\"❌ No significant evidence that Dragon Pokémons have higher HP than Grass Pokémons.\\n\")"
    ]
   },
   {
@@ -313,11 +335,55 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 18,
+   "execution_count": 45,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "                   Multivariate linear model\n",
+      "================================================================\n",
+      "                                                                \n",
+      "----------------------------------------------------------------\n",
+      "       Intercept         Value  Num DF  Den DF   F Value  Pr > F\n",
+      "----------------------------------------------------------------\n",
+      "          Wilks' lambda  0.0592 6.0000 793.0000 2100.8338 0.0000\n",
+      "         Pillai's trace  0.9408 6.0000 793.0000 2100.8338 0.0000\n",
+      " Hotelling-Lawley trace 15.8953 6.0000 793.0000 2100.8338 0.0000\n",
+      "    Roy's greatest root 15.8953 6.0000 793.0000 2100.8338 0.0000\n",
+      "----------------------------------------------------------------\n",
+      "                                                                \n",
+      "----------------------------------------------------------------\n",
+      "          Legendary        Value  Num DF  Den DF  F Value Pr > F\n",
+      "----------------------------------------------------------------\n",
+      "             Wilks' lambda 0.7331 6.0000 793.0000 48.1098 0.0000\n",
+      "            Pillai's trace 0.2669 6.0000 793.0000 48.1098 0.0000\n",
+      "    Hotelling-Lawley trace 0.3640 6.0000 793.0000 48.1098 0.0000\n",
+      "       Roy's greatest root 0.3640 6.0000 793.0000 48.1098 0.0000\n",
+      "================================================================\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "# 'Sp. Atk' and 'Sp. Def' are not valid identifiers inside a formula string, so rename them first.\n",
+    "df = df.rename(columns={'Sp. Atk':'Sp_Atk', 'Sp. Def':'Sp_Def'})  # re-runnable: rename ignores absent labels by default\n",
+    "stats_cols = ['HP', 'Attack', 'Defense', 'Sp_Atk', 'Sp_Def', 'Speed']\n",
+    "# Build the left-hand side from stats_cols so the column list and the formula cannot drift apart.\n",
+    "manova_formula = ' + '.join(stats_cols) + ' ~ Legendary'\n",
+    "manova = MANOVA.from_formula(manova_formula, data=df)\n",
+    "manova_result = manova.mv_test()\n",
+    "\n",
+    "print(manova_result)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
    "metadata": {},
-   "outputs": [],
    "source": [
-    "#code here"
+    "- The MANOVA test rejects the null hypothesis that Legendary and Non-Legendary Pokémon have the same mean stats (Wilks' lambda = 0.7331, F(6, 793) = 48.11, p < 0.001), so the two groups differ significantly across the combination of HP, Attack, Defense, Sp_Atk, Sp_Def and Speed.\n",
+    "- Considering all stats together, Legendary Pokémon have a different profile than Non-Legendary Pokémon."
    ]
   },
   {
@@ -337,7 +403,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 46,
    "metadata": {},
    "outputs": [
     {
@@ -453,7 +519,7 @@
        "4       624.0       262.0         1.9250             65500.0  "
       ]
      },
-     "execution_count": 5,
+     "execution_count": 46,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -483,22 +549,45 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 47,
    "metadata": {},
-   "outputs": [],
-   "source": []
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "T-statistic: 37.9923, P-value: 0.0000\n",
+      "✅ Houses close to schools or hospitals are significantly more expensive.\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "def euclidean_dist(lon1, lat1, lon2, lat2):\n",
+    "    \"\"\"Return the straight-line distance between (lon1, lat1) and (lon2, lat2), in the units of the inputs.\"\"\"\n",
+    "    return np.sqrt((lon1 - lon2)**2 + (lat1 - lat2)**2)\n",
+    "# NOTE(review): school and hospital are not defined in this cell; each must be a (longitude, latitude) pair set earlier.\n",
+    "dist_school = euclidean_dist(df['longitude'], df['latitude'], *school)\n",
+    "dist_hospital = euclidean_dist(df['longitude'], df['latitude'], *hospital)\n",
+    "# A row counts as close when it lies within 0.5 coordinate units of either landmark.\n",
+    "close = (dist_school < 0.5) | (dist_hospital < 0.5)\n",
+    "df['close_to_school_or_hospital'] = close\n",
+    "close_prices = df.loc[close, 'median_house_value']\n",
+    "far_prices = df.loc[~close, 'median_house_value']\n",
+    "# H1 is directional (close > far), so run Welch's t-test one-sided rather than two-sided.\n",
+    "t_stat, p_val = ttest_ind(close_prices, far_prices, equal_var=False, alternative='greater')\n",
+    "\n",
+    "print(f\"T-statistic: {t_stat:.4f}, P-value: {p_val:.4f}\")\n",
+    "if p_val < 0.05:\n",
+    "    print(\"✅ Houses close to schools or hospitals are significantly more expensive.\\n\")\n",
+    "else:\n",
+    "    print(\"❌ No significant evidence that houses close to schools or hospitals are more expensive.\\n\")"
+   ]
   }
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python 3",
+   "display_name": "Python 3 (ipykernel)",
    "language": "python",
    "name": "python3"
   },
@@ -512,9 +601,9 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.10.9"
+   "version": "3.13.3"
   }
  },
  "nbformat": 4,
- "nbformat_minor": 2
+ "nbformat_minor": 4
 }