diff --git a/lab-hypothesis-testing.ipynb b/lab-hypothesis-testing.ipynb index 0cc26d5..0464aa7 100644 --- a/lab-hypothesis-testing.ipynb +++ b/lab-hypothesis-testing.ipynb @@ -38,14 +38,13 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ - "#libraries\n", "import pandas as pd\n", - "import scipy.stats as st\n", "import numpy as np\n", + "from scipy.stats import ttest_ind\n", "\n" ] }, @@ -159,123 +158,24 @@ " 1\n", " False\n", " \n", - " \n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " \n", - " \n", - " 795\n", - " Diancie\n", - " Rock\n", - " Fairy\n", - " 50\n", - " 100\n", - " 150\n", - " 100\n", - " 150\n", - " 50\n", - " 6\n", - " True\n", - " \n", - " \n", - " 796\n", - " Mega Diancie\n", - " Rock\n", - " Fairy\n", - " 50\n", - " 160\n", - " 110\n", - " 160\n", - " 110\n", - " 110\n", - " 6\n", - " True\n", - " \n", - " \n", - " 797\n", - " Hoopa Confined\n", - " Psychic\n", - " Ghost\n", - " 80\n", - " 110\n", - " 60\n", - " 150\n", - " 130\n", - " 70\n", - " 6\n", - " True\n", - " \n", - " \n", - " 798\n", - " Hoopa Unbound\n", - " Psychic\n", - " Dark\n", - " 80\n", - " 160\n", - " 60\n", - " 170\n", - " 130\n", - " 80\n", - " 6\n", - " True\n", - " \n", - " \n", - " 799\n", - " Volcanion\n", - " Fire\n", - " Water\n", - " 80\n", - " 110\n", - " 120\n", - " 130\n", - " 90\n", - " 70\n", - " 6\n", - " True\n", - " \n", " \n", "\n", - "

800 rows × 11 columns

# %%
# Read the Pokemon CSV into `df` and preview the first rows.
df = pd.read_csv(
    filepath_or_buffer="https://raw.githubusercontent.com/data-bootcamp-v4/data/main/pokemon.csv",
)
df.head(n=5)
# %%
# Hypothesis test: do Dragon-type Pokemon have more HP than Grass-type Pokemon?
#   H0: mean HP of Dragon is not greater than mean HP of Grass
#   H1: mean HP of Dragon is greater than mean HP of Grass
# One-sided, two-sample t-test at the 5% significance level.

# Select HP by primary type with a single .loc call (avoids chained indexing).
dragon_hp = df.loc[df["Type 1"] == "Dragon", "HP"]
grass_hp = df.loc[df["Type 1"] == "Grass", "HP"]

# Welch's t-test (equal_var=False): nothing in this cell checks that the two
# groups share a variance, so the pooled-variance Student test is not assumed.
t_stat, p_val = ttest_ind(
    dragon_hp,
    grass_hp,
    equal_var=False,
    alternative="greater",
)

print("The T-statistic:", t_stat)
print("The p_value :", p_val)

# Significance level, defined once. `a` is kept as an alias because the
# printed messages refer to the threshold as "a".
alpha = 0.05
a = alpha

if p_val > alpha:
    print("The p value is greater than a ! We fail to reject the H0, that mean there is no different between their HP ")
else:
    print("The p value is less than a ! We reject the H0, theat mean there is a different and Dragon does have more HP")
# %%
# Hypothesis test: do Legendary Pokemon differ from non-Legendary Pokemon on
# each base stat?
#   H0: the mean of the stat is the same for both groups
#   H1: the mean of the stat differs between the groups
# The alternative is two-sided, matching alternative="two-sided" below.
stats_cols = ['HP', 'Attack', 'Defense', 'Sp. Atk', 'Sp. Def', 'Speed']

# Explicit comparisons against True / False are kept so that any value that is
# neither (e.g. a missing flag) is excluded from both groups, as before.
legendary_df = df[df["Legendary"].eq(True)]
none_legendary = df[df["Legendary"].eq(False)]

alpha = 0.05
# NOTE(review): six tests are run at the same alpha with no multiple-comparison
# correction; consider a Bonferroni-style threshold (alpha / len(stats_cols)).

for col in stats_cols:
    # Welch's t-test (equal_var=False): nothing in this cell checks that the
    # two groups share a variance, so equal variances are not assumed.
    t_stat, p_val = ttest_ind(
        legendary_df[col],
        none_legendary[col],
        equal_var=False,
        alternative="two-sided",
    )
    print(f"\n {col}")
    print("T-statistic:", t_stat)
    print("The p value:", p_val)

    if p_val > alpha:
        print(" We fail to reject H0, that means the is no diffenrent")
    else:
        print(" We reject the H0, that means there is a different between them")

# %%
# Load the California housing data into its own name (`housing`) so the
# Pokemon frame `df` is not overwritten, then preview the first rows.
housing = pd.read_csv('https://raw.githubusercontent.com/data-bootcamp-v4/data/main/california_housing.csv')
housing.head()
# %%
# Hypothesis test: are houses close to a school or a hospital more expensive?
#   H0: mean median_house_value of close houses is not greater than that of far houses
#   H1: mean median_house_value of close houses is greater than that of far houses
# One-sided, two-sample t-test at the 5% significance level.

# NOTE(review): confirm both coordinate pairs (longitude, latitude) against the
# exercise statement; they cannot be verified from this cell alone.
school_coords = (-118, 34)
hospital_coords = (-122, 37)

# A house counts as "close" when its distance to a site is below this value.
# Distances are in the same units as the longitude/latitude columns.
CLOSE_THRESHOLD = 0.50


def euclidean_distance(lon1, lat1, lon2, lat2):
    """Return the straight-line distance between (lon1, lat1) and (lon2, lat2).

    Inputs may be scalars or array-likes such as pandas Series; the arithmetic
    is element-wise, so passing Series for the first point and scalars for the
    second yields one distance per row.
    """
    return np.sqrt((lon1 - lon2) ** 2 + (lat1 - lat2) ** 2)


# Distance from every row to each site (adds two columns to `housing`).
housing["dist_school"] = euclidean_distance(
    housing["longitude"], housing["latitude"], school_coords[0], school_coords[1]
)
housing["dist_hospital"] = euclidean_distance(
    housing["longitude"], housing["latitude"], hospital_coords[0], hospital_coords[1]
)

# Close means within the threshold of the school OR the hospital.
housing["is_close"] = (
    (housing["dist_school"] < CLOSE_THRESHOLD)
    | (housing["dist_hospital"] < CLOSE_THRESHOLD)
)

# Split house values into the close and far groups. `is_close` is a boolean
# column built from comparisons, so it can be used (and negated) as a mask.
close_houses = housing.loc[housing["is_close"], "median_house_value"]
far_houses = housing.loc[~housing["is_close"], "median_house_value"]

# Welch's t-test (equal_var=False): nothing in this cell checks that the two
# groups share a variance, so equal variances are not assumed.
t_stat, p_val = ttest_ind(
    close_houses,
    far_houses,
    equal_var=False,
    alternative="greater",
)

print(f"T-statistic = {t_stat}")
print(f"P-value = {p_val}")

alpha = 0.05

if p_val < alpha:
    # Message corrected: the close group is defined by school OR hospital,
    # not by the school alone.
    print("We reject the H0, houses that are closer to a school or hospital are more expensive")
else:
    print("We fail to reject the H0, houses that are closer are not more expensive")