diff --git a/lab-hypothesis-testing.ipynb b/lab-hypothesis-testing.ipynb
index 0cc26d5..2f741bc 100644
--- a/lab-hypothesis-testing.ipynb
+++ b/lab-hypothesis-testing.ipynb
@@ -38,7 +38,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": 43,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -51,7 +51,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 44,
    "metadata": {},
    "outputs": [
     {
@@ -278,7 +278,7 @@
        "[800 rows x 11 columns]"
       ]
      },
-     "execution_count": 3,
+     "execution_count": 44,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -297,11 +297,55 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": 45,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(np.float64(4.097528915272702), np.float64(0.00010181538122353851))"
+      ]
+     },
+     "execution_count": 45,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# 1. Filter the two groups (using both 'Type 1' and 'Type 2')\n",
+    "hp_dragon = df[(df['Type 1'] == 'Dragon') | (df['Type 2'] == 'Dragon')]['HP']\n",
+    "hp_grass = df[(df['Type 1'] == 'Grass') | (df['Type 2'] == 'Grass')]['HP']\n",
+    "\n",
+    "\n",
+    "# 2. Run the t-test (ttest_ind from scipy.stats)\n",
+    "# equal_var=False gives Welch's t-test, preferred when group sizes and variances differ\n",
+    "from scipy import stats\n",
+    "t_stat, p_value_two_sided = stats.ttest_ind(hp_dragon, hp_grass, equal_var=False)\n",
+    "t_stat, p_value_two_sided\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 46,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "np.float64(5.0907690611769255e-05)"
+      ]
+     },
+     "execution_count": 46,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
-    "#code here"
+    "# 3. Convert the two-sided p-value to a one-sided p-value (valid because t > 0, the hypothesized direction)\n",
+    "p_value_one_sided = p_value_two_sided / 2\n",
+    "\n",
+    "p_value_one_sided"
    ]
   },
   {
@@ -313,11 +357,38 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 18,
+   "execution_count": 47,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(array([ 8.98137048, 10.43813354,  7.63707816, 13.41744998, 10.01569661,\n",
+       "        11.47504445]),\n",
+       " array([1.00269117e-13, 2.52037245e-16, 4.82699849e-11, 1.55146141e-21,\n",
+       "        2.29493279e-15, 1.04901631e-18]))"
+      ]
+     },
+     "execution_count": 47,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
-    "#code here"
+    "# Compare each base stat between Legendary and non-Legendary Pokémon\n",
+    "\n",
+    "# Filter all rows where 'Legendary' is True\n",
+    "stats_legendary = df[df['Legendary'] == True][['HP', 'Attack', 'Defense', 'Sp. Atk', 'Sp. Def', 'Speed']]\n",
+    "# Filter all rows where 'Legendary' is False\n",
+    "stats_non_legendary = df[df['Legendary'] == False][['HP', 'Attack', 'Defense', 'Sp. Atk', 'Sp. Def', 'Speed']]\n",
+    "\n",
+    "# The list of stats being tested\n",
+    "stat_columns = ['HP', 'Attack', 'Defense', 'Sp. Atk', 'Sp. Def', 'Speed']\n",
+    "\n",
+    "from scipy import stats\n",
+    "t_stat, p_value_two_sided = stats.ttest_ind(stats_legendary, stats_non_legendary, equal_var=False)\n",
+    "t_stat, p_value_two_sided\n",
+    "\n"
    ]
   },
   {
@@ -337,7 +408,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 48,
    "metadata": {},
    "outputs": [
     {
@@ -453,7 +524,7 @@
        "4 624.0 262.0 1.9250 65500.0 "
       ]
      },
-     "execution_count": 5,
+     "execution_count": 48,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -498,7 +569,7 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python 3",
+   "display_name": "base",
    "language": "python",
    "name": "python3"
   },
@@ -512,7 +583,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.10.9"
+   "version": "3.13.5"
   }
  },
  "nbformat": 4,
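
The new Dragon-vs-Grass cell derives the one-sided p-value by halving the two-sided result. An equivalent route, shown here as a minimal sketch (it assumes `df` is the same Pokémon DataFrame loaded earlier in the notebook), is to request the one-sided test directly through `ttest_ind`'s `alternative` argument, available since SciPy 1.6:

```python
from scipy import stats

# Assumption: df is the Pokémon DataFrame already loaded in the notebook,
# with the 'Type 1', 'Type 2' and 'HP' columns used above.
hp_dragon = df[(df['Type 1'] == 'Dragon') | (df['Type 2'] == 'Dragon')]['HP']
hp_grass = df[(df['Type 1'] == 'Grass') | (df['Type 2'] == 'Grass')]['HP']

# One-sided Welch t-test with H1: mean Dragon HP > mean Grass HP.
# alternative='greater' returns the one-sided p-value directly; it matches
# p_two_sided / 2 whenever the t-statistic falls on the hypothesized side.
result = stats.ttest_ind(hp_dragon, hp_grass, equal_var=False, alternative='greater')
print(f"t = {result.statistic:.3f}, one-sided p = {result.pvalue:.3e}")
```

Both routes give the same number here; the explicit `alternative` argument simply makes the direction of the hypothesis visible in the code.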
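
Similarly, the Legendary-vs-non-Legendary cell passes whole DataFrames to `ttest_ind` and gets back unlabelled arrays of statistics and p-values. A short sketch that keeps each stat's name attached to its result (again assuming the same `df`; the `summary` variable name is only illustrative):

```python
import pandas as pd
from scipy import stats

# Assumption: df is the Pokémon DataFrame already loaded in the notebook,
# with a boolean 'Legendary' column and the six base-stat columns below.
stat_columns = ['HP', 'Attack', 'Defense', 'Sp. Atk', 'Sp. Def', 'Speed']
is_legendary = df['Legendary'] == True

# One Welch t-test per stat, collected into a labelled table.
rows = {}
for col in stat_columns:
    res = stats.ttest_ind(df.loc[is_legendary, col],
                          df.loc[~is_legendary, col],
                          equal_var=False)
    rows[col] = {'t_stat': res.statistic, 'p_value': res.pvalue}

summary = pd.DataFrame(rows).T  # index: stat name; columns: t_stat, p_value
print(summary)
```

This runs the same six tests as the notebook cell, just with the column names preserved in the output.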