From 0b7b8e1864131437527803c180ae4ca12c712d29 Mon Sep 17 00:00:00 2001 From: Lewis Clark Date: Tue, 26 Aug 2025 20:20:56 +0200 Subject: [PATCH] lab completed --- .DS_Store | Bin 0 -> 6148 bytes LABShypothesis.ipynb | 660 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 660 insertions(+) create mode 100644 .DS_Store create mode 100644 LABShypothesis.ipynb diff --git a/.DS_Store b/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..5ff8986e74b24c0ba49e7e98411fa13fdcadabea GIT binary patch literal 6148 zcmeHKJxc>Y5Pf4(1Z=Lbu-soD_z%`27J^;=fSd{n!3#tSyX*Yf=FNv8=OLuB&>5I{ zyL0n)xmVoW0Fd_W;Sy*7Ea-~(&@eT9u0FAo$Ow_nGoJ8{4SKwCb(|^p47a#Ki`9t# zh%KJ!7mOR8`-1Lf-QRcXz9Xmk{AR~pmsP1iDv%1K0;#~iS3r9&ZE@LO92}hvv732L z+$=Gnh~3V7v2sXt%$N$K0%HYczHPMsU(|Hq`\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
NameType 1Type 2HPAttackDefenseSp. AtkSp. DefSpeedGenerationLegendary
0BulbasaurGrassPoison4549496565451False
1IvysaurGrassPoison6062638080601False
2VenusaurGrassPoison808283100100801False
3Mega VenusaurGrassPoison80100123122120801False
4CharmanderFireNaN3952436050651False
....................................
795DiancieRockFairy50100150100150506True
796Mega DiancieRockFairy501601101601101106True
797Hoopa ConfinedPsychicGhost8011060150130706True
798Hoopa UnboundPsychicDark8016060170130806True
799VolcanionFireWater8011012013090706True
\n", + "

800 rows × 11 columns

\n", + "" + ], + "text/plain": [ + " Name Type 1 Type 2 HP Attack Defense Sp. Atk Sp. Def \\\n", + "0 Bulbasaur Grass Poison 45 49 49 65 65 \n", + "1 Ivysaur Grass Poison 60 62 63 80 80 \n", + "2 Venusaur Grass Poison 80 82 83 100 100 \n", + "3 Mega Venusaur Grass Poison 80 100 123 122 120 \n", + "4 Charmander Fire NaN 39 52 43 60 50 \n", + ".. ... ... ... .. ... ... ... ... \n", + "795 Diancie Rock Fairy 50 100 150 100 150 \n", + "796 Mega Diancie Rock Fairy 50 160 110 160 110 \n", + "797 Hoopa Confined Psychic Ghost 80 110 60 150 130 \n", + "798 Hoopa Unbound Psychic Dark 80 160 60 170 130 \n", + "799 Volcanion Fire Water 80 110 120 130 90 \n", + "\n", + " Speed Generation Legendary \n", + "0 45 1 False \n", + "1 60 1 False \n", + "2 80 1 False \n", + "3 80 1 False \n", + "4 65 1 False \n", + ".. ... ... ... \n", + "795 50 6 True \n", + "796 110 6 True \n", + "797 70 6 True \n", + "798 80 6 True \n", + "799 70 6 True \n", + "\n", + "[800 rows x 11 columns]" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = pd.read_csv(\"https://raw.githubusercontent.com/data-bootcamp-v4/data/main/pokemon.csv\")\n", + "df" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "de2fe498-2618-422f-a9f3-f89b72da1c55", + "metadata": {}, + "outputs": [], + "source": [ + "### h0: mean hp of dragon == mean hp of grass \n", + "### h1: mean hp of dragon != mean hp of grass " + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "d4006558-ab6c-4c5b-84c0-ac9f7174709f", + "metadata": {}, + "outputs": [], + "source": [ + "from scipy.stats import ttest_ind" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "fb75bad9-b595-4b6f-b170-6c3a7b02611e", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "t-statistic: 3.3349632905124063\n", + "p-value: 0.0007993609745420599\n" + ] + } + ], + "source": [ + "hp_dragon = df[df['Type 1'] == 'Dragon']['HP']\n", + "hp_grass = df[df['Type 1'] == 'Grass']['HP']\n", + "\n", + "t_stat, p_val = ttest_ind(hp_dragon, hp_grass, equal_var=False, alternative='greater')\n", + "\n", + "print(\"t-statistic:\", t_stat)\n", + "print(\"p-value:\", p_val)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "a541e6f4-ed7b-4ac7-ab83-27d1afa267ba", + "metadata": {}, + "outputs": [], + "source": [ + "### reject the null hypothesis, there is statistical evidence to support that dragon type has higher HP " + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "efa8bb3e-fa7b-40c1-ad4b-6f1f5d3c1bb9", + "metadata": {}, + "outputs": [], + "source": [ + "### h0: mean stats of legendary == stats of non-legendary \n", + "### h1: mean stats of legendary != stats of non-legendary" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "68c085cb-96a7-4c8b-9ece-4f413710e0d7", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
HPAttackDefenseSp. AtkSp. DefSpeed
t-statistic8.981370e+001.043813e+017.637078e+001.341745e+011.001570e+011.147504e+01
p-value1.002691e-132.520372e-164.826998e-111.551461e-212.294933e-151.049016e-18
\n", + "
" + ], + "text/plain": [ + " HP Attack Defense Sp. Atk \\\n", + "t-statistic 8.981370e+00 1.043813e+01 7.637078e+00 1.341745e+01 \n", + "p-value 1.002691e-13 2.520372e-16 4.826998e-11 1.551461e-21 \n", + "\n", + " Sp. Def Speed \n", + "t-statistic 1.001570e+01 1.147504e+01 \n", + "p-value 2.294933e-15 1.049016e-18 " + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "stats_columns = ['HP', 'Attack', 'Defense', 'Sp. Atk', 'Sp. Def', 'Speed']\n", + "\n", + "legendary = df[df['Legendary'] == True]\n", + "non_legendary = df[df['Legendary'] == False]\n", + "\n", + "results = {}\n", + "for col in stats_columns:\n", + " t_stat, p_val = ttest_ind(legendary[col], non_legendary[col], equal_var=False)\n", + " results[col] = {'t-statistic': t_stat, 'p-value': p_val}\n", + "\n", + "pd.DataFrame(results)" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "c944ad75-bc98-45f1-8fd2-5c7f6d401c45", + "metadata": {}, + "outputs": [], + "source": [ + "### reject the null hypothesis, there is statistical evidence to support that legendary pokemon\n", + "### have higher stats in all categories than non-legendary pokemon" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "e9df60ad-8a06-4a09-8728-f1499dcb2d79", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
longitudelatitudehousing_median_agetotal_roomstotal_bedroomspopulationhouseholdsmedian_incomemedian_house_value
0-114.3134.1915.05612.01283.01015.0472.01.493666900.0
1-114.4734.4019.07650.01901.01129.0463.01.820080100.0
2-114.5633.6917.0720.0174.0333.0117.01.650985700.0
3-114.5733.6414.01501.0337.0515.0226.03.191773400.0
4-114.5733.5720.01454.0326.0624.0262.01.925065500.0
\n", + "
" + ], + "text/plain": [ + " longitude latitude housing_median_age total_rooms total_bedrooms \\\n", + "0 -114.31 34.19 15.0 5612.0 1283.0 \n", + "1 -114.47 34.40 19.0 7650.0 1901.0 \n", + "2 -114.56 33.69 17.0 720.0 174.0 \n", + "3 -114.57 33.64 14.0 1501.0 337.0 \n", + "4 -114.57 33.57 20.0 1454.0 326.0 \n", + "\n", + " population households median_income median_house_value \n", + "0 1015.0 472.0 1.4936 66900.0 \n", + "1 1129.0 463.0 1.8200 80100.0 \n", + "2 333.0 117.0 1.6509 85700.0 \n", + "3 515.0 226.0 3.1917 73400.0 \n", + "4 624.0 262.0 1.9250 65500.0 " + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = pd.read_csv(\"https://raw.githubusercontent.com/data-bootcamp-v4/data/main/california_housing.csv\")\n", + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "e745a5ae-1f88-4792-b1b3-5239ab2f4abb", + "metadata": {}, + "outputs": [], + "source": [ + "### h0: the mean median_house_value close to school/hospital is == the mean median_house_value far to school/hospital \n", + "### h1: he mean median_house_value close to school/hospital is != the mean median_house_value far to school/hospital" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "6b24143f-3a18-469d-9b89-e24f7aa50a4d", + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "\n", + "### coords\n", + "school = (-118, 34)\n", + "hospital = (-122, 37)\n", + "\n", + "### function for euclidean distance\n", + "def euclidean_distance(lon, lat, point):\n", + " return np.sqrt((lon - point[0])**2 + (lat - point[1])**2)\n", + "\n", + "### distances to school and hospital\n", + "df['dist_school'] = euclidean_distance(df['longitude'], df['latitude'], school)\n", + "df['dist_hospital'] = euclidean_distance(df['longitude'], df['latitude'], hospital)\n", + "\n", + "### finding out if house is \"close\" (<0.50)\n", + "df['close_to_facility'] = ((df['dist_school'] < 0.50) | (df['dist_hospital'] < 0.50))" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "85873427-e2eb-47eb-ad68-d1933b6e4e1f", + "metadata": {}, + "outputs": [], + "source": [ + "### split df to close and far\n", + "close_houses = df[df['close_to_facility'] == True]['median_house_value']\n", + "far_houses = df[df['close_to_facility'] == False]['median_house_value']" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "f9fc8e5e-7b22-4cca-a929-fa268ff7030c", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "t-statistic: 37.992330214201516\n", + "p-value: 1.5032478884296307e-301\n" + ] + } + ], + "source": [ + "t_stat, p_val = ttest_ind(close_houses, far_houses, equal_var=False, alternative='greater')\n", + "\n", + "print(\"t-statistic:\", t_stat)\n", + "print(\"p-value:\", p_val)" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "5eed0b7e-893b-4660-aef4-42597d94cf31", + "metadata": {}, + "outputs": [], + "source": [ + "### reject the null hypothesis, there is really strong statistical evidence to support that houses close to a school or hospital are \n", + "### much more expensive than houses farther away" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e5366d1b-8718-4e95-85bc-33599391d24a", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python [conda env:base] *", + "language": "python", + "name": "conda-base-py" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.13.5" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}