From 98347e1603793459bbcb966e183327306291cb52 Mon Sep 17 00:00:00 2001 From: Sneha Mirajkar Date: Tue, 26 Nov 2024 15:34:02 +0100 Subject: [PATCH 1/2] Solved lab --- 11_extraweek/numpy_lab.ipynb | 505 +++++++++++++++++++++++++++++++++++ 1 file changed, 505 insertions(+) create mode 100644 11_extraweek/numpy_lab.ipynb diff --git a/11_extraweek/numpy_lab.ipynb b/11_extraweek/numpy_lab.ipynb new file mode 100644 index 00000000..f2cde5b8 --- /dev/null +++ b/11_extraweek/numpy_lab.ipynb @@ -0,0 +1,505 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 5, + "id": "effaacf9-23f7-4aa2-a67c-f2adccde0884", + "metadata": {}, + "outputs": [], + "source": [ + "#### 1. Import the numpy package under the name `np` (★☆☆)\n", + "import numpy as np" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "8b510dfa-af90-425e-82c0-e508505c030c", + "metadata": {}, + "outputs": [], + "source": [ + "#### 2. Create a null vector of size 10 (★☆☆)\n", + "a = np.zeros(10)" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "c2061fba-0a84-4b2d-b56e-5b02970dfccf", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[0. 0. 0. 0. 1. 0. 0. 0. 0. 0.]\n" + ] + } + ], + "source": [ + "#### 3. Create a null vector of size 10 but the fifth value which is 1 (★☆☆)\n", + "a[4] = 1\n", + "print(a)" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "d03d7741-c3cc-41aa-9e2b-477aa83d8084", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26,\n", + " 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43,\n", + " 44, 45, 46, 47, 48, 49])" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#### 4.
Create a vector with values ranging from 10 to 49 (★☆☆)\n", + "np.arange(10, 50)" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "id": "3d5da091-5170-49c4-84f0-b543f5086fc8", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[[0 1 2]\n", + " [3 4 5]\n", + " [6 7 8]]\n" + ] + } + ], + "source": [ + "#### 5. Create a 3x3 matrix with values ranging from 0 to 8 (★☆☆)\n", + "matrix = np.arange(9).reshape(3, 3)\n", + "print(matrix)" + ] + }, + { + "cell_type": "code", + "execution_count": 55, + "id": "3b57867c-d947-4285-91d4-03db0e51b6ad", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(array([0, 1, 4]),)" + ] + }, + "execution_count": 55, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#### 6. Find indices of non-zero elements from [1,2,0,0,4,0] (★☆☆)\n", + "indices = np.nonzero([1, 2, 0, 0, 4, 0])\n", + "indices" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "id": "312dc21b-c984-44a5-ac7a-547e44e4a3d9", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[1., 0., 0.],\n", + " [0., 1., 0.],\n", + " [0., 0., 1.]])" + ] + }, + "execution_count": 31, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#### 7.
Create a 3x3 identity matrix (★☆☆)\n", + "identity_matrix = np.eye(3)\n", + "identity_matrix" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "id": "2defee36-7973-4620-8a59-52dc29056785", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[[0.3870531 , 0.80584844, 0.07273376],\n", + " [0.44373356, 0.23515821, 0.58701712],\n", + " [0.1022437 , 0.95173192, 0.80854401]],\n", + "\n", + " [[0.50097644, 0.41147454, 0.14448391],\n", + " [0.59308041, 0.96911658, 0.33521281],\n", + " [0.99377117, 0.56492588, 0.52264317]],\n", + "\n", + " [[0.09308014, 0.34511664, 0.27778146],\n", + " [0.24754032, 0.61391195, 0.47991588],\n", + " [0.58216247, 0.84256068, 0.69703141]]])" + ] + }, + "execution_count": 35, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#### 8. Create a 3x3x3 array with random values (★☆☆)\n", + "a = np.random.random((3, 3, 3))\n", + "a" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "id": "e99fe9f8-1342-4944-82e2-17431b14a112", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Array:\n", + " [[0.44888958 0.24822949 0.48523876 0.39950067 0.90886907 0.273852\n", + " 0.96535669 0.23705135 0.66737579 0.4586097 ]\n", + " [0.93484025 0.16812102 0.37901467 0.64397372 0.99674691 0.83416266\n", + " 0.75811574 0.43992663 0.29520097 0.8042069 ]\n", + " [0.44764757 0.74979964 0.37700696 0.46867089 0.42613137 0.47076659\n", + " 0.27159407 0.34187485 0.63035435 0.05415954]\n", + " [0.9168444 0.43584292 0.63399993 0.55738403 0.52632792 0.73063377\n", + " 0.08563815 0.29890071 0.49856096 0.49651274]\n", + " [0.33613414 0.23033927 0.19862476 0.74187077 0.98319937 0.71150484\n", + " 0.76303867 0.39146266 0.35568754 0.85016601]\n", + " [0.67000798 0.69453112 0.31374979 0.91780016 0.60455868 0.96242734\n", + " 0.00164459 0.69218637 0.91192824 0.65545409]\n", + " [0.46860237 0.79551149 0.00240227 0.32362102 0.66288596 0.74438755\n", + " 0.89495758 
0.53297736 0.17014172 0.31210354]\n", + " [0.76236942 0.49962477 0.74071791 0.31165076 0.76098128 0.7336446\n", + " 0.20528331 0.2133404 0.16379488 0.61550025]\n", + " [0.72620593 0.43846015 0.9291523 0.96925747 0.34692358 0.9202839\n", + " 0.0733022 0.09827121 0.25730207 0.81283081]\n", + " [0.34884509 0.69166567 0.42707941 0.71557261 0.14256195 0.84357882\n", + " 0.22133587 0.12991067 0.43315728 0.35966784]]\n", + "Minimum value: 0.0016445889237528544\n", + "Maximum value: 0.9967469097632642\n" + ] + } + ], + "source": [ + "#### 9. Create a 10x10 array with random values and find the minimum and maximum values (★☆☆)\n", + "array = np.random.random((10, 10))\n", + "min_value = array.min()\n", + "max_value = array.max()\n", + "\n", + "print(\"Array:\\n\", array)\n", + "print(\"Minimum value:\", min_value)\n", + "print(\"Maximum value:\", max_value)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "id": "dff334c4-4869-4ca9-a070-41a20aca75a3", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Random vector: [0.7553504 0.73291313 0.60003104 0.95951908 0.48793467 0.77819656\n", + " 0.32548594 0.76734394 0.41821003 0.57698546 0.90300161 0.49787616\n", + " 0.18469302 0.99104763 0.56237424 0.90551881 0.96599239 0.58961395\n", + " 0.44788498 0.79240466 0.84284137 0.67551951 0.30127043 0.64307012\n", + " 0.41979998 0.72715337 0.33000978 0.05704993 0.29237401 0.71910426]\n", + "Mean value: 0.6083523494334391\n" + ] + } + ], + "source": [ + "#### 10. 
Create a random vector of size 30 and find the mean value (★☆☆)\n", + "vector = np.random.random(30)\n", + "mean_value = vector.mean()\n", + "\n", + "print(\"Random vector:\", vector)\n", + "print(\"Mean value:\", mean_value)" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "id": "300e3464-e06f-460a-88ef-fed6ca128331", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[0, 0, 0, 0, 0],\n", + " [1, 0, 0, 0, 0],\n", + " [0, 2, 0, 0, 0],\n", + " [0, 0, 3, 0, 0],\n", + " [0, 0, 0, 4, 0]])" + ] + }, + "execution_count": 41, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#### 11. Create a 5x5 matrix with values 1,2,3,4 just below the diagonal \n", + "matrix = np.diag([1, 2, 3, 4], k=-1)\n", + "matrix" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "id": "5fba68e6-0168-4e8f-ba3f-1aab473cc7c1", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Original Matrix:\n", + " [[0.85875897 0.22240611 0.90435921 0.59661399 0.55217872]\n", + " [0.72130315 0.35299 0.39948601 0.32900052 0.56819445]\n", + " [0.40746585 0.10206293 0.59162321 0.97445781 0.92282682]\n", + " [0.47713151 0.8891837 0.73919479 0.57353725 0.69718377]\n", + " [0.90779969 0.1153405 0.39349924 0.43477463 0.82500162]]\n", + "Normalized Matrix:\n", + " [[ 4.49850181e-01 -1.74145138e-01 4.94564899e-01 1.92795877e-01\n", + " 1.49223511e-01]\n", + " [ 3.15063662e-01 -4.60970997e-02 -5.04004740e-04 -6.96207134e-02\n", + " 1.64928227e-01]\n", + " [ 7.32086987e-03 -2.92151334e-01 1.87902016e-01 5.63302232e-01\n", + " 5.12673881e-01]\n", + " [ 7.56336676e-02 4.79684081e-01 3.32607859e-01 1.70167278e-01\n", + " 2.91412661e-01]\n", + " [ 4.97938561e-01 -2.79131601e-01 -6.37451904e-03 3.40993272e-02\n", + " 4.16748372e-01]]\n" + ] + } + ], + "source": [ + "#### 12. 
Normalize a 5x5 random matrix (★☆☆)\n", + "x = np.random.random((5, 5))\n", + "\n", + "mean = x.mean()\n", + "std = x.std()\n", + "normalized_matrix = (x - mean) / std\n", + "\n", + "print(\"Original Matrix:\\n\", x)\n", + "print(\"Normalized Matrix:\\n\", normalized_matrix)" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "id": "cea567c8-977f-4836-be46-995945b9c9b8", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Common values: [4 5]\n" + ] + } + ], + "source": [ + "#### 13. How to find common values between two arrays? (★☆☆)\n", + "array1 = np.array([1, 2, 3, 4, 5])\n", + "array2 = np.array([4, 5, 6, 7, 8])\n", + "\n", + "common_values = np.intersect1d(array1, array2)\n", + "print(\"Common values:\", common_values)" + ] + }, + { + "cell_type": "code", + "execution_count": 51, + "id": "a16a655a-70d7-4191-a07e-5c9cb4b5af5d", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Random vector: [0.85259492 0.96371159 0.22659637 0.50285043 0.3846001 0.98747817\n", + " 0.51709178 0.79936513 0.91346303 0.97339083]\n", + "Sorted vector: [0.22659637 0.3846001 0.50285043 0.51709178 0.79936513 0.85259492\n", + " 0.91346303 0.96371159 0.97339083 0.98747817]\n" + ] + } + ], + "source": [ + "#### 14. Create a random vector of size 10 and sort it (★★☆)\n", + "random_vector = np.random.random(10)\n", + "print(\"Random vector:\", random_vector)\n", + "\n", + "sorted_vector = np.sort(random_vector)\n", + "print(\"Sorted vector:\", sorted_vector)" + ] + }, + { + "cell_type": "code", + "execution_count": 53, + "id": "73d912d7-9826-482f-bf5a-91c6e54b1907", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Replaced vector: [0.66916179 0.82449374 0. 0.89304388 0.66205636 0.2649368\n", + " 0.15042739 0.09368878 0.75523275 0.07910201]\n" + ] + } + ], + "source": [ + "#### 15.
Create random vector of size 10 and replace the maximum value by 0 (★★☆)\n", + "random_vector = np.random.random(10)\n", + "max_index = np.argmax(random_vector)\n", + "\n", + "random_vector[max_index] = 0\n", + "print(\"Replaced vector:\", random_vector)" + ] + }, + { + "cell_type": "code", + "execution_count": 61, + "id": "71c757dd-6c73-45a5-aadf-57b30bdc2920", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Original Matrix:\n", + " [[0.50132499 0.00562021 0.04496398]\n", + " [0.86834639 0.69795805 0.67464636]\n", + " [0.81034912 0.58344356 0.89992663]]\n", + "Matrix after subtracting row means:\n", + " [[ 0.31735526 -0.17834951 -0.13900574]\n", + " [ 0.12136279 -0.04902555 -0.07233724]\n", + " [ 0.04577602 -0.18112954 0.13535353]]\n" + ] + } + ], + "source": [ + "#### 16. Subtract the mean of each row of a matrix (★★☆)\n", + "matrix = np.random.random((3, 3))\n", + "\n", + "row_means = matrix.mean(axis=1, keepdims=True)\n", + "normalized_matrix = matrix - row_means\n", + "\n", + "print(\"Original Matrix:\\n\", matrix)\n", + "print(\"Matrix after subtracting row means:\\n\", normalized_matrix)" + ] + }, + { + "cell_type": "code", + "execution_count": 69, + "id": "ad16e3b9-abd0-44c0-9939-3aeba63ba7b1", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "n largest values using np.argsort: [9995 9996 9997 9998 9999]\n", + "n largest values using np.argpartition: [9995 9996 9997 9998 9999]\n" + ] + } + ], + "source": [ + "#### 17. 
How to get the n largest values of an array (★★★)\n", + "Z = np.arange(10000)\n", + "np.random.shuffle(Z)\n", + "\n", + "n = 5\n", + "largest_values = Z[np.argsort(Z)[-n:]]\n", + "print(\"n largest values using np.argsort:\", largest_values)\n", + "\n", + "largest_values_argpartition = Z[np.argpartition(Z, -n)[-n:]]\n", + "print(\"n largest values using np.argpartition:\", largest_values_argpartition)" + ] + }, + { + "cell_type": "code", + "execution_count": 71, + "id": "d2bc3dda-058d-4b4c-8c2e-996ddc837197", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Original Matrix:\n", + " [[6.74592727 9.41374603 0.50193516]\n", + " [7.75800139 4.03455802 9.98238291]\n", + " [1.68860919 2.1726451 6.19268338]\n", + " [6.52206625 2.35033936 3.31648573]\n", + " [2.94181241 6.62250044 3.98363747]]\n", + "Modified Matrix:\n", + " [[45.50753471 88.61861437 0.50193516]\n", + " [60.18658554 16.27765843 99.64796853]\n", + " [ 1.68860919 2.1726451 38.34932747]\n", + " [42.53734812 2.35033936 3.31648573]\n", + " [ 2.94181241 43.85751211 3.98363747]]\n" + ] + } + ], + "source": [ + "#### 18. 
Create a random 5*3 matrix and replace items that are larger than 4 by their squares ( Example: 6 --> 36) \n", + "matrix = np.random.random((5, 3)) * 10\n", + "matrix_modified = np.where(matrix > 4, matrix**2, matrix)\n", + "\n", + "print(\"Original Matrix:\\n\", matrix)\n", + "print(\"Modified Matrix:\\n\", matrix_modified)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.2" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} From bd25d60cfbca562586a28b19242dcd9c5e800f22 Mon Sep 17 00:00:00 2001 From: Sneha Mirajkar Date: Mon, 2 Dec 2024 15:17:54 +0100 Subject: [PATCH 2/2] BiqQuery_lab --- 11_extraweek/BigQuery_lab.sql | 73 +++++++++++++++++++++++++++++++++++ 1 file changed, 73 insertions(+) create mode 100644 11_extraweek/BigQuery_lab.sql diff --git a/11_extraweek/BigQuery_lab.sql b/11_extraweek/BigQuery_lab.sql new file mode 100644 index 00000000..812b2856 --- /dev/null +++ b/11_extraweek/BigQuery_lab.sql @@ -0,0 +1,73 @@ + +-- Using the NYC Taxi public dataset (Yellow Trips) from Google BigQuery, complete the following exercises: + + + +-- Exercise 1: Count the number of trips in January 2021 +-- The table is named for the whole year 2021, so restrict to January with a half-open range. +SELECT COUNT(*) AS trip_count +FROM `bigquery-public-data.new_york_taxi_trips.tlc_yellow_trips_2021` +WHERE pickup_datetime >= '2021-01-01' AND pickup_datetime < '2021-02-01'; + +-- Exercise 2: Calculate the total revenue generated by taxi trips in 2021 +SELECT SUM(total_amount) AS total_revenue +FROM `bigquery-public-data.new_york_taxi_trips.tlc_yellow_trips_2021`; + +-- Exercise 3: Find the most popular pickup location +-- LIMIT 1 keeps only the top-ranked location instead of the full ranking. +SELECT pickup_location_id, +COUNT(*) AS trip_count +FROM `bigquery-public-data.new_york_taxi_trips.tlc_yellow_trips_2021` +GROUP BY pickup_location_id +ORDER BY trip_count DESC +LIMIT 1; + +-- Exercise 4: Analyze the number of trips per
hour of the day +-- Group by the hour alone so each result row is one hour of the day (0-23). +SELECT EXTRACT(HOUR FROM pickup_datetime) AS pickup_hour, +COUNT(*) AS trip_count +FROM `bigquery-public-data.new_york_taxi_trips.tlc_yellow_trips_2021` +GROUP BY pickup_hour +ORDER BY pickup_hour; + +-- Exercise 5: Calculate the average trip distance +SELECT AVG(trip_distance) AS avg_trip_distance +FROM `bigquery-public-data.new_york_taxi_trips.tlc_yellow_trips_2021`; + +-- Exercise 6: Find the longest trip by distance +-- Rank every trip by distance and keep only the single longest one. +SELECT pickup_location_id, trip_distance +FROM `bigquery-public-data.new_york_taxi_trips.tlc_yellow_trips_2021` +ORDER BY trip_distance DESC +LIMIT 1; + +-- Exercise 7: Calculate the total number of passengers by payment type +SELECT payment_type, SUM(passenger_count) AS passenger_count +FROM `bigquery-public-data.new_york_taxi_trips.tlc_yellow_trips_2021` +GROUP BY payment_type +ORDER BY payment_type; + +-- Exercise 8: Find the most common drop-off location for trips paid by credit card +-- NOTE(review): '1' is assumed to be the credit-card payment code; confirm against the TLC data dictionary. +SELECT dropoff_location_id, +COUNT(*) AS trip_count +FROM `bigquery-public-data.new_york_taxi_trips.tlc_yellow_trips_2021` +WHERE payment_type = '1' +GROUP BY dropoff_location_id +ORDER BY trip_count DESC +LIMIT 1; + +-- Exercise 9: Calculate the total number of trips that had more than 4 passengers +SELECT +COUNT(*) AS trip_count, +SUM(passenger_count) AS passenger_count +FROM `bigquery-public-data.new_york_taxi_trips.tlc_yellow_trips_2021` +WHERE passenger_count > 4; + +-- Exercise 10: Subquery - Find the average fare for trips longer than the average trip distance + +SELECT AVG(fare_amount) AS avg_fare +FROM `bigquery-public-data.new_york_taxi_trips.tlc_yellow_trips_2021` +WHERE trip_distance > ( + SELECT AVG(trip_distance) + FROM `bigquery-public-data.new_york_taxi_trips.tlc_yellow_trips_2021`);