From 98347e1603793459bbcb966e183327306291cb52 Mon Sep 17 00:00:00 2001
From: Sneha Mirajkar <sneha.miraj009@gmail.com>
Date: Tue, 26 Nov 2024 15:34:02 +0100
Subject: [PATCH 1/2] Solved lab

---
 11_extraweek/numpy_lab.ipynb | 505 +++++++++++++++++++++++++++++++++++
 1 file changed, 505 insertions(+)
 create mode 100644 11_extraweek/numpy_lab.ipynb

diff --git a/11_extraweek/numpy_lab.ipynb b/11_extraweek/numpy_lab.ipynb
new file mode 100644
index 00000000..f2cde5b8
--- /dev/null
+++ b/11_extraweek/numpy_lab.ipynb
@@ -0,0 +1,505 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "effaacf9-23f7-4aa2-a67c-f2adccde0884",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#### 1. Import the numpy package under the name `np` (★☆☆)\n",
+    "import numpy as np"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "id": "8b510dfa-af90-425e-82c0-e508505c030c",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#### 2. Create a null vector of size 10 (★☆☆)\n",
+    "a = np.zeros(10)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "id": "c2061fba-0a84-4b2d-b56e-5b02970dfccf",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[0. 0. 0. 0. 1. 0. 0. 0. 0. 0.]\n"
+     ]
+    }
+   ],
+   "source": [
+    "#### 3. Create a null vector of size 10 but the fifth value which is 1 (★☆☆)\n",
+    "a[4] = 1\n",
+    "print(a)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 19,
+   "id": "d03d7741-c3cc-41aa-9e2b-477aa83d8084",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "array([10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26,\n",
+       "       27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43,\n",
+       "       44, 45, 46, 47, 48, 49])"
+      ]
+     },
+     "execution_count": 19,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "#### 4. Create a vector with values ranging from 10 to 49 (★☆☆)\n",
+    "np.arange(10, 50)  # stop is exclusive, so 50 is required to include 49"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 23,
+   "id": "3d5da091-5170-49c4-84f0-b543f5086fc8",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[[0 1 2]\n",
+      " [3 4 5]\n",
+      " [6 7 8]]\n"
+     ]
+    }
+   ],
+   "source": [
+    "#### 5. Create a 3x3 matrix with values ranging from 0 to 8 (★☆☆)\n",
+    "matrix = np.arange(9).reshape(3, 3)\n",
+    "print(matrix)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 55,
+   "id": "3b57867c-d947-4285-91d4-03db0e51b6ad",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(array([0, 1, 4]),)"
+      ]
+     },
+     "execution_count": 55,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "#### 6. Find indices of non-zero elements from [1,2,0,0,4,0] (★☆☆)\n",
+    "indices = np.nonzero([1, 2, 0, 0, 4, 0])\n",
+    "indices"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 31,
+   "id": "312dc21b-c984-44a5-ac7a-547e44e4a3d9",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "array([[1., 0., 0.],\n",
+       "       [0., 1., 0.],\n",
+       "       [0., 0., 1.]])"
+      ]
+     },
+     "execution_count": 31,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "#### 7. Create a 3x3 identity matrix (★☆☆)\n",
+    "identity_matrix = np.eye(3)\n",
+    "identity_matrix"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 35,
+   "id": "2defee36-7973-4620-8a59-52dc29056785",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "array([[[0.3870531 , 0.80584844, 0.07273376],\n",
+       "        [0.44373356, 0.23515821, 0.58701712],\n",
+       "        [0.1022437 , 0.95173192, 0.80854401]],\n",
+       "\n",
+       "       [[0.50097644, 0.41147454, 0.14448391],\n",
+       "        [0.59308041, 0.96911658, 0.33521281],\n",
+       "        [0.99377117, 0.56492588, 0.52264317]],\n",
+       "\n",
+       "       [[0.09308014, 0.34511664, 0.27778146],\n",
+       "        [0.24754032, 0.61391195, 0.47991588],\n",
+       "        [0.58216247, 0.84256068, 0.69703141]]])"
+      ]
+     },
+     "execution_count": 35,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "#### 8. Create a 3x3x3 array with random values (★☆☆)\n",
+    "a = np.random.random((3, 3, 3))\n",
+    "a"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 37,
+   "id": "e99fe9f8-1342-4944-82e2-17431b14a112",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Array:\n",
+      " [[0.44888958 0.24822949 0.48523876 0.39950067 0.90886907 0.273852\n",
+      "  0.96535669 0.23705135 0.66737579 0.4586097 ]\n",
+      " [0.93484025 0.16812102 0.37901467 0.64397372 0.99674691 0.83416266\n",
+      "  0.75811574 0.43992663 0.29520097 0.8042069 ]\n",
+      " [0.44764757 0.74979964 0.37700696 0.46867089 0.42613137 0.47076659\n",
+      "  0.27159407 0.34187485 0.63035435 0.05415954]\n",
+      " [0.9168444  0.43584292 0.63399993 0.55738403 0.52632792 0.73063377\n",
+      "  0.08563815 0.29890071 0.49856096 0.49651274]\n",
+      " [0.33613414 0.23033927 0.19862476 0.74187077 0.98319937 0.71150484\n",
+      "  0.76303867 0.39146266 0.35568754 0.85016601]\n",
+      " [0.67000798 0.69453112 0.31374979 0.91780016 0.60455868 0.96242734\n",
+      "  0.00164459 0.69218637 0.91192824 0.65545409]\n",
+      " [0.46860237 0.79551149 0.00240227 0.32362102 0.66288596 0.74438755\n",
+      "  0.89495758 0.53297736 0.17014172 0.31210354]\n",
+      " [0.76236942 0.49962477 0.74071791 0.31165076 0.76098128 0.7336446\n",
+      "  0.20528331 0.2133404  0.16379488 0.61550025]\n",
+      " [0.72620593 0.43846015 0.9291523  0.96925747 0.34692358 0.9202839\n",
+      "  0.0733022  0.09827121 0.25730207 0.81283081]\n",
+      " [0.34884509 0.69166567 0.42707941 0.71557261 0.14256195 0.84357882\n",
+      "  0.22133587 0.12991067 0.43315728 0.35966784]]\n",
+      "Minimum value: 0.0016445889237528544\n",
+      "Maximum value: 0.9967469097632642\n"
+     ]
+    }
+   ],
+   "source": [
+    "#### 9. Create a 10x10 array with random values and find the minimum and maximum values (★☆☆)\n",
+    "array = np.random.random((10, 10))\n",
+    "min_value = array.min()\n",
+    "max_value = array.max()\n",
+    "\n",
+    "print(\"Array:\\n\", array)\n",
+    "print(\"Minimum value:\", min_value)\n",
+    "print(\"Maximum value:\", max_value)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 39,
+   "id": "dff334c4-4869-4ca9-a070-41a20aca75a3",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Random vector: [0.7553504  0.73291313 0.60003104 0.95951908 0.48793467 0.77819656\n",
+      " 0.32548594 0.76734394 0.41821003 0.57698546 0.90300161 0.49787616\n",
+      " 0.18469302 0.99104763 0.56237424 0.90551881 0.96599239 0.58961395\n",
+      " 0.44788498 0.79240466 0.84284137 0.67551951 0.30127043 0.64307012\n",
+      " 0.41979998 0.72715337 0.33000978 0.05704993 0.29237401 0.71910426]\n",
+      "Mean value: 0.6083523494334391\n"
+     ]
+    }
+   ],
+   "source": [
+    "#### 10. Create a random vector of size 30 and find the mean value (★☆☆)\n",
+    "vector = np.random.random(30)\n",
+    "mean_value = vector.mean()\n",
+    "\n",
+    "print(\"Random vector:\", vector)\n",
+    "print(\"Mean value:\", mean_value)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 41,
+   "id": "300e3464-e06f-460a-88ef-fed6ca128331",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "array([[0, 0, 0, 0, 0],\n",
+       "       [1, 0, 0, 0, 0],\n",
+       "       [0, 2, 0, 0, 0],\n",
+       "       [0, 0, 3, 0, 0],\n",
+       "       [0, 0, 0, 4, 0]])"
+      ]
+     },
+     "execution_count": 41,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "#### 11. Create a 5x5 matrix with values 1,2,3,4 just below the diagonal \n",
+    "matrix = np.diag([1, 2, 3, 4], k=-1)\n",
+    "matrix"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 43,
+   "id": "5fba68e6-0168-4e8f-ba3f-1aab473cc7c1",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Original Matrix:\n",
+      " [[0.85875897 0.22240611 0.90435921 0.59661399 0.55217872]\n",
+      " [0.72130315 0.35299    0.39948601 0.32900052 0.56819445]\n",
+      " [0.40746585 0.10206293 0.59162321 0.97445781 0.92282682]\n",
+      " [0.47713151 0.8891837  0.73919479 0.57353725 0.69718377]\n",
+      " [0.90779969 0.1153405  0.39349924 0.43477463 0.82500162]]\n",
+      "Normalized Matrix:\n",
+      " [[ 4.49850181e-01 -1.74145138e-01  4.94564899e-01  1.92795877e-01\n",
+      "   1.49223511e-01]\n",
+      " [ 3.15063662e-01 -4.60970997e-02 -5.04004740e-04 -6.96207134e-02\n",
+      "   1.64928227e-01]\n",
+      " [ 7.32086987e-03 -2.92151334e-01  1.87902016e-01  5.63302232e-01\n",
+      "   5.12673881e-01]\n",
+      " [ 7.56336676e-02  4.79684081e-01  3.32607859e-01  1.70167278e-01\n",
+      "   2.91412661e-01]\n",
+      " [ 4.97938561e-01 -2.79131601e-01 -6.37451904e-03  3.40993272e-02\n",
+      "   4.16748372e-01]]\n"
+     ]
+    }
+   ],
+   "source": [
+    "#### 12. Normalize a 5x5 random matrix (★☆☆)\n",
+    "x = np.random.random((5, 5))\n",
+    "# Standardize with x's own mean/std, not the leftover `matrix` from Exercise 11\n",
+    "mean = x.mean()\n",
+    "std = x.std()\n",
+    "normalized_matrix = (x - mean) / std\n",
+    "\n",
+    "print(\"Original Matrix:\\n\", x)\n",
+    "print(\"Normalized Matrix:\\n\", normalized_matrix)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 45,
+   "id": "cea567c8-977f-4836-be46-995945b9c9b8",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Common values: [4 5]\n"
+     ]
+    }
+   ],
+   "source": [
+    "#### 13. How to find common values between two arrays? (★☆☆)\n",
+    "array1 = np.array([1, 2, 3, 4, 5])\n",
+    "array2 = np.array([4, 5, 6, 7, 8])\n",
+    "\n",
+    "common_values = np.intersect1d(array1, array2)\n",
+    "print(\"Common values:\", common_values)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 51,
+   "id": "a16a655a-70d7-4191-a07e-5c9cb4b5af5d",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Random vector: [0.85259492 0.96371159 0.22659637 0.50285043 0.3846001  0.98747817\n",
+      " 0.51709178 0.79936513 0.91346303 0.97339083]\n",
+      "Sorted vector: [0.22659637 0.3846001  0.50285043 0.51709178 0.79936513 0.85259492\n",
+      " 0.91346303 0.96371159 0.97339083 0.98747817]\n"
+     ]
+    }
+   ],
+   "source": [
+    "#### 14. Create a random vector of size 10 and sort it (★★☆)\n",
+    "random_vector = np.random.random(10)\n",
+    "print(\"Random vector:\", random_vector)\n",
+    "# Sort the vector created above (not Exercise 10's `vector`); np.sort returns a sorted copy\n",
+    "sorted_vector = np.sort(random_vector)\n",
+    "print(\"Sorted vector:\", sorted_vector)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 53,
+   "id": "73d912d7-9826-482f-bf5a-91c6e54b1907",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Replaced vector: [0.66916179 0.82449374 0.         0.89304388 0.66205636 0.2649368\n",
+      " 0.15042739 0.09368878 0.75523275 0.07910201]\n"
+     ]
+    }
+   ],
+   "source": [
+    "#### 15. Create random vector of size 10 and replace the maximum value by 0 (★★☆)\n",
+    "random_vector = np.random.random(10)\n",
+    "max_index = np.argmax(random_vector)\n",
+    "\n",
+    "random_vector[max_index] = 0\n",
+    "print(\"Replaced vector:\", random_vector)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 61,
+   "id": "71c757dd-6c73-45a5-aadf-57b30bdc2920",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Original Matrix:\n",
+      " [[0.50132499 0.00562021 0.04496398]\n",
+      " [0.86834639 0.69795805 0.67464636]\n",
+      " [0.81034912 0.58344356 0.89992663]]\n",
+      "Matrix after subtracting row means:\n",
+      " [[ 0.31735526 -0.17834951 -0.13900574]\n",
+      " [ 0.12136279 -0.04902555 -0.07233724]\n",
+      " [ 0.04577602 -0.18112954  0.13535353]]\n"
+     ]
+    }
+   ],
+   "source": [
+    "#### 16. Subtract the mean of each row of a matrix (★★☆)\n",
+    "matrix = np.random.random((3, 3))\n",
+    "\n",
+    "row_means = matrix.mean(axis=1, keepdims=True)\n",
+    "normalized_matrix = matrix - row_means\n",
+    "\n",
+    "print(\"Original Matrix:\\n\", matrix)\n",
+    "print(\"Matrix after subtracting row means:\\n\", normalized_matrix)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 69,
+   "id": "ad16e3b9-abd0-44c0-9939-3aeba63ba7b1",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "n largest values using np.argsort: [9995 9996 9997 9998 9999]\n",
+      "n largest values using np.argpartition: [9995 9996 9997 9998 9999]\n"
+     ]
+    }
+   ],
+   "source": [
+    "#### 17. How to get the n largest values of an array (★★★)\n",
+    "Z = np.arange(10000)\n",
+    "np.random.shuffle(Z)\n",
+    "\n",
+    "n = 5\n",
+    "largest_values = Z[np.argsort(Z)[-n:]]\n",
+    "print(\"n largest values using np.argsort:\", largest_values)\n",
+    "\n",
+    "largest_values_argpartition = Z[np.argpartition(Z, -n)[-n:]]\n",
+    "print(\"n largest values using np.argpartition:\", largest_values_argpartition)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 71,
+   "id": "d2bc3dda-058d-4b4c-8c2e-996ddc837197",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Original Matrix:\n",
+      " [[6.74592727 9.41374603 0.50193516]\n",
+      " [7.75800139 4.03455802 9.98238291]\n",
+      " [1.68860919 2.1726451  6.19268338]\n",
+      " [6.52206625 2.35033936 3.31648573]\n",
+      " [2.94181241 6.62250044 3.98363747]]\n",
+      "Modified Matrix:\n",
+      " [[45.50753471 88.61861437  0.50193516]\n",
+      " [60.18658554 16.27765843 99.64796853]\n",
+      " [ 1.68860919  2.1726451  38.34932747]\n",
+      " [42.53734812  2.35033936  3.31648573]\n",
+      " [ 2.94181241 43.85751211  3.98363747]]\n"
+     ]
+    }
+   ],
+   "source": [
+    "#### 18. Create a random 5*3 matrix and replace items that are larger than 4 by their squares ( Example:  6 --> 36) \n",
+    "matrix = np.random.random((5, 3)) * 10\n",
+    "matrix_modified = np.where(matrix > 4, matrix**2, matrix)\n",
+    "\n",
+    "print(\"Original Matrix:\\n\", matrix)\n",
+    "print(\"Modified Matrix:\\n\", matrix_modified)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.2"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}

From bd25d60cfbca562586a28b19242dcd9c5e800f22 Mon Sep 17 00:00:00 2001
From: Sneha Mirajkar <sneha.miraj009@gmail.com>
Date: Mon, 2 Dec 2024 15:17:54 +0100
Subject: [PATCH 2/2] BigQuery_lab

---
 11_extraweek/BigQuery_lab.sql | 73 +++++++++++++++++++++++++++++++++++
 1 file changed, 73 insertions(+)
 create mode 100644 11_extraweek/BigQuery_lab.sql

diff --git a/11_extraweek/BigQuery_lab.sql b/11_extraweek/BigQuery_lab.sql
new file mode 100644
index 00000000..812b2856
--- /dev/null
+++ b/11_extraweek/BigQuery_lab.sql
@@ -0,0 +1,73 @@
+
+-- Using the NYC Taxi public dataset (Yellow Trips) from Google BigQuery, complete the following exercises:
+
+-- Exercise 1: Count the number of trips in January 2021
+-- The table covers the whole year, so restrict pickups to a half-open January range.
+SELECT COUNT(*) AS trip_count
+FROM `bigquery-public-data.new_york_taxi_trips.tlc_yellow_trips_2021`
+WHERE pickup_datetime >= '2021-01-01' AND pickup_datetime < '2021-02-01';
+
+-- Exercise 2: Calculate the total revenue generated by taxi trips in 2021 (sum of total_amount)
+SELECT SUM(total_amount) AS total_revenue
+FROM `bigquery-public-data.new_york_taxi_trips.tlc_yellow_trips_2021`;
+
+-- Exercise 3: Find the most popular pickup location
+SELECT pickup_location_id, COUNT(*) AS trip_count
+FROM `bigquery-public-data.new_york_taxi_trips.tlc_yellow_trips_2021`
+GROUP BY pickup_location_id
+ORDER BY trip_count DESC
+LIMIT 1;  -- the busiest location is the first row of the ranking
+
+-- Exercise 4: Analyze the number of trips per hour of the day
+-- Bucket by pickup hour only; grouping by location or day would split each hour into many rows.
+SELECT
+    EXTRACT(HOUR FROM pickup_datetime) AS pickup_hour,
+    COUNT(*) AS trip_count
+FROM `bigquery-public-data.new_york_taxi_trips.tlc_yellow_trips_2021`
+GROUP BY pickup_hour
+ORDER BY pickup_hour;
+
+-- Exercise 5: Average trip distance over every row of the 2021 yellow-trips table
+SELECT AVG(trips.trip_distance)
+FROM `bigquery-public-data.new_york_taxi_trips.tlc_yellow_trips_2021` AS trips;
+
+-- Exercise 6: Find the longest trip by distance
+-- Breakdown first: the longest trip starting from each pickup location.
+SELECT pickup_location_id, MAX(trip_distance) AS longest_trip
+FROM `bigquery-public-data.new_york_taxi_trips.tlc_yellow_trips_2021`
+GROUP BY pickup_location_id
+ORDER BY longest_trip DESC;
+
+SELECT pickup_location_id, trip_distance
+FROM `bigquery-public-data.new_york_taxi_trips.tlc_yellow_trips_2021`
+ORDER BY trip_distance DESC
+LIMIT 1;  -- answer: only the single longest trip, instead of returning every trip sorted
+
+-- Exercise 7: Total passengers carried, broken down by payment type
+SELECT trips.payment_type, SUM(trips.passenger_count) AS passenger_count
+FROM `bigquery-public-data.new_york_taxi_trips.tlc_yellow_trips_2021` AS trips
+GROUP BY trips.payment_type
+ORDER BY trips.payment_type;
+
+-- Exercise 8: Find the most common drop-off location for trips paid by credit card
+SELECT dropoff_location_id, COUNT(*) AS trip_count
+FROM `bigquery-public-data.new_york_taxi_trips.tlc_yellow_trips_2021`
+WHERE payment_type = '1'  -- payment_type '1' is the credit-card code
+GROUP BY dropoff_location_id
+ORDER BY trip_count DESC
+LIMIT 1;  -- keep only the top-ranked drop-off location
+
+-- Exercise 9: Count the trips that carried more than 4 passengers (and the passengers on them)
+SELECT
+    COUNT(*) AS trip_count,
+    SUM(trips.passenger_count) AS passenger_count
+FROM `bigquery-public-data.new_york_taxi_trips.tlc_yellow_trips_2021` AS trips
+WHERE trips.passenger_count > 4;
+
+-- Exercise 10: Subquery - average fare of trips whose distance exceeds the overall average distance
+SELECT AVG(trips.fare_amount) AS avg_fare
+FROM `bigquery-public-data.new_york_taxi_trips.tlc_yellow_trips_2021` AS trips
+WHERE trips.trip_distance > (
+    SELECT AVG(all_trips.trip_distance)
+    FROM `bigquery-public-data.new_york_taxi_trips.tlc_yellow_trips_2021` AS all_trips
+);