From 189f0002eeb22c0e44dcd83ba5b426a3beb7650c Mon Sep 17 00:00:00 2001 From: PriyaArungit Date: Thu, 24 Jul 2025 17:20:52 +0200 Subject: [PATCH] Filter reduce day 3 --- your-code/main.ipynb | 142 ++++++++++++++++++++++++++++++++----------- 1 file changed, 106 insertions(+), 36 deletions(-) diff --git a/your-code/main.ipynb b/your-code/main.ipynb index 9f0e67b..63c4f35 100644 --- a/your-code/main.ipynb +++ b/your-code/main.ipynb @@ -12,9 +12,28 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 49, "metadata": {}, - "outputs": [], + "outputs": [ + { + "ename": "ValueError", + "evalue": "numpy.dtype size changed, may indicate binary incompatibility. Expected 96 from C header, got 88 from PyObject", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[49], line 4\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mfunctools\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m reduce\n\u001b[1;32m 3\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mnumpy\u001b[39;00m\n\u001b[0;32m----> 4\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mpandas\u001b[39;00m\n", + "File \u001b[0;32m~/Library/Python/3.9/lib/python/site-packages/pandas/__init__.py:22\u001b[0m\n\u001b[1;32m 19\u001b[0m \u001b[38;5;28;01mdel\u001b[39;00m _hard_dependencies, _dependency, _missing_dependencies\n\u001b[1;32m 21\u001b[0m \u001b[38;5;66;03m# numpy compat\u001b[39;00m\n\u001b[0;32m---> 22\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mpandas\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mcompat\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m is_numpy_dev \u001b[38;5;28;01mas\u001b[39;00m _is_numpy_dev \u001b[38;5;66;03m# pyright: ignore # noqa:F401\u001b[39;00m\n\u001b[1;32m 24\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 25\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mpandas\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01m_libs\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m hashtable \u001b[38;5;28;01mas\u001b[39;00m _hashtable, lib \u001b[38;5;28;01mas\u001b[39;00m _lib, tslib \u001b[38;5;28;01mas\u001b[39;00m _tslib\n", + "File \u001b[0;32m~/Library/Python/3.9/lib/python/site-packages/pandas/compat/__init__.py:18\u001b[0m\n\u001b[1;32m 15\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mtyping\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m TYPE_CHECKING\n\u001b[1;32m 17\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mpandas\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01m_typing\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m F\n\u001b[0;32m---> 18\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mpandas\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mcompat\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mnumpy\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m (\n\u001b[1;32m 19\u001b[0m is_numpy_dev,\n\u001b[1;32m 20\u001b[0m np_version_under1p21,\n\u001b[1;32m 21\u001b[0m )\n\u001b[1;32m 22\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mpandas\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mcompat\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mpyarrow\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m (\n\u001b[1;32m 23\u001b[0m pa_version_under1p01,\n\u001b[1;32m 24\u001b[0m pa_version_under2p0,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 31\u001b[0m pa_version_under9p0,\n\u001b[1;32m 32\u001b[0m )\n\u001b[1;32m 34\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m TYPE_CHECKING:\n", + "File \u001b[0;32m~/Library/Python/3.9/lib/python/site-packages/pandas/compat/numpy/__init__.py:4\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;124;03m\"\"\" support numpy compatibility across versions \"\"\"\u001b[39;00m\n\u001b[1;32m 2\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mnumpy\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mas\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mnp\u001b[39;00m\n\u001b[0;32m----> 4\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mpandas\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mutil\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mversion\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m Version\n\u001b[1;32m 6\u001b[0m \u001b[38;5;66;03m# numpy versioning\u001b[39;00m\n\u001b[1;32m 7\u001b[0m _np_version \u001b[38;5;241m=\u001b[39m np\u001b[38;5;241m.\u001b[39m__version__\n", + "File \u001b[0;32m~/Library/Python/3.9/lib/python/site-packages/pandas/util/__init__.py:2\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;66;03m# pyright: reportUnusedImport = false\u001b[39;00m\n\u001b[0;32m----> 2\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mpandas\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mutil\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01m_decorators\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m ( \u001b[38;5;66;03m# noqa:F401\u001b[39;00m\n\u001b[1;32m 3\u001b[0m Appender,\n\u001b[1;32m 4\u001b[0m Substitution,\n\u001b[1;32m 5\u001b[0m cache_readonly,\n\u001b[1;32m 6\u001b[0m )\n\u001b[1;32m 8\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mpandas\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mcore\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mutil\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mhashing\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m ( \u001b[38;5;66;03m# noqa:F401\u001b[39;00m\n\u001b[1;32m 9\u001b[0m hash_array,\n\u001b[1;32m 10\u001b[0m hash_pandas_object,\n\u001b[1;32m 11\u001b[0m )\n\u001b[1;32m 14\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21m__getattr__\u001b[39m(name):\n", + "File \u001b[0;32m~/Library/Python/3.9/lib/python/site-packages/pandas/util/_decorators.py:14\u001b[0m\n\u001b[1;32m 6\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mtyping\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m (\n\u001b[1;32m 7\u001b[0m Any,\n\u001b[1;32m 8\u001b[0m Callable,\n\u001b[1;32m 9\u001b[0m Mapping,\n\u001b[1;32m 10\u001b[0m cast,\n\u001b[1;32m 11\u001b[0m )\n\u001b[1;32m 12\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mwarnings\u001b[39;00m\n\u001b[0;32m---> 14\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mpandas\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01m_libs\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mproperties\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m cache_readonly\n\u001b[1;32m 15\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mpandas\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01m_typing\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m (\n\u001b[1;32m 16\u001b[0m F,\n\u001b[1;32m 17\u001b[0m T,\n\u001b[1;32m 18\u001b[0m )\n\u001b[1;32m 19\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mpandas\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mutil\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01m_exceptions\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m find_stack_level\n", + "File \u001b[0;32m~/Library/Python/3.9/lib/python/site-packages/pandas/_libs/__init__.py:13\u001b[0m\n\u001b[1;32m 1\u001b[0m __all__ \u001b[38;5;241m=\u001b[39m [\n\u001b[1;32m 2\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mNaT\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m 3\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mNaTType\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 9\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mInterval\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m 10\u001b[0m ]\n\u001b[0;32m---> 13\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mpandas\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01m_libs\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01minterval\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m Interval\n\u001b[1;32m 14\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mpandas\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01m_libs\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mtslibs\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m (\n\u001b[1;32m 15\u001b[0m NaT,\n\u001b[1;32m 16\u001b[0m NaTType,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 21\u001b[0m iNaT,\n\u001b[1;32m 22\u001b[0m )\n", + "File \u001b[0;32m~/Library/Python/3.9/lib/python/site-packages/pandas/_libs/interval.pyx:1\u001b[0m, in \u001b[0;36minit pandas._libs.interval\u001b[0;34m()\u001b[0m\n", + "\u001b[0;31mValueError\u001b[0m: numpy.dtype size changed, may indicate binary incompatibility. Expected 96 from C header, got 88 from PyObject" + ] + } + ], "source": [ "# Import reduce from functools, numpy and pandas\n", "from functools import reduce\n", @@ -35,7 +54,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 61, "metadata": {}, "outputs": [], "source": [ @@ -48,9 +67,20 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 62, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "13637" + ] + }, + "execution_count": 62, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "len(prophet)" ] @@ -66,11 +96,11 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 51, "metadata": {}, "outputs": [], "source": [ - "# your code here" + "prophet = prophet[568:]\n" ] }, { @@ -82,11 +112,30 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 52, "metadata": {}, - "outputs": [], - "source": [ - "# your code here" + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['the{7', 'chosen', 'and', 'the\\nbeloved', 'who', 'was', 'a', 'dawn', 'unto', 'his']\n" + ] + } + ], + "source": [ + "import string\n", + "\n", + "# Function to clean a single word\n", + "def clean_word(word):\n", + " # Remove punctuation from the start and end of the word\n", + " return word.strip(string.punctuation).lower()\n", + "\n", + "# Apply the function to all words using map\n", + "cleaned_prophet = list(map(clean_word, prophet))\n", + "\n", + "# Let's see words 1-10 after cleaning\n", + "print(cleaned_prophet[1:11])\n" ] }, { @@ -100,7 +149,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 53, "metadata": {}, "outputs": [], "source": [ @@ -114,16 +163,24 @@ " Output: 'the'\n", " '''\n", " \n", - " # your code here" + " return x.split('{')[0]" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 54, "metadata": {}, "outputs": [], "source": [ - "# your code here" + "import string\n", + "\n", + "def clean_word(word):\n", + " word = reference(word) # Remove references\n", + " word = word.strip(string.punctuation) # Remove punctuation\n", + " return word.lower() # Convert to lowercase\n", + "\n", + "# Apply the full cleaning function to all words\n", + "cleaned_prophet = list(map(clean_word, prophet))\n" ] }, { @@ -135,11 +192,11 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 55, "metadata": {}, "outputs": [], "source": [ - "# your code here" + "# print(prophet_reference[:10])" ] }, { @@ -151,7 +208,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 60, "metadata": {}, "outputs": [], "source": [ @@ -164,8 +221,7 @@ " Input: 'the\\nbeloved'\n", " Output: ['the', 'beloved']\n", " '''\n", - " \n", - " # your code here" + " return x.split('\\n')\n" ] }, { @@ -177,13 +233,14 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 59, "metadata": { "scrolled": true }, "outputs": [], "source": [ - "# your code here" + "\n", + "# prophet_line = list(map(line_break, prophet_reference))\n" ] }, { @@ -195,9 +252,21 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 63, "metadata": {}, - "outputs": [], + "outputs": [ + { + "ename": "NameError", + "evalue": "name 'prophet_line' is not defined", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[63], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m prophet_flat \u001b[38;5;241m=\u001b[39m [i \u001b[38;5;28;01mfor\u001b[39;00m sub \u001b[38;5;129;01min\u001b[39;00m \u001b[43mprophet_line\u001b[49m \u001b[38;5;28;01mfor\u001b[39;00m i \u001b[38;5;129;01min\u001b[39;00m sub]\n\u001b[1;32m 2\u001b[0m prophet_flat\n", + "\u001b[0;31mNameError\u001b[0m: name 'prophet_line' is not defined" + ] + } + ], "source": [ "prophet_flat = [i for sub in prophet_line for i in sub]\n", "prophet_flat" @@ -205,11 +274,12 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 64, "metadata": {}, "outputs": [], "source": [ - "# your code here" + "# prophet_flat = [i for sub in prophet_line for i in sub]\n", + "# print(prophet_flat[:20]) # to see the first 20 words" ] }, { @@ -223,7 +293,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 65, "metadata": {}, "outputs": [], "source": [ @@ -243,8 +313,8 @@ " '''\n", " \n", " word_list = ['and', 'the', 'a', 'an']\n", - " \n", - " # your code here" + " return x not in word_list\n", + " filtered_words = list(filter(word_filter, prophet_flat))\n" ] }, { @@ -256,7 +326,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 42, "metadata": { "scrolled": true }, @@ -276,7 +346,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 56, "metadata": {}, "outputs": [], "source": [ @@ -300,7 +370,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 44, "metadata": {}, "outputs": [], "source": [ @@ -319,7 +389,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 45, "metadata": { "scrolled": true }, @@ -337,7 +407,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 46, "metadata": {}, "outputs": [], "source": [ @@ -347,7 +417,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3 (ipykernel)", + "display_name": "Python 3", "language": "python", "name": "python3" }, @@ -361,7 +431,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.13" + "version": "3.9.6" } }, "nbformat": 4,