diff --git a/.gitignore b/.gitignore index f4b980ff..b954eb8c 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,8 @@ .DS_Store .DS_Store? ._* -__pycache__/ +**/__pycache__/ +docs/build/ +docs/source/_templates/ .env +venv/ diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 00000000..97288896 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,65 @@ +# Contributing to AmazScraper + +Thank you for your interest in contributing to **AmazScraper**! We welcome contributions from the community to help improve and grow the project. This document outlines the guidelines and steps for contributing. + +## Table of Contents +- [Getting Started](#getting-started) +- [Contributing Guidelines](#contributing-guidelines) +- [Code Style](#code-style) +- [Submitting a Pull Request](#submitting-a-pull-request) +- [Reporting Issues](#reporting-issues) +- [License](#license) + +## Getting Started +To get started with contributing, follow these steps: + +1. Fork the repository on GitHub. +2. Clone your forked repository to your local machine. +3. Install the necessary dependencies. +4. Make your changes or additions. +5. Test your changes thoroughly. +6. Commit your changes with descriptive commit messages. +7. Push your changes to your forked repository. +8. Submit a pull request to the main repository. + +## Contributing Guidelines +Please adhere to the following guidelines when contributing to AmazScraper: + +- Follow the code style and formatting guidelines specified in the [Code Style](#code-style) section. +- Make sure your changes are well-documented and include any necessary updates to the project's documentation. +- Write clear and concise commit messages that describe the purpose of your changes. +- Be respectful and considerate towards other contributors and maintainers. + +## Code Style +Please make sure to format your code accordingly before submitting a pull request. 
+### Python +- [Style Guide for Python Code](https://www.python.org/dev/peps/pep-0008/) +- [Google Python Style Guide](https://google.github.io/styleguide/pyguide.html) +- [The Hitchhiker's Guide to Python](https://docs.python-guide.org/writing/style/) + +### Arduino +- [Arduino Style Guide for Writing Content](https://docs.arduino.cc/learn/contributions/arduino-writing-style-guide) +- [Arduino Style Guide for Creating Libraries](https://docs.arduino.cc/learn/contributions/arduino-library-style-guide) + +### C++ +- [Google C++ Style Guide](https://google.github.io/styleguide/cppguide.html) + +## Submitting a Pull Request +To submit your changes for review, please follow these steps: + +1. Ensure that your changes are pushed to your forked repository. +2. Go to the main repository on GitHub and navigate to the "Pull Requests" tab. +3. Click on the "New Pull Request" button. +4. Select your forked repository and the branch containing your changes. +5. Provide a descriptive title and detailed description for your pull request. +6. Reviewers will provide feedback and discuss any necessary changes. +7. Once your pull request is approved, it will be merged into the main repository. + +## Reporting Issues +If you encounter any issues or have suggestions for improvements, please open an issue on the GitHub repository. Provide a clear and detailed description of the problem or suggestion, along with any relevant information or steps to reproduce the issue. + +## License +AmazScraper is licensed under the **Apache License 2.0**. See the [LICENSE](LICENSE) file for more information. +By contributing to this project, you agree to license your contributions under the same license. + +Can't wait to see your contributions! :smile: diff --git a/README.md b/README.md index 17f1ee0d..7345ca6f 100644 --- a/README.md +++ b/README.md @@ -16,36 +16,51 @@ Follow the following steps: 1. ```bash git clone https://github.com/VinciGit00/AmazScraper.git ``` -2. ```bash +2. 
(Optional) + ```bash + python -m venv venv + source ./venv/bin/activate + ``` +3. ```bash pip install -r requirements.txt ``` -3. Go to [https://openai.com](https://openai.com/) and login -4. Now you can access to [https://platform.openai.com/docs/overview](https://platform.openai.com/docs/overview) -5. Create a new API key and copy it - ![Screenshot 2024-01-26 alle 17.10.10.png](docs/generate_api_key/step_1.png) +4. Go to [https://openai.com](https://openai.com/) and log in +5. You can now access [https://platform.openai.com/docs/overview](https://platform.openai.com/docs/overview) +6. Create a new API key and copy it + + ![Step 1 Screenshot](docs/generate_api_key/step_1.png) + + ![Step 2 Screenshot](docs/generate_api_key/step_2.png) + + ![Step 3 Screenshot](docs/generate_api_key/step_3.png) + + ![Step 4 Screenshot](docs/generate_api_key/step_4.png) -![Screenshot 2024-01-26 alle 17.10.31.png](docs/generate_api_key/step_2.png) -![Screenshot 2024-01-26 alle 17.10.52.png](docs/generate_api_key/step_3.png) - -![Screenshot 2024-01-26 alle 17.11.10.png](docs/generate_api_key/step_4.png) - -6. Open the .env file inside main and paste the API key +7. Create a .env file inside the main folder and paste the API key into it ```config API_KEY="your openai.com api key" ``` -7. You are ready to go! 🚀 - +8. You are ready to go! 🚀 +9. 
Try running the examples using: + ```bash + python -m examples.html_scraping + ``` + or + ```bash + python -m AmazScraper.examples.html_scraping + ``` + # Practical use ## Using AmazScraper as a library ```python -from AmazScraper.class_generator import Generator +from AmazScraper.classes.class_generator import Generator -from AmazScraper.getter import get_function, scraper +from AmazScraper.utils.getter import get_function, scraper values = [ { @@ -66,7 +81,7 @@ if __name__ == "__main__": ```python import sys -from AmazScraper.class_generator import Generator +from AmazScraper.classes.class_generator import Generator values = [ { diff --git a/__init__.py b/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/classes/__init__.py b/classes/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/class_generator.py b/classes/class_generator.py similarity index 94% rename from class_generator.py rename to classes/class_generator.py index 858c4081..7144f509 100644 --- a/class_generator.py +++ b/classes/class_generator.py @@ -1,7 +1,7 @@ import os from dotenv import load_dotenv -from AmazScraper.pydantic_class import _Response -from AmazScraper.class_creator import create_class +from classes.pydantic_class import _Response +from utils.class_creator import create_class from langchain_openai import ChatOpenAI from langchain.prompts import PromptTemplate from langchain_core.pydantic_v1 import Field diff --git a/pydantic_class.py b/classes/pydantic_class.py similarity index 60% rename from pydantic_class.py rename to classes/pydantic_class.py index e9b554d1..40076518 100644 --- a/pydantic_class.py +++ b/classes/pydantic_class.py @@ -2,4 +2,4 @@ from langchain_core.pydantic_v1 import BaseModel, Field class _Response(BaseModel): - title: str = Field(description='Title of the news') + title: str = Field(description='Title of the items') diff --git a/examples/html_scraping.py b/examples/html_scraping.py new file mode 100644 index 
00000000..bd08f6b5 --- /dev/null +++ b/examples/html_scraping.py @@ -0,0 +1,54 @@ +import sys +from classes.class_generator import Generator + +values = [ + { + "title": "title", + "type": "str", + "description": "Title of the news" + } +] + +# Example using an HTML snippet +query_info = ''' + Given this code extract all the information in a json format about the news. +
+
+

Booker show with 52 points, whoever has the most games over 50

+
+ Standings +
+

The Suns' No. 1 dominated the match won in New Orleans, scoring 52 points. It's about...

+
+ + + + + + + + 28 foto + +
+
+
+
+ Partite con 50+ punti: Booker in Top-20 + +
+ + + + + +
+
+ ''' + +if __name__ == "__main__": + + generator_instance = Generator(values, 0, "gpt-3.5-turbo") + + generator_instance.invocation(query_info) \ No newline at end of file diff --git a/examples/values_scraping.py b/examples/values_scraping.py new file mode 100644 index 00000000..831bb2be --- /dev/null +++ b/examples/values_scraping.py @@ -0,0 +1,17 @@ +from classes.class_generator import Generator + +from utils.getter import get_function, scraper + +values = [ + { + "title": "title", + "type": "str", + "description": "Title of the items" + } +] + +if __name__ == "__main__": + + generator_instance = Generator(values, 0, "gpt-3.5-turbo") + + res = generator_instance.invocation(scraper("https://www.mockupworld.co", 4197)) \ No newline at end of file diff --git a/readthedocs.yaml b/readthedocs.yaml new file mode 100644 index 00000000..c1f4a6fe --- /dev/null +++ b/readthedocs.yaml @@ -0,0 +1,20 @@ +# Read the Docs configuration file for Sphinx projects +# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details + +# Required +version: 2 + +# Set the OS, Python version and other tools you might need +build: + os: ubuntu-22.04 + tools: + python: "3.9" + +# Build documentation in the "docs/" directory with Sphinx +sphinx: + configuration: docs/source/conf.py + +# Specify the requirements file +python: + install: + - requirements: requirements.txt diff --git a/requirements.txt b/requirements.txt index c7d2b710..66413914 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,3 +4,4 @@ langchain_core==0.1.16 langchain_openai==0.0.5 python-dotenv==1.0.1 Requests==2.31.0 +pytest==8.0.0 diff --git a/tests/test_amaz_scraper.py b/tests/test_amaz_scraper.py index fb4c7732..9158b7a3 100644 --- a/tests/test_amaz_scraper.py +++ b/tests/test_amaz_scraper.py @@ -1,6 +1,6 @@ import pytest -from AmazScraper.pydantic_class import Response -from AmazScraper.class_creator import create_class +from classes.pydantic_class import _Response +from utils.class_creator import 
create_class from langchain_openai import ChatOpenAI @pytest.fixture @@ -19,5 +19,5 @@ def test_generator_invocation(generator): def test_response_model(): # Test the Response Pydantic model response_data = {"title_swebsite": "Test Title"} - response = Response(**response_data) + response = _Response(**response_data) assert response.title_swebsite == "Test Title" \ No newline at end of file diff --git a/utils/__init__.py b/utils/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/class_creator.py b/utils/class_creator.py similarity index 87% rename from class_creator.py rename to utils/class_creator.py index 3933c306..36228120 100644 --- a/class_creator.py +++ b/utils/class_creator.py @@ -10,5 +10,5 @@ def create_class(data_dict: dict): global base_script base_script = base_script + f" {elem['title']}: {elem['type']} = Field(description='{elem['description']}')\n" - with open("AmazScraper/pydantic_class.py", "w") as f: + with open("classes/pydantic_class.py", "w") as f: f.write(base_script) diff --git a/getter.py b/utils/getter.py similarity index 100% rename from getter.py rename to utils/getter.py