diff --git a/tests/test_conf.py b/tests/test_conf.py
new file mode 100644
index 00000000..6ef27786
--- /dev/null
+++ b/tests/test_conf.py
@@ -0,0 +1,42 @@
+"""Sanity checks for the Sphinx configuration module ``docs.source.conf``."""
+
+import os
+import sys
+import unittest
+
+# Add the project root (the parent of this tests directory) to the Python
+# path *before* importing the conf module, so the import below can use it.
+sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
+
+from docs.source import conf  # noqa: E402  (must follow the sys.path setup)
+
+
+class TestSphinxConfig(unittest.TestCase):
+    """Checks the values declared in the Sphinx ``conf`` module."""
+
+    def test_sphinx_configuration(self):
+        """Verify project metadata, extensions, and HTML theme options."""
+        # Test project information
+        self.assertEqual(conf.project, "ScrapeGraphAI")
+        self.assertEqual(conf.copyright, "2024, ScrapeGraphAI")
+        self.assertEqual(conf.author, "Marco Vinciguerra, Marco Perini, Lorenzo Padoan")
+
+        # Test general configuration
+        self.assertIn("sphinx.ext.autodoc", conf.extensions)
+        self.assertIn("sphinx.ext.napoleon", conf.extensions)
+        self.assertEqual(conf.templates_path, ["_templates"])
+        self.assertEqual(conf.exclude_patterns, [])
+
+        # Test HTML output configuration
+        self.assertEqual(conf.html_theme, "furo")
+        self.assertIsInstance(conf.html_theme_options, dict)
+        self.assertEqual(conf.html_theme_options["source_repository"],
+                         "https://github.com/VinciGit00/Scrapegraph-ai/")
+        self.assertEqual(conf.html_theme_options["source_branch"], "main")
+        self.assertEqual(conf.html_theme_options["source_directory"], "docs/source/")
+        self.assertTrue(conf.html_theme_options["navigation_with_keys"])
+        self.assertFalse(conf.html_theme_options["sidebar_hide_name"])
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/tests/utils/research_web_test.py b/tests/utils/research_web_test.py
index a4a37191..e34a4238 100644
--- a/tests/utils/research_web_test.py
+++ b/tests/utils/research_web_test.py
@@ -1,32 +1,46 @@
 import pytest
 
-from scrapegraphai.utils.research_web import ( # Replace with actual path to your file
+from scrapegraphai.utils.research_web import (
+    filter_pdf_links,
+    search_on_web,
 )
 
 
-
 def test_google_search():
     """Tests search_on_web with Google search engine."""
     results = search_on_web("test query", search_engine="Google", max_results=2)
     assert len(results) == 2
     # You can further assert if the results actually contain 'test query' in the title/snippet using additional libraries
 
-
 def test_bing_search():
     """Tests search_on_web with Bing search engine."""
     results = search_on_web("test query", search_engine="Bing", max_results=1)
     assert results is not None
     # You can further assert if the results contain '.com' or '.org' in the domain
 
-
 def test_invalid_search_engine():
     """Tests search_on_web with invalid search engine."""
     with pytest.raises(ValueError):
         search_on_web("test query", search_engine="Yahoo", max_results=5)
 
-
 def test_max_results():
     """Tests search_on_web with different max_results values."""
     results_5 = search_on_web("test query", max_results=5)
     results_10 = search_on_web("test query", max_results=10)
     assert len(results_5) <= len(results_10)
+
+def test_filter_pdf_links():
+    """Tests filter_pdf_links function to ensure it filters out PDF links."""
+    test_links = [
+        "http://example.com/document.pdf",
+        "https://example.org/page.html",
+        "http://test.com/file.PDF",
+        "https://another.com/index.htm",
+    ]
+    filtered_links = filter_pdf_links(test_links)
+
+    assert len(filtered_links) == 2
+    assert "http://example.com/document.pdf" not in filtered_links
+    assert "http://test.com/file.PDF" not in filtered_links
+    assert "https://example.org/page.html" in filtered_links
+    assert "https://another.com/index.htm" in filtered_links