ScrapeGraphAI · codebeaver-ai · Jan 12, 2025 · Jan 12, 2025
diff --git a/tests/test_conf.py b/tests/test_conf.py
@@ -0,0 +1,36 @@
+import os
+import sys
+import unittest
+
+# Put the project root on sys.path *before* importing conf; if the import
+# ran first, this path tweak would have no effect on it.
+sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
+
+# Sphinx configuration module under test (must follow the path setup above).
+from docs.source import conf  # noqa: E402
+
+class TestSphinxConfig(unittest.TestCase):
+    def test_sphinx_configuration(self):
+        """Checks project info, general settings and HTML theme options of ``conf``."""
+        self.assertEqual(conf.project, "ScrapeGraphAI")
+        self.assertEqual(conf.copyright, "2024, ScrapeGraphAI")
+        self.assertEqual(conf.author, "Marco Vinciguerra, Marco Perini, Lorenzo Padoan")
+
+        for extension in ("sphinx.ext.autodoc", "sphinx.ext.napoleon"):
+            self.assertIn(extension, conf.extensions)
+        self.assertEqual(conf.templates_path, ["_templates"])
+        self.assertEqual(conf.exclude_patterns, [])
+
+        # HTML output: theme name first, then the html_theme_options entries.
+        self.assertEqual(conf.html_theme, "furo")
+        theme_options = conf.html_theme_options
+        self.assertIsInstance(theme_options, dict)
+        self.assertEqual(theme_options["source_repository"],
+                         "https://github.com/VinciGit00/Scrapegraph-ai/")
+        self.assertEqual(theme_options["source_branch"], "main")
+        self.assertEqual(theme_options["source_directory"], "docs/source/")
+        self.assertTrue(theme_options["navigation_with_keys"])
+        self.assertFalse(theme_options["sidebar_hide_name"])
+
+if __name__ == '__main__':
+    unittest.main()  # run the tests when this file is executed directly
diff --git a/tests/utils/research_web_test.py b/tests/utils/research_web_test.py
@@ -1,32 +1,45 @@
 import pytest
 
-from scrapegraphai.utils.research_web import (  # Replace with actual path to your file
+from scrapegraphai.utils.research_web import (
+    filter_pdf_links,
     search_on_web,
 )
 
-
 def test_google_search():
     """Tests search_on_web with Google search engine."""
     results = search_on_web("test query", search_engine="Google", max_results=2)
     assert len(results) == 2
     # You can further assert if the results actually contain 'test query' in the title/snippet using additional libraries
 
-
 def test_bing_search():
     """Tests search_on_web with Bing search engine."""
     results = search_on_web("test query", search_engine="Bing", max_results=1)
     assert results is not None
     # You can further assert if the results contain '.com' or '.org' in the domain
 
-
 def test_invalid_search_engine():
     """Tests search_on_web with invalid search engine."""
     with pytest.raises(ValueError):
         search_on_web("test query", search_engine="Yahoo", max_results=5)
 
-
 def test_max_results():
     """Tests search_on_web with different max_results values."""
     results_5 = search_on_web("test query", max_results=5)
     results_10 = search_on_web("test query", max_results=10)
     assert len(results_5) <= len(results_10)
+
+def test_filter_pdf_links():
+    """Checks that filter_pdf_links drops PDF URLs, upper-case ``.PDF`` included."""
+    pdf_urls = ["http://example.com/document.pdf", "http://test.com/file.PDF"]
+    page_urls = ["https://example.org/page.html", "https://another.com/index.htm"]
+    # Alternate PDF and non-PDF entries in the input list.
+    candidates = [pdf_urls[0], page_urls[0], pdf_urls[1], page_urls[1]]
+
+    remaining = filter_pdf_links(candidates)
+
+    # Exactly the two non-PDF pages are expected to survive.
+    assert len(remaining) == 2
+    for url in pdf_urls:
+        assert url not in remaining
+    for url in page_urls:
+        assert url in remaining