Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 36 additions & 0 deletions tests/test_conf.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
import os
import sys
import unittest

# Add the project root to the Python path. This must run before the
# ``docs.source`` import below; done afterwards, it cannot help resolve it.
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))

# Import the conf module (deliberately placed after the path setup above)
from docs.source import conf  # noqa: E402

class TestSphinxConfig(unittest.TestCase):
    """Checks the values declared in the Sphinx ``conf`` module."""

    def test_sphinx_configuration(self):
        """Verify project metadata, general settings and HTML theme options."""
        # Project information: each attribute must equal its expected value.
        for attribute, expected in (
            ("project", "ScrapeGraphAI"),
            ("copyright", "2024, ScrapeGraphAI"),
            ("author", "Marco Vinciguerra, Marco Perini, Lorenzo Padoan"),
        ):
            self.assertEqual(getattr(conf, attribute), expected)

        # General configuration: required extensions and path settings.
        for extension in ("sphinx.ext.autodoc", "sphinx.ext.napoleon"):
            self.assertIn(extension, conf.extensions)
        self.assertEqual(conf.templates_path, ["_templates"])
        self.assertEqual(conf.exclude_patterns, [])

        # HTML output configuration: theme name plus its option mapping.
        self.assertEqual(conf.html_theme, "furo")
        theme_options = conf.html_theme_options
        self.assertIsInstance(theme_options, dict)
        for option, expected in (
            ("source_repository", "https://github.com/VinciGit00/Scrapegraph-ai/"),
            ("source_branch", "main"),
            ("source_directory", "docs/source/"),
        ):
            self.assertEqual(theme_options[option], expected)
        self.assertTrue(theme_options["navigation_with_keys"])
        self.assertFalse(theme_options["sidebar_hide_name"])

# Allow this test module to be executed directly as a script.
if __name__ == "__main__":
    unittest.main()
23 changes: 18 additions & 5 deletions tests/utils/research_web_test.py
Original file line number Diff line number Diff line change
@@ -1,32 +1,45 @@
import pytest

from scrapegraphai.utils.research_web import ( # Replace with actual path to your file
from scrapegraphai.utils.research_web import ( # Replace with actual path to your file, filter_pdf_links

search_on_web,
)


def test_google_search():
    """Check that a Google query returns exactly the requested number of hits."""
    requested = 2
    hits = search_on_web("test query", search_engine="Google", max_results=requested)
    # A stricter check could confirm that 'test query' appears in each hit's
    # title/snippet, which would need additional libraries.
    assert len(hits) == requested


def test_bing_search():
    """Check that a single-result Bing query yields a non-``None`` value."""
    hits = search_on_web("test query", max_results=1, search_engine="Bing")
    # A stricter check could look for '.com' or '.org' in the returned domains.
    assert hits is not None


def test_invalid_search_engine():
    """Expect ``ValueError`` when ``search_on_web`` is given the engine "Yahoo"."""
    rejected_engine = "Yahoo"
    with pytest.raises(ValueError):
        search_on_web("test query", search_engine=rejected_engine, max_results=5)


def test_max_results():
    """Check that a limit of 5 never yields more results than a limit of 10."""
    # Queries run in order: the limit-5 search first, then the limit-10 search.
    fewer, more = (
        search_on_web("test query", max_results=limit) for limit in (5, 10)
    )
    assert len(fewer) <= len(more)

def test_filter_pdf_links():
    """Tests filter_pdf_links function to ensure it filters out PDF links.

    The input mixes lower- and upper-case ``.pdf`` URLs with ordinary pages;
    only the two non-PDF URLs are expected in the result.
    """
    # The module-level import mentions ``filter_pdf_links`` only inside a
    # comment, so the name is never bound there. Import it locally so this
    # test does not fail with NameError before reaching its assertions.
    from scrapegraphai.utils.research_web import filter_pdf_links

    test_links = [
        "http://example.com/document.pdf",
        "https://example.org/page.html",
        "http://test.com/file.PDF",
        "https://another.com/index.htm",
    ]
    filtered_links = filter_pdf_links(test_links)

    assert len(filtered_links) == 2
    # Both PDF variants (case-insensitive extension) must be removed.
    assert "http://example.com/document.pdf" not in filtered_links
    assert "http://test.com/file.PDF" not in filtered_links
    # Non-PDF pages must be kept.
    assert "https://example.org/page.html" in filtered_links
    assert "https://another.com/index.htm" in filtered_links