Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion scrapegraphai/graphs/abstract_graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
from pydantic import BaseModel

from ..helpers import models_tokens
from ..models import DeepSeek, OneApi
from ..models import CLoD, DeepSeek, OneApi
from ..utils.logging import set_verbosity_info, set_verbosity_warning


Expand Down Expand Up @@ -164,6 +164,7 @@ def _create_llm(self, llm_config: dict) -> object:
"deepseek",
"ernie",
"fireworks",
"clod",
"togetherai",
}

Expand Down Expand Up @@ -218,6 +219,7 @@ def _create_llm(self, llm_config: dict) -> object:
"ernie",
"deepseek",
"togetherai",
"clod",
}:
if llm_params["model_provider"] == "bedrock":
llm_params["model_kwargs"] = {
Expand All @@ -229,6 +231,9 @@ def _create_llm(self, llm_config: dict) -> object:
else:
model_provider = llm_params.pop("model_provider")

if model_provider == "clod":
return CLoD(**llm_params)

if model_provider == "deepseek":
return DeepSeek(**llm_params)

Expand Down
32 changes: 32 additions & 0 deletions scrapegraphai/helpers/models_tokens.py
Original file line number Diff line number Diff line change
Expand Up @@ -261,5 +261,37 @@
"mixtral-moe-8x22B-instruct": 65536,
"mixtral-moe-8x7B-instruct": 65536,
},
"clod": {
"open-mistral-7b": 32000,
"Llama-3.1-70b": 128000,
"Llama-3.1-405b": 128000,
"Llama-3.3-70b": 128000,
"Llama-3.1-8b": 128000,
"gpt-4o": 128000,
"gpt-4o-mini": 128000,
"gpt-4-turbo": 128000,
"claude-3-opus-latest": 200000,
"gemini-1.5-flash-8b": 128000,
"gemini-1.5-flash": 128000,
"open-mixtral-8x7b": 32000,
"open-mixtral-8x22b": 64000,
"claude-3-5-sonnet-latest": 200000,
"claude-3-haiku-20240307": 200000,
"Qwen-2.5-Coder-32B": 32000,
"Deepseek-R1-Distill-Llama-70B": 131072,
"Deepseek-V3": 128000,
"Qwen-2-VL-72B": 128000,
"Deepseek-R1-Distill-Qwen-14B": 131072,
"Deepseek-R1-Distill-Qwen-1.5B": 131072,
"Deepseek-R1": 128000,
"Deepseek-Llm-Chat-67B": 4096,
"Qwen-2.5-7B": 132072,
"Qwen-2.5-72B": 132072,
"Qwen-2-72B": 128000,
"o1": 200000,
"gemini-2.0-flash-exp": 1000000,
"grok-beta": 128000,
"grok-2-latest": 128000,
},
"togetherai": {"Meta-Llama-3.1-70B-Instruct-Turbo": 128000},
}
8 changes: 2 additions & 6 deletions scrapegraphai/models/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,10 @@
This module contains the model definitions used in the ScrapeGraphAI application.
"""

from .clod import CLoD
from .deepseek import DeepSeek
from .oneapi import OneApi
from .openai_itt import OpenAIImageToText
from .openai_tts import OpenAITextToSpeech

__all__ = [
"DeepSeek",
"OneApi",
"OpenAIImageToText",
"OpenAITextToSpeech",
]
__all__ = ["DeepSeek", "OneApi", "OpenAIImageToText", "OpenAITextToSpeech", "CLoD"]
23 changes: 23 additions & 0 deletions scrapegraphai/models/clod.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
"""
CLōD Module
"""

from langchain_openai import ChatOpenAI


class CLoD(ChatOpenAI):
    """
    ChatOpenAI subclass preconfigured for the CLōD service, which exposes an
    OpenAI-like API.

    Any keyword arguments are forwarded to ``ChatOpenAI`` after two
    adjustments: a generic ``api_key`` entry is renamed to ``openai_api_key``,
    and ``openai_api_base`` is always set to the CLōD endpoint.

    Args:
        **llm_config: Configuration parameters for the language model.
    """

    def __init__(self, **llm_config):
        # Callers supply the generic "api_key"; the parent class is given
        # the same value under the "openai_api_key" name instead.
        if "api_key" in llm_config:
            key = llm_config.pop("api_key")
            llm_config.update(openai_api_key=key)

        # Point the OpenAI-compatible client at the CLōD endpoint,
        # replacing any base URL that was passed in.
        llm_config.update(openai_api_base="https://api.clod.io/v1")

        super().__init__(**llm_config)
1 change: 1 addition & 0 deletions tests/graphs/.env.example
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
OPENAI_API_KEY="YOUR OPENAI API KEY"
FIREWORKS_APIKEY="YOUR FIREWORKS KEY"
CLOD_API_KEY="YOUR CLOD API KEY"
55 changes: 55 additions & 0 deletions tests/graphs/smart_scraper_clod_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
"""
Module for testing the smart scraper class
"""

import os

import pytest
from dotenv import load_dotenv

from scrapegraphai.graphs import SmartScraperGraph

load_dotenv()


@pytest.fixture
def graph_config():
    """Return the graph configuration shared by the tests in this module.

    The LLM API key is read from the ``CLOD_API_KEY`` environment variable
    (``None`` when the variable is not set).
    """
    llm_settings = {
        "api_key": os.getenv("CLOD_API_KEY"),
        "model": "clod/claude-3-5-sonnet-latest",
    }
    return {"llm": llm_settings, "verbose": True, "headless": False}


def test_scraping_pipeline(graph_config):
    """Run the smart scraper graph and check that it returns a dict."""
    scraper = SmartScraperGraph(
        config=graph_config,
        source="https://perinim.github.io/projects/",
        prompt="List me all the projects with their description.",
    )

    output = scraper.run()

    assert output is not None
    assert isinstance(output, dict)


def test_get_execution_info(graph_config):
    """Run the smart scraper graph and check that execution info is available."""
    scraper = SmartScraperGraph(
        config=graph_config,
        source="https://perinim.github.io/projects/",
        prompt="List me all the projects with their description.",
    )
    scraper.run()

    # Only the presence of the execution info is verified, not its contents.
    exec_info = scraper.get_execution_info()
    assert exec_info is not None
Loading