Skip to content

Commit 6d62ed4

Browse files
enh: use langchain's callback for langfuse instead of litellm's
Signed-off-by: thiswillbeyourgithub <[email protected]>
1 parent 71c64ca commit 6d62ed4

File tree

3 files changed

+27
-17
lines changed

3 files changed

+27
-17
lines changed

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -106,7 +106,7 @@ wdoc --path $link --task summarize --filetype "online_pdf"
106106
* **Statically typed**: Runtime type checking. Opt out with an environment flag: `WDOC_TYPECHECKING="disabled / warn / crash" wdoc` (by default: `warn`). Thanks to [beartype](https://beartype.readthedocs.io/en/latest/) it shouldn't even slow down the code!
107107
* **LLM (and embeddings) caching**: speed things up, as well as index storing and loading (handy for large collections).
108108
* **Good PDF parsing** PDF parsers are notoriously unreliable, so 15 (!) different loaders are used, and the best according to a parsing scorer is kept. Including table support via [openparse](https://github.com/Filimoa/open-parse/) (no GPU needed by default) or via [UnstructuredPDFLoader](https://python.langchain.com/docs/integrations/document_loaders/unstructured_pdfloader/).
109-
* **Langfuse support**: If you set the appropriate langfuse environment variables they will be used. See [this guide to learn more](https://langfuse.com/docs/integrations/litellm/tracing) (Note: disabled if using private_mode)
109+
* **Langfuse support**: If you set the appropriate langfuse environment variables they will be used. See [this guide](https://langfuse.com/docs/integrations/langchain/tracing) or [this one](https://langfuse.com/docs/integrations/litellm/tracing) to learn more (Note: this is disabled if using private_mode to avoid any leaks).
110110
* **Document filtering**: based on regex for document content or metadata.
111111
* **Fast**: Parallel document loading, parsing, embeddings, querying, etc.
112112
* **Shell autocompletion** using [python-fire](https://github.com/google/python-fire/blob/master/docs/using-cli.md#completion-flag)

wdoc/utils/llm.py

Lines changed: 26 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,28 @@
2424

2525
TESTING_LLM = "testing/testing"
2626

27+
if (
28+
"LANGFUSE_PUBLIC_KEY" in os.environ and
29+
"LANGFUSE_SECRET_KEY" in os.environ and
30+
"LANGFUSE_HOST" in os.environ
31+
) and not is_private:
32+
red("Activating langfuse callbacks")
33+
try:
34+
# # litellm's callbacks seem more flawed than langchain's
35+
# import langfuse
36+
# litellm.success_callback = ["langfuse"]
37+
# litellm.failure_callback = ["langfuse"]
38+
39+
from langfuse.callback import CallbackHandler as LangfuseCallback
40+
langfuse_handler = [LangfuseCallback(
41+
secret_key=os.environ["LANGFUSE_SECRET_KEY"],
42+
public_key=os.environ["LANGFUSE_PUBLIC_KEY"],
43+
host=os.environ["LANGFUSE_HOST"],
44+
)]
45+
except Exception as e:
46+
red(f"Failed to setup langfuse callback, make sure package 'langfuse' is installed. The error was: '{e}'")
47+
langfuse_handler = []
48+
2749

2850
@optional_typecheck
2951
def load_llm(
@@ -104,13 +126,14 @@ def load_llm(
104126
assert os.environ[f"{backend.upper()}_API_KEY"] == "REDACTED_BECAUSE_WDOC_IN_PRIVATE_MODE"
105127

106128
assert os.environ[f"{backend.upper()}_API_KEY"] == "REDACTED_BECAUSE_WDOC_IN_PRIVATE_MODE"
129+
assert not langfuse_handler, "Private argument but langfuse_handler appears set. Something went wrong so crashing just to be safe."
107130
else:
108131
assert not WDOC_PRIVATE_MODE
109132
assert "WDOC_PRIVATE_MODE" not in os.environ or os.environ["WDOC_PRIVATE_MODE"] == "false"
110133

111134
assert private == is_private
112135

113-
if (not private) and (backend == "openai") and (api_base is None) and ("langfuse" not in litellm.success_callback):
136+
if (not private) and (backend == "openai") and (api_base is None):
114137
max_tokens = litellm.get_model_info(modelname)["max_tokens"]
115138
if "max_tokens" not in extra_model_args:
116139
extra_model_args["max_tokens"] = int(max_tokens * 0.9)
@@ -119,7 +142,7 @@ def load_llm(
119142
cache=llm_cache,
120143
disable_streaming=True, # Not needed and might break cache
121144
verbose=llm_verbosity,
122-
callbacks=[PriceCountingCallback(verbose=llm_verbosity)],
145+
callbacks=[PriceCountingCallback(verbose=llm_verbosity)] + langfuse_handler,
123146
**extra_model_args,
124147
)
125148
else:
@@ -133,7 +156,7 @@ def load_llm(
133156
cache=llm_cache,
134157
verbose=llm_verbosity,
135158
tags=tags,
136-
callbacks=[PriceCountingCallback(verbose=llm_verbosity)],
159+
callbacks=[PriceCountingCallback(verbose=llm_verbosity)] + langfuse_handler,
137160
**extra_model_args,
138161
)
139162
litellm.drop_params = True

wdoc/wdoc.py

Lines changed: 0 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -82,19 +82,6 @@
8282

8383
os.environ["TOKENIZERS_PARALLELISM"] = "true"
8484

85-
if (
86-
"LANGFUSE_PUBLIC_KEY" in os.environ and
87-
"LANGFUSE_SECRET_KEY" in os.environ and
88-
"LANGFUSE_HOST" in os.environ
89-
) and not is_private:
90-
red("Activating LANGFUSE litellm callbacks (meaning litellm's backend will be used, even if you're calling openai)")
91-
try:
92-
import langfuse
93-
litellm.success_callback = ["langfuse"]
94-
litellm.failure_callback = ["langfuse"]
95-
except Exception as e:
96-
red(f"Failed to import langfuse, make sure it's installed. Error was: '{e}'")
97-
9885

9986
@optional_typecheck
10087
@set_USAGE_as_docstring

0 commit comments

Comments
 (0)