langchain-ai
diff --git a/‎libs/langchain/langchain/chains/rl_chain/__init__.py‎
Lines changed: 2 additions & 0 deletions b/‎libs/langchain/langchain/chains/rl_chain/__init__.py‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎libs/langchain/langchain/chains/rl_chain/base.py‎
Lines changed: 11 additions & 17 deletions b/‎libs/langchain/langchain/chains/rl_chain/base.py‎
Lines changed: 11 additions & 17 deletions
diff --git a/‎libs/langchain/langchain/chains/rl_chain/pick_best_chain.py‎
Lines changed: 141 additions & 29 deletions b/‎libs/langchain/langchain/chains/rl_chain/pick_best_chain.py‎
Lines changed: 141 additions & 29 deletions
@@ -15,6 +15,7 @@
 from langchain.chains.rl_chain.pick_best_chain import (
     PickBest,
     PickBestEvent,
+    PickBestFeatureEmbedder,
     PickBestSelected,
 )
 
@@ -37,6 +38,7 @@ def configure_logger() -> None:
     "PickBest",
     "PickBestEvent",
     "PickBestSelected",
+    "PickBestFeatureEmbedder",
     "Embed",
     "BasedOn",
     "ToSelectFrom",
 
@@ -118,8 +118,7 @@ def get_based_on_and_to_select_from(inputs: Dict[str, Any]) -> Tuple[Dict, Dict]
 
     if not to_select_from:
         raise ValueError(
-            "No variables using 'ToSelectFrom' found in the inputs. \
-                Please include at least one variable containing a list to select from."
+            "No variables using 'ToSelectFrom' found in the inputs. Please include at least one variable containing a list to select from."  # noqa: E501
         )
 
     based_on = {
@@ -229,6 +228,9 @@ def save(self) -> None:
 
 
 class Embedder(Generic[TEvent], ABC):
+    def __init__(self, *args: Any, **kwargs: Any):
+        """Accept and discard any arguments; this initializer stores nothing."""
+
     @abstractmethod
     def format(self, event: TEvent) -> str:
         ...
@@ -300,9 +302,7 @@ def score_response(
             return resp
         except Exception as e:
             raise RuntimeError(
-                f"The auto selection scorer did not manage to score the response, \
-                    there is always the option to try again or tweak the reward prompt.\
-                         Error: {e}"
+                f"The auto selection scorer did not manage to score the response, there is always the option to try again or tweak the reward prompt. Error: {e}"  # noqa: E501
             )
 
 
@@ -316,7 +316,7 @@ class RLChain(Chain, Generic[TEvent]):
         - selection_scorer (Union[SelectionScorer, None]): Scorer for the selection. Can be set to None.
         - policy (Optional[Policy]): The policy used by the chain to learn to populate a dynamic prompt.
         - auto_embed (bool): Determines if embedding should be automatic. Default is False.
-        - metrics (Optional[MetricsTracker]): Tracker for metrics, can be set to None.
+        - metrics (Optional[Union[MetricsTrackerRollingWindow, MetricsTrackerAverage]]): Tracker for metrics, can be set to None.
 
     Initialization Attributes:
         - feature_embedder (Embedder): Embedder used for the `BasedOn` and `ToSelectFrom` inputs.
@@ -325,7 +325,8 @@ class RLChain(Chain, Generic[TEvent]):
         - vw_cmd (List[str], optional): Command line arguments for the VW model.
         - policy (Type[VwPolicy]): Policy used by the chain.
         - vw_logs (Optional[Union[str, os.PathLike]]): Path for the VW logs.
-        - metrics_step (int): Step for the metrics tracker. Default is -1.
+        - metrics_step (int): Step for the metrics tracker. Default is -1. If set without metrics_window_size, average metrics will be tracked, otherwise rolling window metrics will be tracked.
+        - metrics_window_size (int): Window size for the metrics tracker. Default is -1. If set, rolling window metrics will be tracked.
 
     Notes:
         The class initializes the VW model using the provided arguments. If `selection_scorer` is not provided, a warning is logged, indicating that no reinforcement learning will occur unless the `update_with_delayed_score` method is called.
@@ -423,8 +424,7 @@ def update_with_delayed_score(
         """  # noqa: E501
         if self._can_use_selection_scorer() and not force_score:
             raise RuntimeError(
-                "The selection scorer is set, and force_score was not set to True. \
-                    Please set force_score=True to use this function."
+                "The selection scorer is set, and force_score was not set to True. Please set force_score=True to use this function."  # noqa: E501
             )
         if self.metrics:
             self.metrics.on_feedback(score)
@@ -458,9 +458,7 @@ def _validate_inputs(self, inputs: Dict[str, Any]) -> None:
             or self.selected_based_on_input_key in inputs.keys()
         ):
             raise ValueError(
-                f"The rl chain does not accept '{self.selected_input_key}' \
-                    or '{self.selected_based_on_input_key}' as input keys, \
-                        they are reserved for internal use during auto reward."
+                f"The rl chain does not accept '{self.selected_input_key}' or '{self.selected_based_on_input_key}' as input keys, they are reserved for internal use during auto reward."  # noqa: E501
             )
 
     def _can_use_selection_scorer(self) -> bool:
@@ -498,9 +496,6 @@ def _call(
     ) -> Dict[str, Any]:
         _run_manager = run_manager or CallbackManagerForChainRun.get_noop_manager()
 
-        if self.auto_embed:
-            inputs = prepare_inputs_for_autoembed(inputs=inputs)
-
         event: TEvent = self._call_before_predict(inputs=inputs)
         prediction = self.active_policy.predict(event=event)
         if self.metrics:
@@ -573,8 +568,7 @@ def embed_string_type(
 
     if namespace is None:
         raise ValueError(
-            "The default namespace must be \
-                provided when embedding a string or _Embed object."
+            "The default namespace must be provided when embedding a string or _Embed object."  # noqa: E501
         )
 
     return {namespace: keep_str + encoded}
 
@@ -53,21 +53,24 @@ class PickBestFeatureEmbedder(base.Embedder[PickBestEvent]):
         model name (Any, optional): The type of embeddings to be used for feature representation. Defaults to BERT SentenceTransformer.
     """  # noqa E501
 
-    def __init__(self, model: Optional[Any] = None, *args: Any, **kwargs: Any):
+    def __init__(
+        self, auto_embed: bool, model: Optional[Any] = None, *args: Any, **kwargs: Any
+    ):
         super().__init__(*args, **kwargs)
 
         if model is None:
             from sentence_transformers import SentenceTransformer
 
-            model = SentenceTransformer("bert-base-nli-mean-tokens")
+            model = SentenceTransformer("all-mpnet-base-v2")
 
         self.model = model
+        self.auto_embed = auto_embed
 
-    def format(self, event: PickBestEvent) -> str:
-        """
-        Converts the `BasedOn` and `ToSelectFrom` into a format that can be used by VW
-        """
+    @staticmethod
+    def _str(embedding: List[float]) -> str:
+        """Render a vector as space-separated ``index:value`` pairs."""
+        pairs = (f"{idx}:{val}" for idx, val in enumerate(embedding))
+        return " ".join(pairs)
 
+    def get_label(self, event: PickBestEvent) -> tuple:
         cost = None
         if event.selected:
             chosen_action = event.selected.index
@@ -77,7 +80,11 @@ def format(self, event: PickBestEvent) -> str:
                 else None
             )
             prob = event.selected.probability
+            return chosen_action, cost, prob
+        else:
+            return None, None, None
 
+    def get_context_and_action_embeddings(self, event: PickBestEvent) -> tuple:
         context_emb = base.embed(event.based_on, self.model) if event.based_on else None
         to_select_from_var_name, to_select_from = next(
             iter(event.to_select_from.items()), (None, None)
@@ -97,6 +104,95 @@ def format(self, event: PickBestEvent) -> str:
             raise ValueError(
                 "Context and to_select_from must be provided in the inputs dictionary"
             )
+        return context_emb, action_embs
+
+    def get_indexed_dot_product(self, context_emb: List, action_embs: List) -> Dict:
+        """
+        Compute the dot product between every context feature and every action
+        feature.
+
+        Each value found in `context_emb` / `action_embs` (lists of
+        ``{namespace: value-or-list-of-values}`` dicts) is turned into a
+        ``"namespace=value"`` string, the unique strings are encoded with
+        `self.model.encode`, and the resulting vectors are multiplied.
+
+        Returns:
+            Nested dict ``{context_key: {action_key: dot_product}}``. Keys are kept
+            in first-seen order so the result, and anything built by iterating
+            it, is the same on every run (iterating a `set` of strings is not).
+        """
+        import numpy as np
+
+        def unique_keys(items: List) -> List[str]:
+            # A dict de-duplicates while preserving insertion order, unlike a set
+            # whose iteration order for strings changes with hash randomization.
+            seen: Dict[str, None] = {}
+            for item in items:
+                for ns, value in item.items():
+                    values = value if isinstance(value, list) else [value]
+                    seen.update(dict.fromkeys(f"{ns}={v}" for v in values))
+            return list(seen)
+
+        context_keys = unique_keys(context_emb)
+        action_keys = unique_keys(action_embs)
+
+        # One encoded vector per unique key, stacked row-wise in key order.
+        context_matrix = np.stack(list(self.model.encode(context_keys)))
+        action_matrix = np.stack(list(self.model.encode(action_keys)))
+        dot_product_matrix = np.dot(context_matrix, action_matrix.T)
+
+        return {
+            context_key: {
+                action_key: dot_product_matrix[i, j]
+                for j, action_key in enumerate(action_keys)
+            }
+            for i, context_key in enumerate(context_keys)
+        }
+
+    def format_auto_embed_on(self, event: PickBestEvent) -> str:
+        """
+        Build the multi-line example string for `event` when auto-embedding is on.
+
+        The first line is the ``shared`` context line; one line per action follows.
+        Every action line carries, per namespace, the raw values, a ``#`` namespace
+        listing ``namespace=value`` tokens, and a final ``dotprod`` namespace with
+        the dot products between that action's values and every context key.
+        Context values are mirrored into an ``@`` namespace on the shared line.
+        The line of the chosen action is prefixed with ``index:cost:probability``
+        when a cost is available.
+        """
+        chosen_action, cost, prob = self.get_label(event)
+        context_emb, action_embs = self.get_context_and_action_embeddings(event)
+        indexed_dot_product = self.get_indexed_dot_product(context_emb, action_embs)
+
+        action_lines = []
+        for i, action in enumerate(action_embs):
+            line_parts = []
+            dot_prods = []
+            if cost is not None and chosen_action == i:
+                line_parts.append(f"{chosen_action}:{cost}:{prob}")
+            # Use a distinct name for the namespace value so the outer loop
+            # variable `action` is not rebound while it is being iterated.
+            for ns, feature in action.items():
+                line_parts.append(f"|{ns}")
+                elements = feature if isinstance(feature, list) else [feature]
+                ns_actions = []
+                for elem in elements:
+                    line_parts.append(f"{elem}")
+                    ns_action = f"{ns}={elem}"
+                    ns_actions.append(ns_action)
+                    # One dot product per context key, in the dict's key order.
+                    dot_prods.extend(
+                        per_action[ns_action]
+                        for per_action in indexed_dot_product.values()
+                    )
+                nsa_str = " ".join(ns_actions)
+                line_parts.append(f"|# {nsa_str}")
+
+            line_parts.append(f"|dotprod {self._str(dot_prods)}")
+            action_lines.append(" ".join(line_parts))
+
+        shared = []
+        for item in context_emb:
+            for ns, context in item.items():
+                shared.append(f"|{ns}")
+                elements = context if isinstance(context, list) else [context]
+                shared.extend(f"{elem}" for elem in elements)
+                nsc_str = " ".join(f"{ns}={elem}" for elem in elements)
+                shared.append(f"|@ {nsc_str}")
+
+        return "shared " + " ".join(shared) + "\n" + "\n".join(action_lines)
+
+    def format_auto_embed_off(self, event: PickBestEvent) -> str:
+        """
+        Converts the `BasedOn` and `ToSelectFrom` into a format that can be used by VW
+        """
+        chosen_action, cost, prob = self.get_label(event)
+        context_emb, action_embs = self.get_context_and_action_embeddings(event)
 
         example_string = ""
         example_string += "shared "
@@ -120,6 +216,12 @@ def format(self, event: PickBestEvent) -> str:
         # Strip the last newline
         return example_string[:-1]
 
+    def format(self, event: PickBestEvent) -> str:
+        """
+        Format `event` with `format_auto_embed_on` when `self.auto_embed` is truthy,
+        otherwise with `format_auto_embed_off`.
+        """
+        formatter = (
+            self.format_auto_embed_on
+            if self.auto_embed
+            else self.format_auto_embed_off
+        )
+        return formatter(event)
+
 
 class PickBest(base.RLChain[PickBestEvent]):
     """
@@ -154,50 +256,60 @@ def __init__(
         *args: Any,
         **kwargs: Any,
     ):
-        vw_cmd = kwargs.get("vw_cmd", [])
-        if not vw_cmd:
-            vw_cmd = [
-                "--cb_explore_adf",
-                "--quiet",
-                "--interactions=::",
-                "--coin",
-                "--squarecb",
-            ]
+        auto_embed = kwargs.get("auto_embed", False)
+
+        feature_embedder = kwargs.get("feature_embedder", None)
+        if feature_embedder:
+            if "auto_embed" in kwargs:
+                logger.warning(
+                    "auto_embed will take no effect when explicit feature_embedder is provided"  # noqa E501
+                )
+            # turning auto_embed off for cli setting below
+            auto_embed = False
         else:
+            feature_embedder = PickBestFeatureEmbedder(auto_embed=auto_embed)
+        kwargs["feature_embedder"] = feature_embedder
+
+        vw_cmd = kwargs.get("vw_cmd", [])
+        if vw_cmd:
             if "--cb_explore_adf" not in vw_cmd:
                 raise ValueError(
                     "If vw_cmd is specified, it must include --cb_explore_adf"
                 )
-        kwargs["vw_cmd"] = vw_cmd
+        else:
+            interactions = ["--interactions=::"]
+            if auto_embed:
+                interactions = [
+                    "--interactions=@#",
+                    "--ignore_linear=@",
+                    "--ignore_linear=#",
+                ]
+            vw_cmd = interactions + [
+                "--cb_explore_adf",
+                "--coin",
+                "--squarecb",
+                "--quiet",
+            ]
 
-        feature_embedder = kwargs.get("feature_embedder", None)
-        if not feature_embedder:
-            feature_embedder = PickBestFeatureEmbedder()
-        kwargs["feature_embedder"] = feature_embedder
+        kwargs["vw_cmd"] = vw_cmd
 
         super().__init__(*args, **kwargs)
 
     def _call_before_predict(self, inputs: Dict[str, Any]) -> PickBestEvent:
         context, actions = base.get_based_on_and_to_select_from(inputs=inputs)
         if not actions:
             raise ValueError(
-                "No variables using 'ToSelectFrom' found in the inputs. \
-                    Please include at least one variable containing \
-                        a list to select from."
+                "No variables using 'ToSelectFrom' found in the inputs. Please include at least one variable containing a list to select from."  # noqa E501
             )
 
         if len(list(actions.values())) > 1:
             raise ValueError(
-                "Only one variable using 'ToSelectFrom' can be provided in the inputs \
-                    for the PickBest chain. Please provide only one variable \
-                        containing a list to select from."
+                "Only one variable using 'ToSelectFrom' can be provided in the inputs for the PickBest chain. Please provide only one variable containing a list to select from."  # noqa E501
             )
 
         if not context:
             raise ValueError(
-                "No variables using 'BasedOn' found in the inputs. \
-                    Please include at least one variable containing information \
-                        to base the selected of ToSelectFrom on."
+                "No variables using 'BasedOn' found in the inputs. Please include at least one variable containing information to base the selected of ToSelectFrom on."  # noqa E501
             )
 
         event = PickBestEvent(inputs=inputs, to_select_from=actions, based_on=context)