diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_path_efficiency/__init__.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_path_efficiency/__init__.py deleted file mode 100644 index 13347bc717f2..000000000000 --- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_path_efficiency/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -# --------------------------------------------------------- -# Copyright (c) Microsoft Corporation. All rights reserved. -# --------------------------------------------------------- - -from ._path_efficiency import PathEfficiencyEvaluator - -__all__ = ["PathEfficiencyEvaluator"] diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_task_navigation_efficiency/__init__.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_task_navigation_efficiency/__init__.py new file mode 100644 index 000000000000..ad89c4170ce9 --- /dev/null +++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_task_navigation_efficiency/__init__.py @@ -0,0 +1,7 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# --------------------------------------------------------- + +from ._task_navigation_efficiency import TaskNavigationEfficiencyEvaluator, TaskNavigationEfficiencyMatchingMode + +__all__ = ["TaskNavigationEfficiencyEvaluator", "TaskNavigationEfficiencyMatchingMode"] diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_path_efficiency/_path_efficiency.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_task_navigation_efficiency/_task_navigation_efficiency.py similarity index 68% rename from sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_path_efficiency/_path_efficiency.py rename to sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_task_navigation_efficiency/_task_navigation_efficiency.py index 65fb0c3b4eaf..304474944e84 100644 --- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_path_efficiency/_path_efficiency.py +++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_task_navigation_efficiency/_task_navigation_efficiency.py @@ -1,40 +1,73 @@ # --------------------------------------------------------- # Copyright (c) Microsoft Corporation. All rights reserved. # --------------------------------------------------------- -import json +from enum import Enum from collections import Counter +import json from typing import Dict, List, Union, Any, Tuple from typing_extensions import overload, override -from azure.ai.evaluation._evaluators._common import EvaluatorBase from azure.ai.evaluation._constants import EVALUATION_PASS_FAIL_MAPPING +from azure.ai.evaluation._evaluators._common import EvaluatorBase +from azure.ai.evaluation._exceptions import ( + ErrorCategory, + ErrorTarget, + EvaluationException, +) + + +class TaskNavigationEfficiencyMatchingMode(str, Enum): + """ + Enumeration of task navigation efficiency matching mode. + + This enum allows you to specify which single matching technique should be used when evaluating + the efficiency of an agent's tool calls sequence against a ground truth path. + """ + + EXACT_MATCH = "exact_match" + """ + Binary metric indicating whether the agent's tool calls exactly match the ground truth. 
+ + Returns True only if the agent's tool calls sequence is identical to the expected sequence + in both order and content (no extra steps, no missing steps, correct order). + """ + + IN_ORDER_MATCH = "in_order_match" + """ + Binary metric allowing extra steps but requiring correct order of required tool calls. + + Returns True if all ground truth steps appear in the agent's sequence in the correct + order, even if there are additional steps interspersed. + """ + + ANY_ORDER_MATCH = "any_order_match" + """ + Binary metric allowing both extra steps and different ordering. + + Returns True if all ground truth steps appear in the agent's sequence with sufficient + frequency, regardless of order. Most lenient matching criterion. + """ -class PathEfficiencyEvaluator(EvaluatorBase): +class TaskNavigationEfficiencyEvaluator(EvaluatorBase): """ Evaluates whether an agent's sequence of actions is efficient and follows optimal decision-making patterns. - The Path Efficiency Evaluator calculates precision, recall, and F1 scores based on the comparison - between the agent's tool usage trajectory and the ground truth expected steps. It also provides - three binary match metrics: exact match, in-order match (allows extra steps), and any-order match (allows extra steps and ignores order). + The Task Navigation Efficiency Evaluator returns binary matching results between the agent's tool usage trajectory and the ground truth expected steps. + It supports three matching techniques: exact match, in-order match (allows extra steps), and any-order match (allows extra steps and ignores order). + It also returns precision, recall, and F1 scores in the properties bag. - :param precision_threshold: The threshold value to determine if the precision evaluation passes or fails. Default is 0.5. - :type precision_threshold: float - :param recall_threshold: The threshold value to determine if the recall evaluation passes or fails. Default is 0.5. - :type recall_threshold: float - :param f1_score_threshold: The threshold value to determine if the F1 score evaluation passes or fails. Default is 0.5. - :type f1_score_threshold: float + :param matching_mode: The matching mode to use. Default is "exact_match". + :type matching_mode: Union[str, TaskNavigationEfficiencyMatchingMode] .. admonition:: Example: .. 
code-block:: python - from azure.ai.evaluation import PathEfficiencyEvaluator + from azure.ai.evaluation import TaskNavigationEfficiencyEvaluator - path_efficiency_eval = PathEfficiencyEvaluator( - precision_threshold=0.7, - recall_threshold=0.8, - f1_score_threshold=0.75 + task_navigation_efficiency_eval = TaskNavigationEfficiencyEvaluator( + matching_mode=TaskNavigationEfficiencyMatchingMode.EXACT_MATCH ) # Example 1: Using simple tool names list @@ -64,36 +97,39 @@ class PathEfficiencyEvaluator(EvaluatorBase): ) """ - _DEFAULT_PATH_EFFICIENCY_SCORE_THRESHOLD = 0.5 - - id = "azureai://built-in/evaluators/path_efficiency" + id = "azureai://built-in/evaluators/task_navigation_efficiency" """Evaluator identifier, experimental and to be used only with evaluation in cloud.""" + matching_mode: TaskNavigationEfficiencyMatchingMode + """The matching mode to use.""" + @override def __init__( self, *, - precision_threshold: float = _DEFAULT_PATH_EFFICIENCY_SCORE_THRESHOLD, - recall_threshold: float = _DEFAULT_PATH_EFFICIENCY_SCORE_THRESHOLD, - f1_score_threshold: float = _DEFAULT_PATH_EFFICIENCY_SCORE_THRESHOLD, + matching_mode: Union[ + str, TaskNavigationEfficiencyMatchingMode + ] = TaskNavigationEfficiencyMatchingMode.EXACT_MATCH, ): - self._higher_is_better = True - super().__init__() + # Type checking for metric parameter + if isinstance(matching_mode, str): + try: + self.matching_mode = TaskNavigationEfficiencyMatchingMode(matching_mode) + except ValueError: + raise ValueError( + f"matching_mode must be one of {[m.value for m in TaskNavigationEfficiencyMatchingMode]}, got '{matching_mode}'" + ) + elif isinstance(matching_mode, TaskNavigationEfficiencyMatchingMode): + self.matching_mode = matching_mode + else: + raise EvaluationException( + f"matching_mode must be a string with one of {[m.value for m in TaskNavigationEfficiencyMatchingMode]} or TaskNavigationEfficiencyMatchingMode enum, got {type(matching_mode)}", + internal_message=str(matching_mode), + target=ErrorTarget.TASK_NAVIGATION_EFFICIENCY_EVALUATOR, + category=ErrorCategory.INVALID_VALUE, + ) - # Type checking for threshold parameters - for name, value in [ - ("precision_threshold", precision_threshold), - ("recall_threshold", recall_threshold), - ("f1_score_threshold", f1_score_threshold), - ]: - if not isinstance(value, float): - raise TypeError(f"{name} must be a float, got {type(value)}") - - self._threshold = { - "path_efficiency_precision": precision_threshold, - "path_efficiency_recall": recall_threshold, - "path_efficiency_f1": f1_score_threshold, - } + super().__init__() def _prepare_steps_for_comparison( self, @@ -192,14 +228,20 @@ def _calculate_any_order_match(self, agent_steps: List, ground_truth_steps: List # Check if agent has at least as many occurrences of each ground truth step return all(agent_counts[step] >= ground_truth_counts[step] for step in ground_truth_counts) + _TASK_NAVIGATION_EFFICIENCY_MATCHING_MODE_TO_FUNCTIONS = { + TaskNavigationEfficiencyMatchingMode.EXACT_MATCH: _calculate_exact_match, + TaskNavigationEfficiencyMatchingMode.IN_ORDER_MATCH: _calculate_in_order_match, + TaskNavigationEfficiencyMatchingMode.ANY_ORDER_MATCH: _calculate_any_order_match, + } + @override - async def _do_eval(self, eval_input: Dict) -> Dict[str, Union[float, str]]: + async def _do_eval(self, eval_input: Dict) -> Dict[str, Union[float, str, Dict[str, float]]]: """Produce a path efficiency evaluation result. :param eval_input: The input to the evaluation function. Must contain "response" and "ground_truth". 
:type eval_input: Dict :return: The evaluation result. - :rtype: Dict[str, Union[float, str]] + :rtype: Dict[str, Union[float, str, Dict[str, float]]] """ response = eval_input["response"] ground_truth = eval_input["ground_truth"] @@ -244,12 +286,10 @@ async def _do_eval(self, eval_input: Dict) -> Dict[str, Union[float, str]]: ground_truth_names = [name.strip() for name in tool_names_list] ground_truth_params_dict = params_dict use_parameter_matching = True - elif isinstance(ground_truth, list) and all(isinstance(step, str) for step in ground_truth): # List format: just tool names ground_truth_names = [step.strip() for step in ground_truth] use_parameter_matching = False - else: raise TypeError( "ground_truth must be a list of strings or a tuple of (list[str], dict[str, dict[str, str]])" @@ -267,42 +307,43 @@ async def _do_eval(self, eval_input: Dict) -> Dict[str, Union[float, str]]: ) # Calculate precision, recall, and F1 scores - metrics = self._calculate_precision_recall_f1_scores(agent_steps, ground_truth_steps) - - # Calculate binary match metrics - exact_match = self._calculate_exact_match(agent_steps, ground_truth_steps) - in_order_match = self._calculate_in_order_match(agent_steps, ground_truth_steps) - any_order_match = self._calculate_any_order_match(agent_steps, ground_truth_steps) + additional_properties_metrics = self._calculate_precision_recall_f1_scores(agent_steps, ground_truth_steps) # Convert metrics to floats, using nan for None or non-convertible values - path_efficiency_precision = ( - float(metrics["precision_score"]) if metrics["precision_score"] is not None else float("nan") - ) - path_efficiency_recall = float(metrics["recall_score"]) if metrics["recall_score"] is not None else float("nan") - path_efficiency_f1_score = float(metrics["f1_score"]) if metrics["f1_score"] is not None else float("nan") + for metric, score in additional_properties_metrics.items(): + additional_properties_metrics[metric] = float(score) if score is not None else float("nan") - return { - "path_efficiency_precision_score": path_efficiency_precision, - "path_efficiency_recall_score": path_efficiency_recall, - "path_efficiency_f1_score": path_efficiency_f1_score, - "path_efficiency_exact_match_result": EVALUATION_PASS_FAIL_MAPPING[exact_match], - "path_efficiency_in_order_match_result": EVALUATION_PASS_FAIL_MAPPING[in_order_match], - "path_efficiency_any_order_match_result": EVALUATION_PASS_FAIL_MAPPING[any_order_match], - } + if self.matching_mode in self._TASK_NAVIGATION_EFFICIENCY_MATCHING_MODE_TO_FUNCTIONS: + # Calculate binary match metrics + match_result = self._TASK_NAVIGATION_EFFICIENCY_MATCHING_MODE_TO_FUNCTIONS[self.matching_mode]( + self, agent_steps, ground_truth_steps + ) + + return { + "task_navigation_efficiency_result": EVALUATION_PASS_FAIL_MAPPING[match_result], + "properties": additional_properties_metrics, + } + else: + raise EvaluationException( + f"Unsupported matching_mode '{self.matching_mode}'", + internal_message=str(self.matching_mode), + target=ErrorTarget.TASK_NAVIGATION_EFFICIENCY_EVALUATOR, + category=ErrorCategory.INVALID_VALUE, + ) @overload def __call__( # type: ignore self, *, response: Union[str, List[Dict[str, Any]]], ground_truth: List[str] - ) -> Dict[str, Union[float, str]]: + ) -> Dict[str, Union[float, str, Dict[str, float]]]: """ - Evaluate the path efficiency of an agent's action sequence. + Evaluate the task navigation efficiency of an agent's action sequence. :keyword response: The agent's response containing tool calls. 
:paramtype response: Union[str, List[Dict[str, Any]]] :keyword ground_truth: List of expected tool/action steps. :paramtype ground_truth: List[str] - :return: The path efficiency scores and results. - :rtype: Dict[str, Union[float, str]] + :return: The task navigation efficiency scores and results. + :rtype: Dict[str, Union[float, str, Dict[str, float]]] """ @overload @@ -311,16 +352,16 @@ def __call__( # type: ignore *, response: Union[str, List[Dict[str, Any]]], ground_truth: Tuple[List[str], Dict[str, Dict[str, str]]], - ) -> Dict[str, Union[float, str]]: + ) -> Dict[str, Union[float, str, Dict[str, float]]]: """ - Evaluate the path efficiency of an agent's action sequence with tool parameters. + Evaluate the task navigation efficiency of an agent's action sequence with tool parameters. :keyword response: The agent's response containing tool calls. :paramtype response: Union[str, List[Dict[str, Any]]] :keyword ground_truth: Tuple of (tool names list, parameters dict) where parameters must match exactly. :paramtype ground_truth: Tuple[List[str], Dict[str, Dict[str, str]]] - :return: The path efficiency scores and results. - :rtype: Dict[str, Union[float, str]] + :return: The task navigation efficiency scores and results. + :rtype: Dict[str, Union[float, str, Dict[str, float]]] """ @override @@ -330,13 +371,13 @@ def __call__( **kwargs, ): """ - Evaluate path efficiency. + Evaluate task navigation efficiency. :keyword response: The agent's response containing tool calls. :paramtype response: Union[str, List[Dict[str, Any]]] :keyword ground_truth: List of expected tool/action steps or tuple of (tool names, parameters dict). :paramtype ground_truth: Union[List[str], Tuple[List[str], Dict[str, Dict[str, str]]]] - :return: The path efficiency scores and results. - :rtype: Dict[str, Union[float, str]] + :return: The task navigation efficiency scores and results. 
+ :rtype: Dict[str, Union[float, str, Dict[str, float]]] """ return super().__call__(*args, **kwargs) diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_exceptions.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_exceptions.py index 9b28686b9bf6..fa24d6c72aed 100644 --- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_exceptions.py +++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_exceptions.py @@ -78,6 +78,7 @@ class ErrorTarget(Enum): ECI_EVALUATOR = "ECIEvaluator" F1_EVALUATOR = "F1Evaluator" GROUNDEDNESS_EVALUATOR = "GroundednessEvaluator" + TASK_NAVIGATION_EFFICIENCY_EVALUATOR = "TaskNavigationEfficiencyEvaluator" PROTECTED_MATERIAL_EVALUATOR = "ProtectedMaterialEvaluator" INTENT_RESOLUTION_EVALUATOR = "IntentResolutionEvaluator" RELEVANCE_EVALUATOR = "RelevanceEvaluator" diff --git a/sdk/evaluation/azure-ai-evaluation/samples/agent_evaluators/path_efficiency.ipynb b/sdk/evaluation/azure-ai-evaluation/samples/agent_evaluators/task_navigation_efficiency.ipynb similarity index 64% rename from sdk/evaluation/azure-ai-evaluation/samples/agent_evaluators/path_efficiency.ipynb rename to sdk/evaluation/azure-ai-evaluation/samples/agent_evaluators/task_navigation_efficiency.ipynb index 7be23dd4167d..924c4c9c01a0 100644 --- a/sdk/evaluation/azure-ai-evaluation/samples/agent_evaluators/path_efficiency.ipynb +++ b/sdk/evaluation/azure-ai-evaluation/samples/agent_evaluators/task_navigation_efficiency.ipynb @@ -5,7 +5,7 @@ "id": "d1de6f2b", "metadata": {}, "source": [ - "# Path Efficiency Evaluator" + "# Task Navigation Efficiency Evaluator" ] }, { @@ -15,12 +15,13 @@ "source": [ "### Getting Started\n", "\n", - "This sample demonstrates how to use Path Efficiency Evaluator\n", + "This sample demonstrates how to use the Task Navigation Efficiency Evaluator to evaluate whether an agent's sequence of actions follows optimal decision-making patterns.\n", + "\n", "Before running the sample:\n", "```bash\n", "pip install azure-ai-projects azure-identity azure-ai-evaluation\n", "```\n", - "Note: The Path Efficiency Evaluator does not require Azure OpenAI configuration as it's a rule-based evaluator." + "Note: The Task Navigation Efficiency Evaluator does not require Azure OpenAI configuration as it's a rule-based evaluator." 
] }, { @@ -28,23 +29,26 @@ "id": "dbc5612b", "metadata": {}, "source": [ - "The Path Efficiency Evaluator measures how efficient an agent's sequence of actions is compared to an optimal path.\n", + "The Task Navigation Efficiency Evaluator measures how efficiently an agent navigates through a sequence of actions compared to an optimal task completion path.\n", + "\n", + "The evaluator provides comprehensive evaluation with both binary matching results and additional detailed P\\R\\F1 results:\n", + "\n", + "**Primary Result:**\n", + "- **Binary Match Result**: Pass/Fail based on the selected matching mode\n", "\n", - "The evaluator provides multiple metrics:\n", + "**Available Matching Modes:**\n", + "- **Exact Match**: Agent's tool calls must exactly match the ground truth (default)\n", + "- **In-Order Match**: All ground truth steps must appear in correct order (allows extra steps)\n", + "- **Any-Order Match**: All ground truth steps must appear with sufficient frequency (most lenient)\n", "\n", - "**Numeric Scores (0.0 - 1.0):**\n", + "**Properties Bag Additional Metrics (0.0 - 1.0):**\n", "- **Precision**: How many of the agent's steps were necessary (relevant to ground truth)\n", "- **Recall**: How many of the required steps were executed by the agent \n", "- **F1 Score**: Harmonic mean of precision and recall\n", "\n", - "**Binary Match Results (pass/fail):**\n", - "- **Exact Match**: Whether the agent's steps exactly match the ground truth\n", - "- **In-Order Match**: Whether all ground truth steps appear in correct order (allows extra steps)\n", - "- **Any-Order Match**: Whether all ground truth steps appear with sufficient frequency (ignores order, allows extra steps)\n", - "\n", "The evaluation requires the following inputs:\n", "- **Response**: The agent's response containing tool calls as a list of messages\n", - "- **Ground Truth**: List of expected tool/action steps as strings" + "- **Ground Truth**: List of expected tool/action steps as strings, or tuple with parameters for matching" ] }, { @@ -52,7 +56,7 @@ "id": "1be910ff", "metadata": {}, "source": [ - "### Initialize Path Efficiency Evaluator" + "### Initialize Task Navigation Efficiency Evaluator" ] }, { @@ -62,18 +66,23 @@ "metadata": {}, "outputs": [], "source": [ - "from azure.ai.evaluation._evaluators._path_efficiency import PathEfficiencyEvaluator\n", + "from azure.ai.evaluation._evaluators._task_navigation_efficiency import TaskNavigationEfficiencyEvaluator, TaskNavigationEfficiencyMatchingMode\n", "from pprint import pprint\n", "\n", - "# Initialize with custom thresholds\n", - "path_efficiency_evaluator = PathEfficiencyEvaluator(\n", - " precision_threshold=0.7,\n", - " recall_threshold=0.8, \n", - " f1_score_threshold=0.75\n", + "# Initialize with exact match mode\n", + "task_navigation_efficiency_evaluator = TaskNavigationEfficiencyEvaluator(\n", + " matching_mode=TaskNavigationEfficiencyMatchingMode.EXACT_MATCH\n", ")\n", "\n", - "# Or use default thresholds (0.5 for all metrics)\n", - "# path_efficiency_evaluator = PathEfficiencyEvaluator()" + "# Other examples:\n", + "# For in-order matching (allows extra steps but requires correct order)\n", + "# task_navigation_efficiency_evaluator = TaskNavigationEfficiencyEvaluator(matching_mode=TaskNavigationEfficiencyMatchingMode.IN_ORDER_MATCH)\n", + "\n", + "# For any-order matching (most lenient - allows extra steps and different order) \n", + "# task_navigation_efficiency_evaluator = 
TaskNavigationEfficiencyEvaluator(matching_mode=TaskNavigationEfficiencyMatchingMode.ANY_ORDER_MATCH)\n", + "\n", + "# Or use defaults (exact match mode)\n", + "# task_navigation_efficiency_evaluator = TaskNavigationEfficiencyEvaluator()" ] }, { @@ -81,7 +90,7 @@ "id": "0247c79d", "metadata": {}, "source": [ - "### Samples" + "### Task Navigation Efficiency Examples" ] }, { @@ -117,7 +126,7 @@ "\n", "ground_truth = [\"search\", \"analyze\", \"report\"]\n", "\n", - "result = path_efficiency_evaluator(response=response, ground_truth=ground_truth)\n", + "result = task_navigation_efficiency_evaluator(response=response, ground_truth=ground_truth)\n", "print(\"Perfect Path Results:\")\n", "pprint(result)" ] @@ -137,7 +146,7 @@ "metadata": {}, "outputs": [], "source": [ - "# Agent includes extra steps but maintains correct order\n", + "# Agent performs all required steps but with extra unnecessary step\n", "response = [\n", " {\n", " \"role\": \"assistant\",\n", @@ -159,7 +168,7 @@ "\n", "ground_truth = [\"search\", \"analyze\", \"report\"]\n", "\n", - "result = path_efficiency_evaluator(response=response, ground_truth=ground_truth)\n", + "result = task_navigation_efficiency_evaluator(response=response, ground_truth=ground_truth)\n", "print(\"\\nPath with Extra Steps Results:\")\n", "pprint(result)" ] @@ -197,7 +206,10 @@ "\n", "ground_truth = [\"search\", \"analyze\", \"report\"]\n", "\n", - "result = path_efficiency_evaluator(response=response, ground_truth=ground_truth)\n", + "# Using in-order matching mode to demonstrate the difference\n", + "in_order_task_navigation_efficiency_evaluator = TaskNavigationEfficiencyEvaluator(matching_mode=TaskNavigationEfficiencyMatchingMode.IN_ORDER_MATCH)\n", + "\n", + "result = in_order_task_navigation_efficiency_evaluator(response=response, ground_truth=ground_truth)\n", "print(\"\\nWrong Order Results:\")\n", "pprint(result)" ] @@ -217,7 +229,7 @@ "metadata": {}, "outputs": [], "source": [ - "# Agent misses some required steps\n", + "# Agent performs only some of the required steps (incomplete)\n", "response = [\n", " {\n", " \"role\": \"assistant\",\n", @@ -231,7 +243,7 @@ "\n", "ground_truth = [\"search\", \"analyze\", \"report\"]\n", "\n", - "result = path_efficiency_evaluator(response=response, ground_truth=ground_truth)\n", + "result = task_navigation_efficiency_evaluator(response=response, ground_truth=ground_truth)\n", "print(\"\\nMissing Steps Results:\")\n", "pprint(result)" ] @@ -251,7 +263,7 @@ "metadata": {}, "outputs": [], "source": [ - "# Customer service agent handling order inquiry\n", + "# Real-world example: Customer service agent handling a refund request\n", "response = [\n", " {\n", " \"role\": \"assistant\",\n", @@ -273,7 +285,7 @@ "\n", "ground_truth = [\"lookup_order\", \"calculate_refund\", \"process_refund\"]\n", "\n", - "result = path_efficiency_evaluator(response=response, ground_truth=ground_truth)\n", + "result = task_navigation_efficiency_evaluator(response=response, ground_truth=ground_truth)\n", "print(\"\\nCustomer Service Results:\")\n", "pprint(result)" ] @@ -319,7 +331,7 @@ "\n", "ground_truth = [\"search\", \"analyze\", \"report\"]\n", "\n", - "result = path_efficiency_evaluator(response=response, ground_truth=ground_truth)\n", + "result = task_navigation_efficiency_evaluator(response=response, ground_truth=ground_truth)\n", "print(\"\\nComplex Path with Duplicates Results:\")\n", "pprint(result)" ] @@ -339,12 +351,14 @@ "metadata": {}, "outputs": [], "source": [ - "# Test with empty agent response\n", + "# Test edge 
cases\n", + "\n", + "# Test with empty response\n", "try:\n", " response = []\n", " ground_truth = [\"search\", \"analyze\", \"report\"]\n", " \n", - " result = path_efficiency_evaluator(response=response, ground_truth=ground_truth)\n", + " result = task_navigation_efficiency_evaluator(response=response, ground_truth=ground_truth)\n", " print(\"\\nEmpty Response Results:\")\n", " pprint(result)\n", "except Exception as e:\n", @@ -360,7 +374,7 @@ " ]\n", " ground_truth = []\n", " \n", - " result = path_efficiency_evaluator(response=response, ground_truth=ground_truth)\n", + " result = task_navigation_efficiency_evaluator(response=response, ground_truth=ground_truth)\n", " print(\"\\nEmpty Ground Truth Results:\")\n", " pprint(result)\n", "except Exception as e:\n", @@ -382,7 +396,7 @@ "metadata": {}, "outputs": [], "source": [ - "# PathEfficiencyEvaluator also supports tuple format with parameters for exact parameter matching\n", + "# TaskNavigationEfficiencyEvaluator also supports tuple format with parameters for exact parameter matching\n", "response_with_params = [\n", " {\n", " \"role\": \"assistant\",\n", @@ -394,7 +408,7 @@ "# Parameters must match exactly for tools to be considered matching\n", "ground_truth_with_params = ([\"search\"], {\"search\": {\"query\": \"test\"}})\n", "\n", - "result = path_efficiency_evaluator(response=response_with_params, ground_truth=ground_truth_with_params)\n", + "result = task_navigation_efficiency_evaluator(response=response_with_params, ground_truth=ground_truth_with_params)\n", "print(\"\\nTuple Format with Parameters Results:\")\n", "pprint(result)" ] @@ -414,32 +428,60 @@ "metadata": {}, "outputs": [], "source": [ - "def analyze_path_efficiency(response, ground_truth, scenario_name):\n", + "# Helper functions for analysis\n", + "\n", + "def analyze_task_navigation_efficiency(response, ground_truth, scenario_name, evaluator=None):\n", " \"\"\"\n", - " Helper function to analyze and display path efficiency results\n", + " Helper function to analyze and display task navigation efficiency results\n", " \"\"\"\n", - " result = path_efficiency_evaluator(response=response, ground_truth=ground_truth)\n", + " if evaluator is None:\n", + " evaluator = task_navigation_efficiency_evaluator\n", + " \n", + " result = evaluator(response=response, ground_truth=ground_truth)\n", " \n", " print(f\"\\n{'='*50}\")\n", " print(f\"Analysis for: {scenario_name}\")\n", " print(f\"{'='*50}\")\n", " \n", " print(f\"Ground Truth Steps: {ground_truth}\")\n", + " print(f\"Evaluator Matching Mode: {evaluator.matching_mode.value}\")\n", " print(f\"{'='*50}\")\n", " \n", - " # Numeric scores\n", - " print(\"Numeric Scores:\")\n", - " print(f\" Precision: {result['path_efficiency_precision_score']:.3f}\")\n", - " print(f\" Recall: {result['path_efficiency_recall_score']:.3f}\")\n", - " print(f\" F1 Score: {result['path_efficiency_f1_score']:.3f}\")\n", - "\n", - " # Binary matches\n", - " print(\"\\nBinary Match Results:\")\n", - " print(f\" Exact Match: {result['path_efficiency_exact_match_result']}\")\n", - " print(f\" In-Order Match: {result['path_efficiency_in_order_match_result']}\")\n", - " print(f\" Any-Order Match: {result['path_efficiency_any_order_match_result']}\")\n", - "\n", - " return result" + " # Display the returned results\n", + " for key, value in result.items():\n", + " if key == \"properties\":\n", + " print(f\" {key}:\")\n", + " for prop_key, prop_value in value.items():\n", + " print(f\" {prop_key}: {prop_value:.3f}\")\n", + " else:\n", + " print(f\" 
{key}: {value}\")\n", + "\n", + " return result\n", + "\n", + "# Example with different matching modes\n", + "def compare_matching_modes(response, ground_truth, scenario_name):\n", + " \"\"\"\n", + " Compare results across different matching modes for the same scenario\n", + " \"\"\"\n", + " print(f\"\\n{'='*60}\")\n", + " print(f\"Matching Mode Comparison for: {scenario_name}\")\n", + " print(f\"{'='*60}\")\n", + " \n", + " matching_modes_to_test = [\n", + " TaskNavigationEfficiencyMatchingMode.EXACT_MATCH,\n", + " TaskNavigationEfficiencyMatchingMode.IN_ORDER_MATCH,\n", + " TaskNavigationEfficiencyMatchingMode.ANY_ORDER_MATCH\n", + " ]\n", + " \n", + " for mode in matching_modes_to_test:\n", + " evaluator = TaskNavigationEfficiencyEvaluator(matching_mode=mode)\n", + " result = evaluator(response=response, ground_truth=ground_truth)\n", + " \n", + " # Get the main result value\n", + " result_value = result.get(\"task_navigation_efficiency_result\", \"N/A\")\n", + " print(f\" {mode.value.upper():15}: {result_value}\")\n", + " \n", + " return" ] }, { @@ -467,7 +509,7 @@ "]\n", "perfect_ground_truth = [\"authenticate\", \"fetch_data\", \"process_result\"]\n", "\n", - "analyze_path_efficiency(perfect_response, perfect_ground_truth, \"Perfect Efficiency Example\")\n", + "analyze_task_navigation_efficiency(perfect_response, perfect_ground_truth, \"Perfect Efficiency Example\")\n", "\n", "# Scenario 2: Inefficient with extra steps\n", "inefficient_response = [\n", @@ -479,7 +521,34 @@ "]\n", "inefficient_ground_truth = [\"authenticate\", \"fetch_data\", \"process_result\"]\n", "\n", - "analyze_path_efficiency(inefficient_response, inefficient_ground_truth, \"Inefficient Path with Extra Steps\")" + "analyze_task_navigation_efficiency(inefficient_response, inefficient_ground_truth, \"Inefficient Path with Extra Steps\")\n", + "\n", + "# Demonstrate different matching modes\n", + "print(\"\\n\" + \"=\"*60)\n", + "print(\"COMPARING DIFFERENT MATCHING MODES\")\n", + "print(\"=\"*60)\n", + "\n", + "compare_matching_modes(inefficient_response, inefficient_ground_truth, \"Inefficient Path Analysis\")\n", + "\n", + "# Example: Creating evaluators with different matching modes\n", + "print(f\"\\n{'='*60}\")\n", + "print(\"INDIVIDUAL MATCHING MODE EXAMPLES\")\n", + "print(\"=\"*60)\n", + "\n", + "# Exact match evaluator\n", + "exact_match_evaluator = TaskNavigationEfficiencyEvaluator(matching_mode=TaskNavigationEfficiencyMatchingMode.EXACT_MATCH)\n", + "exact_result = exact_match_evaluator(response=perfect_response, ground_truth=perfect_ground_truth)\n", + "print(f\"Exact Match Evaluator: {exact_result}\")\n", + "\n", + "# In-order match evaluator\n", + "in_order_evaluator = TaskNavigationEfficiencyEvaluator(matching_mode=TaskNavigationEfficiencyMatchingMode.IN_ORDER_MATCH)\n", + "in_order_result = in_order_evaluator(response=inefficient_response, ground_truth=inefficient_ground_truth)\n", + "print(f\"In-Order Match Evaluator: {in_order_result}\")\n", + "\n", + "# Any-order match evaluator (most lenient)\n", + "any_order_evaluator = TaskNavigationEfficiencyEvaluator(matching_mode=TaskNavigationEfficiencyMatchingMode.ANY_ORDER_MATCH)\n", + "any_order_result = any_order_evaluator(response=inefficient_response, ground_truth=inefficient_ground_truth)\n", + "print(f\"Any-Order Match Evaluator: {any_order_result}\")" ] } ], diff --git a/sdk/evaluation/azure-ai-evaluation/samples/evaluation_samples_evaluate.py b/sdk/evaluation/azure-ai-evaluation/samples/evaluation_samples_evaluate.py index 
a6a0f3b6805d..3f96ccad9de9 100644 --- a/sdk/evaluation/azure-ai-evaluation/samples/evaluation_samples_evaluate.py +++ b/sdk/evaluation/azure-ai-evaluation/samples/evaluation_samples_evaluate.py @@ -575,11 +575,14 @@ def evaluation_evaluate_classes_methods(self): ) # [END tool_call_accuracy_evaluator] - # [START path_efficiency_evaluator] - from azure.ai.evaluation._evaluators._path_efficiency import PathEfficiencyEvaluator + # [START task_navigation_efficiency_evaluator] + from azure.ai.evaluation._evaluators._task_navigation_efficiency import ( + TaskNavigationEfficiencyEvaluator, + TaskNavigationEfficiencyMatchingMode, + ) - path_efficiency_evaluator = PathEfficiencyEvaluator( - precision_threshold=0.7, recall_threshold=0.8, f1_score_threshold=0.75 + task_navigation_efficiency_evaluator = TaskNavigationEfficiencyEvaluator( + matching_mode=TaskNavigationEfficiencyMatchingMode.EXACT_MATCH ) response = [ @@ -598,7 +601,7 @@ def evaluation_evaluate_classes_methods(self): ] ground_truth = ["search", "analyze", "report"] - path_efficiency_evaluator(response=response, ground_truth=ground_truth) + task_navigation_efficiency_evaluator(response=response, ground_truth=ground_truth) # Also supports tuple format with parameters for exact parameter matching response_with_params = [ @@ -611,8 +614,8 @@ def evaluation_evaluate_classes_methods(self): ] ground_truth_with_params = (["search"], {"search": {"query": "test"}}) - path_efficiency_evaluator(response=response_with_params, ground_truth=ground_truth_with_params) - # [END path_efficiency_evaluator] + task_navigation_efficiency_evaluator(response=response_with_params, ground_truth=ground_truth_with_params) + # [END task_navigation_efficiency_evaluator] # [START document_retrieval_evaluator] from azure.ai.evaluation import DocumentRetrievalEvaluator diff --git a/sdk/evaluation/azure-ai-evaluation/samples/evaluation_samples_evaluate_fdp.py b/sdk/evaluation/azure-ai-evaluation/samples/evaluation_samples_evaluate_fdp.py index d6b023a581b1..cd6e6b8b36fe 100644 --- a/sdk/evaluation/azure-ai-evaluation/samples/evaluation_samples_evaluate_fdp.py +++ b/sdk/evaluation/azure-ai-evaluation/samples/evaluation_samples_evaluate_fdp.py @@ -581,11 +581,14 @@ def evaluation_evaluate_classes_methods(self): ) # [END tool_call_accuracy_evaluator] - # [START path_efficiency_evaluator] - from azure.ai.evaluation._evaluators._path_efficiency import PathEfficiencyEvaluator + # [START task_navigation_efficiency_evaluator] + from azure.ai.evaluation._evaluators._task_navigation_efficiency import ( + TaskNavigationEfficiencyEvaluator, + TaskNavigationEfficiencyMatchingMode, + ) - path_efficiency_evaluator = PathEfficiencyEvaluator( - precision_threshold=0.7, recall_threshold=0.8, f1_score_threshold=0.75 + task_navigation_efficiency_evaluator = TaskNavigationEfficiencyEvaluator( + matching_mode=TaskNavigationEfficiencyMatchingMode.EXACT_MATCH ) response = [ @@ -604,7 +607,7 @@ def evaluation_evaluate_classes_methods(self): ] ground_truth = ["search", "analyze", "report"] - path_efficiency_evaluator(response=response, ground_truth=ground_truth) + task_navigation_efficiency_evaluator(response=response, ground_truth=ground_truth) # Also supports tuple format with parameters for exact parameter matching response_with_params = [ @@ -617,8 +620,8 @@ def evaluation_evaluate_classes_methods(self): ] ground_truth_with_params = (["search"], {"search": {"query": "test"}}) - path_efficiency_evaluator(response=response_with_params, ground_truth=ground_truth_with_params) - # [END 
path_efficiency_evaluator] + task_navigation_efficiency_evaluator(response=response_with_params, ground_truth=ground_truth_with_params) + # [END task_navigation_efficiency_evaluator] # [START document_retrieval_evaluator] from azure.ai.evaluation import DocumentRetrievalEvaluator diff --git a/sdk/evaluation/azure-ai-evaluation/tests/unittests/test_path_efficiency_evaluators.py b/sdk/evaluation/azure-ai-evaluation/tests/unittests/test_path_efficiency_evaluators.py deleted file mode 100644 index 3b3deea754a8..000000000000 --- a/sdk/evaluation/azure-ai-evaluation/tests/unittests/test_path_efficiency_evaluators.py +++ /dev/null @@ -1,499 +0,0 @@ -import pytest -from azure.ai.evaluation._evaluators._path_efficiency import PathEfficiencyEvaluator - - -@pytest.mark.unittest -class TestPathEfficiencyEvaluator: - def test_exact_match_scenario(self): - """Test when agent steps exactly match ground truth.""" - evaluator = PathEfficiencyEvaluator() - - response = [ - { - "role": "assistant", - "content": [{"type": "tool_call", "tool_call_id": "call_1", "name": "search", "arguments": {}}], - }, - { - "role": "assistant", - "content": [{"type": "tool_call", "tool_call_id": "call_2", "name": "analyze", "arguments": {}}], - }, - { - "role": "assistant", - "content": [{"type": "tool_call", "tool_call_id": "call_3", "name": "report", "arguments": {}}], - }, - ] - ground_truth = ["search", "analyze", "report"] - - result = evaluator(response=response, ground_truth=ground_truth) - - assert result["path_efficiency_precision_score"] == 1.0 - assert ( - result["path_efficiency_precision_threshold"] - == PathEfficiencyEvaluator._DEFAULT_PATH_EFFICIENCY_SCORE_THRESHOLD - ) - assert result["path_efficiency_precision_result"] == "pass" - assert result["path_efficiency_recall_score"] == 1.0 - assert ( - result["path_efficiency_recall_threshold"] - == PathEfficiencyEvaluator._DEFAULT_PATH_EFFICIENCY_SCORE_THRESHOLD - ) - assert result["path_efficiency_recall_result"] == "pass" - assert result["path_efficiency_f1_score"] == 1.0 - assert ( - result["path_efficiency_f1_threshold"] == PathEfficiencyEvaluator._DEFAULT_PATH_EFFICIENCY_SCORE_THRESHOLD - ) - assert result["path_efficiency_f1_result"] == "pass" - assert result["path_efficiency_exact_match_result"] == "pass" - assert result["path_efficiency_in_order_match_result"] == "pass" - assert result["path_efficiency_any_order_match_result"] == "pass" - - def test_in_order_match_with_extra_steps(self): - """Test when agent has extra steps but maintains order.""" - evaluator = PathEfficiencyEvaluator() - - response = [ - { - "role": "assistant", - "content": [{"type": "tool_call", "tool_call_id": "call_1", "name": "search", "arguments": {}}], - }, - { - "role": "assistant", - "content": [{"type": "tool_call", "tool_call_id": "call_2", "name": "extra_step", "arguments": {}}], - }, - { - "role": "assistant", - "content": [{"type": "tool_call", "tool_call_id": "call_3", "name": "analyze", "arguments": {}}], - }, - { - "role": "assistant", - "content": [{"type": "tool_call", "tool_call_id": "call_4", "name": "report", "arguments": {}}], - }, - ] - ground_truth = ["search", "analyze", "report"] - - result = evaluator(response=response, ground_truth=ground_truth) - - assert result["path_efficiency_precision_score"] == 0.75 # 3/4 - assert ( - result["path_efficiency_precision_threshold"] - == PathEfficiencyEvaluator._DEFAULT_PATH_EFFICIENCY_SCORE_THRESHOLD - ) - assert result["path_efficiency_precision_result"] == "pass" - assert result["path_efficiency_recall_score"] == 1.0 # 
3/3 - assert ( - result["path_efficiency_recall_threshold"] - == PathEfficiencyEvaluator._DEFAULT_PATH_EFFICIENCY_SCORE_THRESHOLD - ) - assert result["path_efficiency_recall_result"] == "pass" - assert result["path_efficiency_f1_score"] == pytest.approx(0.857, rel=1e-2) - assert ( - result["path_efficiency_f1_threshold"] == PathEfficiencyEvaluator._DEFAULT_PATH_EFFICIENCY_SCORE_THRESHOLD - ) - assert result["path_efficiency_f1_result"] == "pass" - assert result["path_efficiency_exact_match_result"] == "fail" - assert result["path_efficiency_in_order_match_result"] == "pass" - assert result["path_efficiency_any_order_match_result"] == "pass" - - def test_any_order_match(self): - """Test when agent has all steps but in wrong order.""" - evaluator = PathEfficiencyEvaluator() - - response = [ - { - "role": "assistant", - "content": [{"type": "tool_call", "tool_call_id": "call_1", "name": "report", "arguments": {}}], - }, - { - "role": "assistant", - "content": [{"type": "tool_call", "tool_call_id": "call_2", "name": "search", "arguments": {}}], - }, - { - "role": "assistant", - "content": [{"type": "tool_call", "tool_call_id": "call_3", "name": "analyze", "arguments": {}}], - }, - ] - ground_truth = ["search", "analyze", "report"] - - result = evaluator(response=response, ground_truth=ground_truth) - - assert result["path_efficiency_precision_score"] == 1.0 - assert ( - result["path_efficiency_precision_threshold"] - == PathEfficiencyEvaluator._DEFAULT_PATH_EFFICIENCY_SCORE_THRESHOLD - ) - assert result["path_efficiency_precision_result"] == "pass" - assert result["path_efficiency_recall_score"] == 1.0 - assert ( - result["path_efficiency_recall_threshold"] - == PathEfficiencyEvaluator._DEFAULT_PATH_EFFICIENCY_SCORE_THRESHOLD - ) - assert result["path_efficiency_recall_result"] == "pass" - assert result["path_efficiency_f1_score"] == 1.0 - assert ( - result["path_efficiency_f1_threshold"] == PathEfficiencyEvaluator._DEFAULT_PATH_EFFICIENCY_SCORE_THRESHOLD - ) - assert result["path_efficiency_f1_result"] == "pass" - assert result["path_efficiency_exact_match_result"] == "fail" - assert result["path_efficiency_in_order_match_result"] == "fail" - assert result["path_efficiency_any_order_match_result"] == "pass" - - def test_partial_match(self): - """Test when agent misses some steps and has extra steps.""" - evaluator = PathEfficiencyEvaluator() - - response = [ - { - "role": "assistant", - "content": [{"type": "tool_call", "tool_call_id": "call_1", "name": "search", "arguments": {}}], - }, - { - "role": "assistant", - "content": [{"type": "tool_call", "tool_call_id": "call_2", "name": "wrong_step", "arguments": {}}], - }, - { - "role": "assistant", - "content": [{"type": "tool_call", "tool_call_id": "call_3", "name": "report", "arguments": {}}], - }, - ] - ground_truth = ["search", "analyze", "report"] - - result = evaluator(response=response, ground_truth=ground_truth) - - assert result["path_efficiency_precision_score"] == pytest.approx(0.667, rel=1e-2) # 2/3 - assert ( - result["path_efficiency_precision_threshold"] - == PathEfficiencyEvaluator._DEFAULT_PATH_EFFICIENCY_SCORE_THRESHOLD - ) - assert result["path_efficiency_precision_result"] == "pass" - assert result["path_efficiency_recall_score"] == pytest.approx(0.667, rel=1e-2) # 2/3 - assert ( - result["path_efficiency_recall_threshold"] - == PathEfficiencyEvaluator._DEFAULT_PATH_EFFICIENCY_SCORE_THRESHOLD - ) - assert result["path_efficiency_recall_result"] == "pass" - assert result["path_efficiency_f1_score"] == pytest.approx(0.667, 
rel=1e-2) - assert ( - result["path_efficiency_f1_threshold"] == PathEfficiencyEvaluator._DEFAULT_PATH_EFFICIENCY_SCORE_THRESHOLD - ) - assert result["path_efficiency_f1_result"] == "pass" - assert result["path_efficiency_exact_match_result"] == "fail" - assert result["path_efficiency_in_order_match_result"] == "fail" - assert result["path_efficiency_any_order_match_result"] == "fail" - - def test_no_matching_steps(self): - """Test when agent has no matching steps.""" - evaluator = PathEfficiencyEvaluator() - - response = [ - { - "role": "assistant", - "content": [{"type": "tool_call", "tool_call_id": "call_1", "name": "wrong1", "arguments": {}}], - }, - { - "role": "assistant", - "content": [{"type": "tool_call", "tool_call_id": "call_2", "name": "wrong2", "arguments": {}}], - }, - ] - ground_truth = ["search", "analyze", "report"] - - result = evaluator(response=response, ground_truth=ground_truth) - - assert result["path_efficiency_precision_score"] == 0.0 - assert ( - result["path_efficiency_precision_threshold"] - == PathEfficiencyEvaluator._DEFAULT_PATH_EFFICIENCY_SCORE_THRESHOLD - ) - assert result["path_efficiency_precision_result"] == "fail" - assert result["path_efficiency_recall_score"] == 0.0 - assert ( - result["path_efficiency_recall_threshold"] - == PathEfficiencyEvaluator._DEFAULT_PATH_EFFICIENCY_SCORE_THRESHOLD - ) - assert result["path_efficiency_recall_result"] == "fail" - assert result["path_efficiency_f1_score"] == 0.0 - assert ( - result["path_efficiency_f1_threshold"] == PathEfficiencyEvaluator._DEFAULT_PATH_EFFICIENCY_SCORE_THRESHOLD - ) - assert result["path_efficiency_f1_result"] == "fail" - assert result["path_efficiency_exact_match_result"] == "fail" - assert result["path_efficiency_in_order_match_result"] == "fail" - assert result["path_efficiency_any_order_match_result"] == "fail" - - def test_empty_agent_steps(self): - """Test when agent has no tool calls.""" - evaluator = PathEfficiencyEvaluator() - - response = [] - ground_truth = ["search", "analyze", "report"] - - result = evaluator(response=response, ground_truth=ground_truth) - - assert result["path_efficiency_precision_score"] == 0.0 - assert ( - result["path_efficiency_precision_threshold"] - == PathEfficiencyEvaluator._DEFAULT_PATH_EFFICIENCY_SCORE_THRESHOLD - ) - assert result["path_efficiency_precision_result"] == "fail" - assert result["path_efficiency_recall_score"] == 0.0 - assert ( - result["path_efficiency_recall_threshold"] - == PathEfficiencyEvaluator._DEFAULT_PATH_EFFICIENCY_SCORE_THRESHOLD - ) - assert result["path_efficiency_recall_result"] == "fail" - assert result["path_efficiency_f1_score"] == 0.0 - assert ( - result["path_efficiency_f1_threshold"] == PathEfficiencyEvaluator._DEFAULT_PATH_EFFICIENCY_SCORE_THRESHOLD - ) - assert result["path_efficiency_f1_result"] == "fail" - assert result["path_efficiency_exact_match_result"] == "fail" - assert result["path_efficiency_in_order_match_result"] == "fail" - assert result["path_efficiency_any_order_match_result"] == "fail" - - def test_call_method(self): - """Test using the __call__ method.""" - evaluator = PathEfficiencyEvaluator(precision_threshold=0.8, recall_threshold=0.8, f1_score_threshold=0.8) - - response = [ - { - "role": "assistant", - "content": [{"type": "tool_call", "tool_call_id": "call_1", "name": "search", "arguments": {}}], - }, - { - "role": "assistant", - "content": [{"type": "tool_call", "tool_call_id": "call_2", "name": "analyze", "arguments": {}}], - }, - { - "role": "assistant", - "content": [{"type": "tool_call", 
"tool_call_id": "call_3", "name": "report", "arguments": {}}], - }, - ] - ground_truth = ["search", "analyze", "report"] - - result = evaluator(response=response, ground_truth=ground_truth) - - assert result["path_efficiency_precision_score"] == 1.0 - assert result["path_efficiency_recall_score"] == 1.0 - assert result["path_efficiency_f1_score"] == 1.0 - assert result["path_efficiency_precision_result"] == "pass" - assert result["path_efficiency_recall_result"] == "pass" - assert result["path_efficiency_f1_result"] == "pass" - assert result["path_efficiency_precision_threshold"] == 0.8 - assert result["path_efficiency_recall_threshold"] == 0.8 - assert result["path_efficiency_f1_threshold"] == 0.8 - assert result["path_efficiency_exact_match_result"] == "pass" - assert result["path_efficiency_in_order_match_result"] == "pass" - assert result["path_efficiency_any_order_match_result"] == "pass" - - def test_invalid_ground_truth(self): - """Test with invalid ground truth steps.""" - evaluator = PathEfficiencyEvaluator() - - with pytest.raises(TypeError): - evaluator(response=[], ground_truth="not_a_list") # type: ignore - - with pytest.raises(ValueError): - evaluator(response=[], ground_truth=[]) - - def test_tuple_format_with_parameters_exact_match(self): - """Test tuple format with exact parameter matching.""" - evaluator = PathEfficiencyEvaluator() - - response = [ - { - "role": "assistant", - "content": [ - { - "type": "tool_call", - "tool_call_id": "call_1", - "name": "search", - "arguments": {"query": "weather", "location": "NYC"}, - } - ], - }, - { - "role": "assistant", - "content": [ - { - "type": "tool_call", - "tool_call_id": "call_2", - "name": "format_result", - "arguments": {"format": "json", "style": "brief"}, - } - ], - }, - ] - - # Ground truth with tuple format: (tool_names, parameters_dict) - ground_truth = ( - ["search", "format_result"], - {"search": {"query": "weather", "location": "NYC"}, "format_result": {"format": "json", "style": "brief"}}, - ) - - result = evaluator(response=response, ground_truth=ground_truth) - - # Should have perfect scores since everything matches exactly - assert result["path_efficiency_precision_score"] == 1.0 - assert result["path_efficiency_recall_score"] == 1.0 - assert result["path_efficiency_f1_score"] == 1.0 - assert result["path_efficiency_exact_match_result"] == "pass" - assert result["path_efficiency_in_order_match_result"] == "pass" - assert result["path_efficiency_any_order_match_result"] == "pass" - - def test_tuple_format_with_parameters_mismatch(self): - """Test tuple format with parameter mismatches.""" - evaluator = PathEfficiencyEvaluator() - - response = [ - { - "role": "assistant", - "content": [ - { - "type": "tool_call", - "tool_call_id": "call_1", - "name": "search", - "arguments": {"query": "weather", "location": "LA"}, # Different location - } - ], - }, - { - "role": "assistant", - "content": [ - { - "type": "tool_call", - "tool_call_id": "call_2", - "name": "format_result", - "arguments": {"format": "xml", "style": "detailed"}, # Different parameters - } - ], - }, - ] - - # Ground truth with tuple format - ground_truth = ( - ["search", "format_result"], - {"search": {"query": "weather", "location": "NYC"}, "format_result": {"format": "json", "style": "brief"}}, - ) - - result = evaluator(response=response, ground_truth=ground_truth) - - # Should have zero scores since parameters don't match exactly - assert result["path_efficiency_precision_score"] == 0.0 - assert result["path_efficiency_recall_score"] == 0.0 - assert 
result["path_efficiency_f1_score"] == 0.0 - assert result["path_efficiency_exact_match_result"] == "fail" - assert result["path_efficiency_in_order_match_result"] == "fail" - assert result["path_efficiency_any_order_match_result"] == "fail" - - def test_tuple_format_with_parameters_partial_match(self): - """Test tuple format with some matching and some non-matching tools.""" - evaluator = PathEfficiencyEvaluator() - - response = [ - { - "role": "assistant", - "content": [ - { - "type": "tool_call", - "tool_call_id": "call_1", - "name": "search", - "arguments": {"query": "weather", "location": "NYC"}, # Matches exactly - } - ], - }, - { - "role": "assistant", - "content": [ - { - "type": "tool_call", - "tool_call_id": "call_2", - "name": "format_result", - "arguments": {"format": "xml", "style": "detailed"}, # Different parameters - } - ], - }, - { - "role": "assistant", - "content": [ - { - "type": "tool_call", - "tool_call_id": "call_3", - "name": "extra_tool", - "arguments": {"param": "value"}, # Extra tool not in ground truth - } - ], - }, - ] - - # Ground truth with tuple format - ground_truth = ( - ["search", "format_result"], - {"search": {"query": "weather", "location": "NYC"}, "format_result": {"format": "json", "style": "brief"}}, - ) - - result = evaluator(response=response, ground_truth=ground_truth) - - # Only "search" matches (1 out of 3 agent tools, 1 out of 2 ground truth tools) - expected_precision = 1.0 / 3.0 # 1 match out of 3 agent tools - expected_recall = 1.0 / 2.0 # 1 match out of 2 ground truth tools - expected_f1 = 2 * expected_precision * expected_recall / (expected_precision + expected_recall) - - assert result["path_efficiency_precision_score"] == pytest.approx(expected_precision, rel=1e-3) - assert result["path_efficiency_recall_score"] == pytest.approx(expected_recall, rel=1e-3) - assert result["path_efficiency_f1_score"] == pytest.approx(expected_f1, rel=1e-3) - assert result["path_efficiency_exact_match_result"] == "fail" - assert result["path_efficiency_in_order_match_result"] == "fail" - assert result["path_efficiency_any_order_match_result"] == "fail" - - def test_tuple_format_with_empty_parameters(self): - """Test tuple format where some tools have no parameters.""" - evaluator = PathEfficiencyEvaluator() - - response = [ - { - "role": "assistant", - "content": [ - {"type": "tool_call", "tool_call_id": "call_1", "name": "ping", "arguments": {}} # No parameters - ], - }, - { - "role": "assistant", - "content": [ - {"type": "tool_call", "tool_call_id": "call_2", "name": "log", "arguments": {"message": "success"}} - ], - }, - ] - - # Ground truth with tuple format including empty parameters - ground_truth = (["ping", "log"], {"ping": {}, "log": {"message": "success"}}) # Empty parameters dict - - result = evaluator(response=response, ground_truth=ground_truth) - - # Should have perfect scores since everything matches exactly - assert result["path_efficiency_precision_score"] == 1.0 - assert result["path_efficiency_recall_score"] == 1.0 - assert result["path_efficiency_f1_score"] == 1.0 - assert result["path_efficiency_exact_match_result"] == "pass" - assert result["path_efficiency_in_order_match_result"] == "pass" - assert result["path_efficiency_any_order_match_result"] == "pass" - - def test_tuple_format_invalid_inputs(self): - """Test tuple format with invalid input validation.""" - evaluator = PathEfficiencyEvaluator() - - response = [] # Empty response for testing validation - - # Test invalid tuple length - with pytest.raises(TypeError): - 
evaluator(response=response, ground_truth=("only_one_element",)) # type: ignore - - # Test invalid first element (not a list) - with pytest.raises(TypeError): - evaluator(response=response, ground_truth=("not_a_list", {})) # type: ignore - - # Test invalid second element (not a dict) - with pytest.raises(TypeError): - evaluator(response=response, ground_truth=(["tool"], "not_a_dict")) # type: ignore - - # Test invalid parameter values (not json serializable) - with pytest.raises(TypeError): - evaluator(response=response, ground_truth=(["tool"], {"tool": {"key": object}})) # type: ignore diff --git a/sdk/evaluation/azure-ai-evaluation/tests/unittests/test_task_navigation_efficiency_evaluators.py b/sdk/evaluation/azure-ai-evaluation/tests/unittests/test_task_navigation_efficiency_evaluators.py new file mode 100644 index 000000000000..02913da64da8 --- /dev/null +++ b/sdk/evaluation/azure-ai-evaluation/tests/unittests/test_task_navigation_efficiency_evaluators.py @@ -0,0 +1,184 @@ +import pytest +from azure.ai.evaluation._evaluators._task_navigation_efficiency import ( + TaskNavigationEfficiencyEvaluator, + TaskNavigationEfficiencyMatchingMode, +) + + +@pytest.mark.unittest +class TestTaskNavigationEfficiencyEvaluator: + def test_exact_match_scenario(self): + """Test when agent steps exactly match ground truth.""" + evaluator = TaskNavigationEfficiencyEvaluator(matching_mode=TaskNavigationEfficiencyMatchingMode.EXACT_MATCH) + + response = [ + { + "role": "assistant", + "content": [{"type": "tool_call", "tool_call_id": "call_1", "name": "search", "arguments": {}}], + }, + { + "role": "assistant", + "content": [{"type": "tool_call", "tool_call_id": "call_2", "name": "analyze", "arguments": {}}], + }, + { + "role": "assistant", + "content": [{"type": "tool_call", "tool_call_id": "call_3", "name": "report", "arguments": {}}], + }, + ] + ground_truth = ["search", "analyze", "report"] + + result = evaluator(response=response, ground_truth=ground_truth) + assert result["task_navigation_efficiency_result"] == "pass" + assert "properties" in result + assert result["properties"]["precision_score"] == 1.0 + assert result["properties"]["recall_score"] == 1.0 + assert result["properties"]["f1_score"] == 1.0 + + def test_in_order_match_with_extra_steps(self): + """Test when agent has extra steps but maintains order.""" + evaluator = TaskNavigationEfficiencyEvaluator(matching_mode=TaskNavigationEfficiencyMatchingMode.IN_ORDER_MATCH) + + response = [ + { + "role": "assistant", + "content": [{"type": "tool_call", "tool_call_id": "call_1", "name": "search", "arguments": {}}], + }, + { + "role": "assistant", + "content": [{"type": "tool_call", "tool_call_id": "call_2", "name": "extra_step", "arguments": {}}], + }, + { + "role": "assistant", + "content": [{"type": "tool_call", "tool_call_id": "call_3", "name": "analyze", "arguments": {}}], + }, + { + "role": "assistant", + "content": [{"type": "tool_call", "tool_call_id": "call_4", "name": "report", "arguments": {}}], + }, + ] + ground_truth = ["search", "analyze", "report"] + + result = evaluator(response=response, ground_truth=ground_truth) + assert result["task_navigation_efficiency_result"] == "pass" + assert result["properties"]["precision_score"] == 0.75 # 3/4 + assert result["properties"]["recall_score"] == 1.0 # 3/3 + assert result["properties"]["f1_score"] == pytest.approx(0.857, rel=1e-2) + + def test_any_order_match(self): + """Test when agent has all steps but in wrong order.""" + evaluator = TaskNavigationEfficiencyEvaluator( + 
matching_mode=TaskNavigationEfficiencyMatchingMode.ANY_ORDER_MATCH + ) + + response = [ + { + "role": "assistant", + "content": [{"type": "tool_call", "tool_call_id": "call_1", "name": "report", "arguments": {}}], + }, + { + "role": "assistant", + "content": [{"type": "tool_call", "tool_call_id": "call_2", "name": "search", "arguments": {}}], + }, + { + "role": "assistant", + "content": [{"type": "tool_call", "tool_call_id": "call_3", "name": "analyze", "arguments": {}}], + }, + ] + ground_truth = ["search", "analyze", "report"] + + result = evaluator(response=response, ground_truth=ground_truth) + assert result["task_navigation_efficiency_result"] == "pass" + assert result["properties"]["precision_score"] == 1.0 + assert result["properties"]["recall_score"] == 1.0 + assert result["properties"]["f1_score"] == 1.0 + + def test_exact_match_failure(self): + """Test when exact match fails but other matches succeed.""" + exact_evaluator = TaskNavigationEfficiencyEvaluator( + matching_mode=TaskNavigationEfficiencyMatchingMode.EXACT_MATCH + ) + in_order_evaluator = TaskNavigationEfficiencyEvaluator( + matching_mode=TaskNavigationEfficiencyMatchingMode.IN_ORDER_MATCH + ) + + response = [ + { + "role": "assistant", + "content": [{"type": "tool_call", "tool_call_id": "call_1", "name": "search", "arguments": {}}], + }, + { + "role": "assistant", + "content": [{"type": "tool_call", "tool_call_id": "call_2", "name": "extra_step", "arguments": {}}], + }, + { + "role": "assistant", + "content": [{"type": "tool_call", "tool_call_id": "call_3", "name": "analyze", "arguments": {}}], + }, + ] + ground_truth = ["search", "analyze"] + + exact_result = exact_evaluator(response=response, ground_truth=ground_truth) + assert exact_result["task_navigation_efficiency_result"] == "fail" + + in_order_result = in_order_evaluator(response=response, ground_truth=ground_truth) + assert in_order_result["task_navigation_efficiency_result"] == "pass" + + def test_invalid_ground_truth(self): + """Test with invalid ground truth steps.""" + evaluator = TaskNavigationEfficiencyEvaluator() + + with pytest.raises(TypeError): + evaluator(response=[], ground_truth="not_a_list") # type: ignore + + with pytest.raises(ValueError): + evaluator(response=[], ground_truth=[]) + + def test_tuple_format_with_parameters(self): + """Test tuple format with exact parameter matching.""" + evaluator = TaskNavigationEfficiencyEvaluator(matching_mode=TaskNavigationEfficiencyMatchingMode.EXACT_MATCH) + + response = [ + { + "role": "assistant", + "content": [ + { + "type": "tool_call", + "tool_call_id": "call_1", + "name": "search", + "arguments": {"query": "weather", "location": "NYC"}, + } + ], + }, + ] + + # Ground truth with tuple format: (tool_names, parameters_dict) + ground_truth = ( + ["search"], + {"search": {"query": "weather", "location": "NYC"}}, + ) + + result = evaluator(response=response, ground_truth=ground_truth) + assert result["task_navigation_efficiency_result"] == "pass" + assert result["properties"]["precision_score"] == 1.0 + assert result["properties"]["recall_score"] == 1.0 + assert result["properties"]["f1_score"] == 1.0 + + def test_matching_mode_validation(self): + """Test validation of matching_mode parameter.""" + # Test valid string mode + evaluator1 = TaskNavigationEfficiencyEvaluator(matching_mode="exact_match") + assert evaluator1.matching_mode == TaskNavigationEfficiencyMatchingMode.EXACT_MATCH + + # Test valid enum mode + evaluator2 = TaskNavigationEfficiencyEvaluator( + 
matching_mode=TaskNavigationEfficiencyMatchingMode.IN_ORDER_MATCH + ) + assert evaluator2.matching_mode == TaskNavigationEfficiencyMatchingMode.IN_ORDER_MATCH + + # Test invalid string mode + with pytest.raises(ValueError): + TaskNavigationEfficiencyEvaluator(matching_mode="invalid_mode") + + # Test invalid type for mode + with pytest.raises(Exception): # EvaluationException + TaskNavigationEfficiencyEvaluator(matching_mode=123) # type: ignore
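For quick reference, a minimal usage sketch of the renamed evaluator, mirroring the samples and tests in this patch. It assumes the package-internal import path used in the samples above; the extra "lookup" tool call is a hypothetical stand-in for an unnecessary step.

from pprint import pprint

from azure.ai.evaluation._evaluators._task_navigation_efficiency import (
    TaskNavigationEfficiencyEvaluator,
    TaskNavigationEfficiencyMatchingMode,
)

# Agent trajectory: one tool call per assistant message, in the same format as the notebook examples.
response = [
    {"role": "assistant", "content": [{"type": "tool_call", "tool_call_id": "call_1", "name": "search", "arguments": {}}]},
    {"role": "assistant", "content": [{"type": "tool_call", "tool_call_id": "call_2", "name": "lookup", "arguments": {}}]},  # hypothetical extra step
    {"role": "assistant", "content": [{"type": "tool_call", "tool_call_id": "call_3", "name": "analyze", "arguments": {}}]},
    {"role": "assistant", "content": [{"type": "tool_call", "tool_call_id": "call_4", "name": "report", "arguments": {}}]},
]
ground_truth = ["search", "analyze", "report"]

# In-order matching tolerates the extra step as long as the required steps appear in order,
# so this should yield a "pass" (see test_in_order_match_with_extra_steps above).
evaluator = TaskNavigationEfficiencyEvaluator(matching_mode=TaskNavigationEfficiencyMatchingMode.IN_ORDER_MATCH)
result = evaluator(response=response, ground_truth=ground_truth)

pprint(result)
# Expected shape per _do_eval: {"task_navigation_efficiency_result": "pass",
#   "properties": {"precision_score": 0.75, "recall_score": 1.0, "f1_score": ~0.857}}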