diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/__init__.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/__init__.py index 75e9067e9b47..3b74169c923f 100644 --- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/__init__.py +++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/__init__.py @@ -30,7 +30,7 @@ from ._evaluators._xpia import IndirectAttackEvaluator from ._evaluators._code_vulnerability import CodeVulnerabilityEvaluator from ._evaluators._ungrounded_attributes import UngroundedAttributesEvaluator -from ._evaluators._tool_call_accuracy import ToolCallAccuracyEvaluator +from ._evaluators._tool_call_quality import ToolCallQualityEvaluator from ._evaluators._document_retrieval import DocumentRetrievalEvaluator from ._model_configurations import ( AzureAIProject, @@ -130,7 +130,8 @@ def lazy_import(): "EvaluationResult", "CodeVulnerabilityEvaluator", "UngroundedAttributesEvaluator", - "ToolCallAccuracyEvaluator", + "ToolCallQualityEvaluator", + "ToolCallAccuracyEvaluator", # Backward compatibility alias "AzureOpenAIGrader", "AzureOpenAILabelGrader", "AzureOpenAIStringCheckGrader", @@ -141,6 +142,9 @@ def lazy_import(): __all__.extend([p for p in _patch_all if p not in __all__]) +# Backward compatibility alias +ToolCallAccuracyEvaluator = ToolCallQualityEvaluator + def __getattr__(name): """Handle lazy imports for optional dependencies.""" diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_eval_mapping.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_eval_mapping.py index e77708057173..645ae17dbbcd 100644 --- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_eval_mapping.py +++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_eval_mapping.py @@ -37,7 +37,8 @@ SexualEvaluator, SimilarityEvaluator, TaskAdherenceEvaluator, - ToolCallAccuracyEvaluator, + ToolCallAccuracyEvaluator, # Backward compatibility alias + ToolCallQualityEvaluator, UngroundedAttributesEvaluator, ViolenceEvaluator, ) @@ -69,7 +70,8 @@ SimilarityEvaluator: "similarity", TaskAdherenceEvaluator: "task_adherence", TaskCompletionEvaluator: "task_completion", - ToolCallAccuracyEvaluator: "tool_call_accuracy", + ToolCallAccuracyEvaluator: "tool_call_quality", # Backward compatibility + ToolCallQualityEvaluator: "tool_call_quality", UngroundedAttributesEvaluator: "ungrounded_attributes", ViolenceEvaluator: "violence", } diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_tool_call_accuracy/__init__.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_tool_call_quality/__init__.py similarity index 68% rename from sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_tool_call_accuracy/__init__.py rename to sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_tool_call_quality/__init__.py index 687f98d624f0..701b2af5fe32 100644 --- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_tool_call_accuracy/__init__.py +++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_tool_call_quality/__init__.py @@ -2,8 +2,8 @@ # Copyright (c) Microsoft Corporation. All rights reserved. 
# --------------------------------------------------------- -from ._tool_call_accuracy import ToolCallAccuracyEvaluator +from ._tool_call_quality import ToolCallQualityEvaluator __all__ = [ - "ToolCallAccuracyEvaluator", + "ToolCallQualityEvaluator", ] diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_tool_call_accuracy/_tool_call_accuracy.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_tool_call_quality/_tool_call_quality.py similarity index 90% rename from sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_tool_call_accuracy/_tool_call_accuracy.py rename to sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_tool_call_quality/_tool_call_quality.py index 99d939034bc4..3330f79d5ef8 100644 --- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_tool_call_accuracy/_tool_call_accuracy.py +++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_tool_call_quality/_tool_call_quality.py @@ -58,14 +58,14 @@ def _get_needed_built_in_definitions(tool_calls: List[Dict]) -> List[Dict]: @experimental -class ToolCallAccuracyEvaluator(PromptyEvaluatorBase[Union[str, float]]): - """The Tool Call Accuracy evaluator assesses how accurately an AI uses tools by examining: +class ToolCallQualityEvaluator(PromptyEvaluatorBase[Union[str, float]]): + """The Tool Call Quality evaluator assesses how accurately an AI uses tools by examining: - Relevance to the conversation. - Parameter correctness according to tool definitions. - Parameter value extraction from the conversation. The evaluator uses a scoring rubric of 1 to 5: - - Score 1: The tool calls are irrelevant + - Score 1: The tool calls are irrelevant. - Score 2: The tool calls are partially relevant, but not enough tools were called or the parameters were not correctly passed. - Score 3: The tool calls are relevant, but there were unnecessary, excessive tool calls made. - Score 4: The tool calls are relevant, but some tools returned errors and agent retried calling them again and succeeded. @@ -82,20 +82,20 @@ class ToolCallAccuracyEvaluator(PromptyEvaluatorBase[Union[str, float]]): .. admonition:: Example: .. literalinclude:: ../samples/evaluation_samples_evaluate.py - :start-after: [START tool_call_accuracy_evaluator] - :end-before: [END tool_call_accuracy_evaluator] + :start-after: [START tool_call_quality_evaluator] + :end-before: [END tool_call_quality_evaluator] :language: python :dedent: 8 - :caption: Initialize and call a ToolCallAccuracyEvaluator. + :caption: Initialize and call a ToolCallQualityEvaluator. .. admonition:: Example using Azure AI Project URL: .. literalinclude:: ../samples/evaluation_samples_evaluate_fdp.py - :start-after: [START tool_call_accuracy_evaluator] - :end-before: [END tool_call_accuracy_evaluator] + :start-after: [START tool_call_quality_evaluator] + :end-before: [END tool_call_quality_evaluator] :language: python :dedent: 8 - :caption: Initialize and call ToolCallAccuracyEvaluator using Azure AI Project URL in the following format + :caption: Initialize and call ToolCallQualityEvaluator using Azure AI Project URL in the following format https://{resource_name}.services.ai.azure.com/api/projects/{project_name} .. note:: @@ -105,25 +105,25 @@ class ToolCallAccuracyEvaluator(PromptyEvaluatorBase[Union[str, float]]): however, it is recommended to use the new key moving forward as the old key will be deprecated in the future. 
""" - _PROMPTY_FILE = "tool_call_accuracy.prompty" - _RESULT_KEY = "tool_call_accuracy" + _PROMPTY_FILE = "tool_call_quality.prompty" + _RESULT_KEY = "tool_call_quality" - _MAX_TOOL_CALL_ACCURACY_SCORE = 5 - _MIN_TOOL_CALL_ACCURACY_SCORE = 1 - _DEFAULT_TOOL_CALL_ACCURACY_SCORE = 3 + _MAX_TOOL_CALL_QUALITY_SCORE = 5 + _MIN_TOOL_CALL_QUALITY_SCORE = 1 + _DEFAULT_TOOL_CALL_QUALITY_SCORE = 3 _NO_TOOL_CALLS_MESSAGE = "No tool calls found in response or provided tool_calls." _NO_TOOL_DEFINITIONS_MESSAGE = "Tool definitions must be provided." _TOOL_DEFINITIONS_MISSING_MESSAGE = "Tool definitions for all tool calls must be provided." - _INVALID_SCORE_MESSAGE = "Tool call accuracy score must be between 1 and 5." + _INVALID_SCORE_MESSAGE = "Tool call quality score must be between 1 and 5." _LLM_SCORE_KEY = "tool_calls_success_level" - id = "azureai://built-in/evaluators/tool_call_accuracy" + id = "azureai://built-in/evaluators/tool_call_quality" """Evaluator identifier, experimental and to be used only with evaluation in cloud.""" @override - def __init__(self, model_config, *, threshold=_DEFAULT_TOOL_CALL_ACCURACY_SCORE, credential=None, **kwargs): + def __init__(self, model_config, *, threshold=_DEFAULT_TOOL_CALL_QUALITY_SCORE, credential=None, **kwargs): current_dir = os.path.dirname(__file__) prompty_path = os.path.join(current_dir, self._PROMPTY_FILE) self.threshold = threshold @@ -241,11 +241,11 @@ async def _do_eval(self, eval_input: Dict) -> Dict[str, Union[float, str]]: # t score = llm_output.get(self._LLM_SCORE_KEY, None) if not score or not check_score_is_valid( score, - ToolCallAccuracyEvaluator._MIN_TOOL_CALL_ACCURACY_SCORE, - ToolCallAccuracyEvaluator._MAX_TOOL_CALL_ACCURACY_SCORE, + ToolCallQualityEvaluator._MIN_TOOL_CALL_QUALITY_SCORE, + ToolCallQualityEvaluator._MAX_TOOL_CALL_QUALITY_SCORE, ): raise EvaluationException( - message=f"Invalid score value: {score}. Expected a number in range [{ToolCallAccuracyEvaluator._MIN_TOOL_CALL_ACCURACY_SCORE}, {ToolCallAccuracyEvaluator._MAX_TOOL_CALL_ACCURACY_SCORE}].", + message=f"Invalid score value: {score}. 
Expected a number in range [{ToolCallQualityEvaluator._MIN_TOOL_CALL_QUALITY_SCORE}, {ToolCallQualityEvaluator._MAX_TOOL_CALL_QUALITY_SCORE}].", internal_message="Invalid score value.", category=ErrorCategory.FAILED_EXECUTION, blame=ErrorBlame.SYSTEM_ERROR, @@ -266,10 +266,10 @@ async def _do_eval(self, eval_input: Dict) -> Dict[str, Union[float, str]]: # t else: raise EvaluationException( - message="Tool call accuracy evaluator returned invalid output.", + message="Tool call quality evaluator returned invalid output.", blame=ErrorBlame.SYSTEM_ERROR, category=ErrorCategory.FAILED_EXECUTION, - target=ErrorTarget.TOOL_CALL_ACCURACY_EVALUATOR, + target=ErrorTarget.TOOL_CALL_QUALITY_EVALUATOR, ) async def _real_call(self, **kwargs): @@ -346,14 +346,14 @@ def _extract_needed_tool_definitions(self, tool_calls, tool_definitions): message=f"Tool definition for {tool_name} not found", blame=ErrorBlame.USER_ERROR, category=ErrorCategory.INVALID_VALUE, - target=ErrorTarget.TOOL_CALL_ACCURACY_EVALUATOR, + target=ErrorTarget.TOOL_CALL_QUALITY_EVALUATOR, ) else: raise EvaluationException( message=f"Tool call missing name: {tool_call}", blame=ErrorBlame.USER_ERROR, category=ErrorCategory.INVALID_VALUE, - target=ErrorTarget.TOOL_CALL_ACCURACY_EVALUATOR, + target=ErrorTarget.TOOL_CALL_QUALITY_EVALUATOR, ) else: # Unsupported tool format - only converter format is supported @@ -361,7 +361,7 @@ def _extract_needed_tool_definitions(self, tool_calls, tool_definitions): message=f"Unsupported tool call format. Only converter format is supported: {tool_call}", blame=ErrorBlame.USER_ERROR, category=ErrorCategory.INVALID_VALUE, - target=ErrorTarget.TOOL_CALL_ACCURACY_EVALUATOR, + target=ErrorTarget.TOOL_CALL_QUALITY_EVALUATOR, ) else: # Tool call is not a dictionary @@ -369,7 +369,7 @@ def _extract_needed_tool_definitions(self, tool_calls, tool_definitions): message=f"Tool call is not a dictionary: {tool_call}", blame=ErrorBlame.USER_ERROR, category=ErrorCategory.INVALID_VALUE, - target=ErrorTarget.TOOL_CALL_ACCURACY_EVALUATOR, + target=ErrorTarget.TOOL_CALL_QUALITY_EVALUATOR, ) return needed_tool_definitions diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_tool_call_accuracy/tool_call_accuracy.prompty b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_tool_call_quality/tool_call_quality.prompty similarity index 99% rename from sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_tool_call_accuracy/tool_call_accuracy.prompty rename to sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_tool_call_quality/tool_call_quality.prompty index 4713b65f4ee9..a7fb6ad25bd3 100644 --- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_tool_call_accuracy/tool_call_accuracy.prompty +++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_tool_call_quality/tool_call_quality.prompty @@ -1,6 +1,6 @@ --- -name: Tool Call Accuracy -description: Evaluates Tool Call Accuracy for tool used by agent +name: Tool Call Quality +description: Evaluates Tool Call Quality for tool used by agent model: api: chat parameters: diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_exceptions.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_exceptions.py index 9b28686b9bf6..59ce1f711db4 100644 --- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_exceptions.py +++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_exceptions.py @@ -96,6 +96,7 @@ class ErrorTarget(Enum): UNKNOWN = "Unknown" 
CONVERSATION = "Conversation" TOOL_CALL_ACCURACY_EVALUATOR = "ToolCallAccuracyEvaluator" + TOOL_CALL_QUALITY_EVALUATOR = "ToolCallQualityEvaluator" RED_TEAM = "RedTeam" AOAI_GRADER = "AoaiGrader" CONVERSATION_HISTORY_PARSING = "_get_conversation_history" diff --git a/sdk/evaluation/azure-ai-evaluation/samples/agent_evaluators/tool_call_accuracy.ipynb b/sdk/evaluation/azure-ai-evaluation/samples/agent_evaluators/tool_call_quality.ipynb similarity index 93% rename from sdk/evaluation/azure-ai-evaluation/samples/agent_evaluators/tool_call_accuracy.ipynb rename to sdk/evaluation/azure-ai-evaluation/samples/agent_evaluators/tool_call_quality.ipynb index c08365505d6f..579f4290d4a9 100644 --- a/sdk/evaluation/azure-ai-evaluation/samples/agent_evaluators/tool_call_accuracy.ipynb +++ b/sdk/evaluation/azure-ai-evaluation/samples/agent_evaluators/tool_call_quality.ipynb @@ -4,7 +4,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Tool Call Accuracy Evaluator" + "# Tool Call Quality Evaluator" ] }, { @@ -13,7 +13,7 @@ "source": [ "### Getting Started\n", "\n", - "This sample demonstrates how to use Tool Call Accuracy Evaluator\n", + "This sample demonstrates how to use Tool Call Quality Evaluator\n", "Before running the sample:\n", "```bash\n", "pip install azure-ai-projects azure-identity azure-ai-evaluation\n", @@ -33,7 +33,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "The Tool Call Accuracy evaluator assesses how accurately an AI uses tools by examining:\n", + "The Tool Call Quality evaluator assesses how accurately an AI uses tools by examining:\n", "- Relevance to the conversation\n", "- Parameter correctness according to tool definitions\n", "- Parameter value extraction from the conversation\n", @@ -53,10 +53,10 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Tool Call Accuracy requires following input:\n", + "Tool Call Quality requires following input:\n", "- Query - This can be a single query or a list of messages(conversation history with agent). Latter helps to determine if Agent used the information in history to make right tool calls.\n", "- Tool Calls - Tool Call(s) made by Agent to answer the query. Optional - if response has tool calls, if not provided evaluator will look for tool calls in response.\n", - "- Response - (Optional)Response from Agent (or any GenAI App). This can be a single text response or a list or messages generated as part of Agent Response. If tool calls are not provide Tool Call Accuracy Evaluator will look at response for tool calls.\n", + "- Response - (Optional)Response from Agent (or any GenAI App). This can be a single text response or a list or messages generated as part of Agent Response. If tool calls are not provide Tool Call Quality Evaluator will look at response for tool calls.\n", "- Tool Definitions - Tool(s) definition used by Agent to answer the query. 
\n" ] }, @@ -64,7 +64,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Initialize Tool Call Accuracy Evaluator\n" + "### Initialize Tool Call Quality Evaluator\n" ] }, { @@ -74,7 +74,7 @@ "outputs": [], "source": [ "import os\n", - "from azure.ai.evaluation import ToolCallAccuracyEvaluator , AzureOpenAIModelConfiguration\n", + "from azure.ai.evaluation import ToolCallQualityEvaluator , AzureOpenAIModelConfiguration\n", "from pprint import pprint\n", "\n", "model_config = AzureOpenAIModelConfiguration(\n", @@ -85,7 +85,7 @@ ")\n", "\n", "\n", - "tool_call_accuracy = ToolCallAccuracyEvaluator(model_config=model_config)" + "tool_call_quality = ToolCallQualityEvaluator(model_config=model_config)" ] }, { @@ -140,7 +140,7 @@ "metadata": {}, "outputs": [], "source": [ - "response = tool_call_accuracy(query=query, tool_calls=tool_call, tool_definitions=tool_definition)\n", + "response = tool_call_quality(query=query, tool_calls=tool_call, tool_definitions=tool_definition)\n", "pprint(response)" ] }, @@ -197,7 +197,7 @@ "metadata": {}, "outputs": [], "source": [ - "response = tool_call_accuracy(query=query, tool_calls=tool_calls, tool_definitions=tool_definition)\n", + "response = tool_call_quality(query=query, tool_calls=tool_calls, tool_definitions=tool_definition)\n", "pprint(response)" ] }, @@ -206,7 +206,7 @@ "metadata": {}, "source": [ "#### Tool Calls passed as part of `Response` (common for agent case)\n", - "- Tool Call Accuracy Evaluator extracts tool calls from response" + "- Tool Call Quality Evaluator extracts tool calls from response" ] }, { diff --git a/sdk/evaluation/azure-ai-evaluation/samples/evaluation_samples_evaluate.py b/sdk/evaluation/azure-ai-evaluation/samples/evaluation_samples_evaluate.py index a6a0f3b6805d..b32896e41014 100644 --- a/sdk/evaluation/azure-ai-evaluation/samples/evaluation_samples_evaluate.py +++ b/sdk/evaluation/azure-ai-evaluation/samples/evaluation_samples_evaluate.py @@ -541,9 +541,9 @@ def evaluation_evaluate_classes_methods(self): ) # [END groundedness_pro_evaluator] - # [START tool_call_accuracy_evaluator] + # [START tool_call_quality_evaluator] import os - from azure.ai.evaluation import ToolCallAccuracyEvaluator + from azure.ai.evaluation import ToolCallQualityEvaluator model_config = { "azure_endpoint": os.environ.get("AZURE_OPENAI_ENDPOINT"), @@ -551,8 +551,8 @@ def evaluation_evaluate_classes_methods(self): "azure_deployment": os.environ.get("AZURE_OPENAI_DEPLOYMENT"), } - tool_call_accuracy_evaluator = ToolCallAccuracyEvaluator(model_config=model_config) - tool_call_accuracy_evaluator( + tool_call_quality_evaluator = ToolCallQualityEvaluator(model_config=model_config) + tool_call_quality_evaluator( query="How is the weather in New York?", response="The weather in New York is sunny.", tool_calls={ @@ -573,7 +573,7 @@ def evaluation_evaluate_classes_methods(self): }, }, ) - # [END tool_call_accuracy_evaluator] + # [END tool_call_quality_evaluator] # [START path_efficiency_evaluator] from azure.ai.evaluation._evaluators._path_efficiency import PathEfficiencyEvaluator diff --git a/sdk/evaluation/azure-ai-evaluation/samples/evaluation_samples_evaluate_fdp.py b/sdk/evaluation/azure-ai-evaluation/samples/evaluation_samples_evaluate_fdp.py index d6b023a581b1..29c324ec16fc 100644 --- a/sdk/evaluation/azure-ai-evaluation/samples/evaluation_samples_evaluate_fdp.py +++ b/sdk/evaluation/azure-ai-evaluation/samples/evaluation_samples_evaluate_fdp.py @@ -547,9 +547,9 @@ def evaluation_evaluate_classes_methods(self): ) # [END 
groundedness_pro_evaluator] - # [START tool_call_accuracy_evaluator] + # [START tool_call_quality_evaluator] import os - from azure.ai.evaluation import ToolCallAccuracyEvaluator + from azure.ai.evaluation import ToolCallQualityEvaluator model_config = { "azure_endpoint": os.environ.get("AZURE_OPENAI_ENDPOINT"), # https://.services.ai.azure.com @@ -557,8 +557,8 @@ def evaluation_evaluate_classes_methods(self): "azure_deployment": os.environ.get("AZURE_OPENAI_DEPLOYMENT"), } - tool_call_accuracy_evaluator = ToolCallAccuracyEvaluator(model_config=model_config) - tool_call_accuracy_evaluator( + tool_call_quality_evaluator = ToolCallQualityEvaluator(model_config=model_config) + tool_call_quality_evaluator( query="How is the weather in New York?", response="The weather in New York is sunny.", tool_calls={ @@ -579,7 +579,7 @@ def evaluation_evaluate_classes_methods(self): }, }, ) - # [END tool_call_accuracy_evaluator] + # [END tool_call_quality_evaluator] # [START path_efficiency_evaluator] from azure.ai.evaluation._evaluators._path_efficiency import PathEfficiencyEvaluator diff --git a/sdk/evaluation/azure-ai-evaluation/tests/unittests/test_agent_evaluators.py b/sdk/evaluation/azure-ai-evaluation/tests/unittests/test_agent_evaluators.py index 3b3580817eb5..e9de7635c046 100644 --- a/sdk/evaluation/azure-ai-evaluation/tests/unittests/test_agent_evaluators.py +++ b/sdk/evaluation/azure-ai-evaluation/tests/unittests/test_agent_evaluators.py @@ -1,16 +1,16 @@ import pytest -from azure.ai.evaluation import evaluate, ToolCallAccuracyEvaluator +from azure.ai.evaluation import evaluate, ToolCallQualityEvaluator from azure.ai.evaluation._exceptions import EvaluationException @pytest.mark.usefixtures("mock_model_config") @pytest.mark.unittest class TestEvaluate: - def test_tool_call_accuracy_evaluator_missing_inputs(self, mock_model_config): - tool_call_accuracy = ToolCallAccuracyEvaluator(model_config=mock_model_config) + def test_tool_call_quality_evaluator_missing_inputs(self, mock_model_config): + tool_call_quality = ToolCallQualityEvaluator(model_config=mock_model_config) # Test with missing tool_calls and response - result = tool_call_accuracy( + result = tool_call_quality( query="Where is the Eiffel Tower?", tool_definitions=[ { @@ -28,14 +28,13 @@ def test_tool_call_accuracy_evaluator_missing_inputs(self, mock_model_config): } ], ) - assert result[ToolCallAccuracyEvaluator._RESULT_KEY] == ToolCallAccuracyEvaluator._NOT_APPLICABLE_RESULT + assert result[ToolCallQualityEvaluator._RESULT_KEY] == ToolCallQualityEvaluator._NOT_APPLICABLE_RESULT assert ( - ToolCallAccuracyEvaluator._NO_TOOL_CALLS_MESSAGE - in result[f"{ToolCallAccuracyEvaluator._RESULT_KEY}_reason"] + ToolCallQualityEvaluator._NO_TOOL_CALLS_MESSAGE in result[f"{ToolCallQualityEvaluator._RESULT_KEY}_reason"] ) # Test with missing tool_definitions - result = tool_call_accuracy( + result = tool_call_quality( query="Where is the Eiffel Tower?", tool_definitions=[], tool_calls=[ @@ -46,14 +45,14 @@ def test_tool_call_accuracy_evaluator_missing_inputs(self, mock_model_config): } ], ) - assert result[ToolCallAccuracyEvaluator._RESULT_KEY] == ToolCallAccuracyEvaluator._NOT_APPLICABLE_RESULT + assert result[ToolCallQualityEvaluator._RESULT_KEY] == ToolCallQualityEvaluator._NOT_APPLICABLE_RESULT assert ( - ToolCallAccuracyEvaluator._NO_TOOL_DEFINITIONS_MESSAGE - in result[f"{ToolCallAccuracyEvaluator._RESULT_KEY}_reason"] + ToolCallQualityEvaluator._NO_TOOL_DEFINITIONS_MESSAGE + in result[f"{ToolCallQualityEvaluator._RESULT_KEY}_reason"] ) # Test 
with response that has no tool calls - result = tool_call_accuracy( + result = tool_call_quality( query="Where is the Eiffel Tower?", response="The Eiffel Tower is in Paris.", tool_definitions=[ @@ -72,14 +71,13 @@ def test_tool_call_accuracy_evaluator_missing_inputs(self, mock_model_config): } ], ) - assert result[ToolCallAccuracyEvaluator._RESULT_KEY] == ToolCallAccuracyEvaluator._NOT_APPLICABLE_RESULT + assert result[ToolCallQualityEvaluator._RESULT_KEY] == ToolCallQualityEvaluator._NOT_APPLICABLE_RESULT assert ( - ToolCallAccuracyEvaluator._NO_TOOL_CALLS_MESSAGE - in result[f"{ToolCallAccuracyEvaluator._RESULT_KEY}_reason"] + ToolCallQualityEvaluator._NO_TOOL_CALLS_MESSAGE in result[f"{ToolCallQualityEvaluator._RESULT_KEY}_reason"] ) # Test with tool call for which definition is not provided - result = tool_call_accuracy( + result = tool_call_quality( query="Where is the Eiffel Tower?", tool_calls=[{"type": "tool_call", "name": "some_other_tool", "arguments": {}}], tool_definitions=[ @@ -98,8 +96,8 @@ def test_tool_call_accuracy_evaluator_missing_inputs(self, mock_model_config): } ], ) - assert result[ToolCallAccuracyEvaluator._RESULT_KEY] == ToolCallAccuracyEvaluator._NOT_APPLICABLE_RESULT + assert result[ToolCallQualityEvaluator._RESULT_KEY] == ToolCallQualityEvaluator._NOT_APPLICABLE_RESULT assert ( - ToolCallAccuracyEvaluator._TOOL_DEFINITIONS_MISSING_MESSAGE - in result[f"{ToolCallAccuracyEvaluator._RESULT_KEY}_reason"] + ToolCallQualityEvaluator._TOOL_DEFINITIONS_MISSING_MESSAGE + in result[f"{ToolCallQualityEvaluator._RESULT_KEY}_reason"] ) diff --git a/sdk/evaluation/azure-ai-evaluation/tests/unittests/test_tool_call_accuracy_evaluator.py b/sdk/evaluation/azure-ai-evaluation/tests/unittests/test_tool_call_quality_evaluator.py similarity index 88% rename from sdk/evaluation/azure-ai-evaluation/tests/unittests/test_tool_call_accuracy_evaluator.py rename to sdk/evaluation/azure-ai-evaluation/tests/unittests/test_tool_call_quality_evaluator.py index 714b1b4073e2..ea63f7e79fc4 100644 --- a/sdk/evaluation/azure-ai-evaluation/tests/unittests/test_tool_call_accuracy_evaluator.py +++ b/sdk/evaluation/azure-ai-evaluation/tests/unittests/test_tool_call_quality_evaluator.py @@ -1,7 +1,7 @@ from unittest.mock import MagicMock import pytest -from azure.ai.evaluation import ToolCallAccuracyEvaluator +from azure.ai.evaluation import ToolCallQualityEvaluator from azure.ai.evaluation._exceptions import EvaluationException @@ -71,9 +71,9 @@ async def flow_side_effect(timeout, **kwargs): @pytest.mark.usefixtures("mock_model_config") @pytest.mark.unittest -class TestToolCallAccuracyEvaluator: +class TestToolCallQualityEvaluator: def test_evaluate_tools_valid1(self, mock_model_config): - evaluator = ToolCallAccuracyEvaluator(model_config=mock_model_config) + evaluator = ToolCallQualityEvaluator(model_config=mock_model_config) evaluator._flow = MagicMock(side_effect=flow_side_effect) # Test evaluation with one good and one bad tool call @@ -124,18 +124,18 @@ def test_evaluate_tools_valid1(self, mock_model_config): ] result = evaluator(query=query, tool_calls=tool_calls, tool_definitions=tool_definitions) - key = ToolCallAccuracyEvaluator._RESULT_KEY + key = ToolCallQualityEvaluator._RESULT_KEY assert result is not None assert key in result and f"{key}_result" in result and f"{key}_threshold" in result assert result[key] == 3.0 # Mixed good/bad gets score 3 assert result[f"{key}_result"] == "pass" - assert result[f"{key}_threshold"] == ToolCallAccuracyEvaluator._DEFAULT_TOOL_CALL_ACCURACY_SCORE + 
assert result[f"{key}_threshold"] == ToolCallQualityEvaluator._DEFAULT_TOOL_CALL_QUALITY_SCORE assert f"{key}_reason" in result assert result[f"{key}_reason"] == "Evaluated 2 tool calls with 1 correct calls." assert "details" in result def test_evaluate_tools_valid2(self, mock_model_config): - evaluator = ToolCallAccuracyEvaluator(model_config=mock_model_config) + evaluator = ToolCallQualityEvaluator(model_config=mock_model_config) evaluator._flow = MagicMock(side_effect=flow_side_effect) # Test evaluation with two bad tool calls @@ -186,18 +186,18 @@ def test_evaluate_tools_valid2(self, mock_model_config): ] result = evaluator(query=query, tool_calls=tool_calls, tool_definitions=tool_definitions) - key = ToolCallAccuracyEvaluator._RESULT_KEY + key = ToolCallQualityEvaluator._RESULT_KEY assert result is not None assert key in result and f"{key}_result" in result and f"{key}_threshold" in result assert result[key] == 1.0 # All bad gets score 1 assert result[f"{key}_result"] == "fail" - assert result[f"{key}_threshold"] == ToolCallAccuracyEvaluator._DEFAULT_TOOL_CALL_ACCURACY_SCORE + assert result[f"{key}_threshold"] == ToolCallQualityEvaluator._DEFAULT_TOOL_CALL_QUALITY_SCORE assert f"{key}_reason" in result assert result[f"{key}_reason"] == "Evaluated 2 tool calls with 0 correct calls." assert "details" in result def test_evaluate_tools_valid3(self, mock_model_config): - evaluator = ToolCallAccuracyEvaluator(model_config=mock_model_config) + evaluator = ToolCallQualityEvaluator(model_config=mock_model_config) evaluator._flow = MagicMock(side_effect=flow_side_effect) # Test evaluation with two good tool calls @@ -248,19 +248,19 @@ def test_evaluate_tools_valid3(self, mock_model_config): ] result = evaluator(query=query, tool_calls=tool_calls, tool_definitions=tool_definitions) - key = ToolCallAccuracyEvaluator._RESULT_KEY + key = ToolCallQualityEvaluator._RESULT_KEY assert result is not None assert key in result and f"{key}_result" in result and f"{key}_threshold" in result assert result[key] == 5.0 # All good gets score 5 assert result[f"{key}_result"] == "pass" - assert result[f"{key}_threshold"] == ToolCallAccuracyEvaluator._DEFAULT_TOOL_CALL_ACCURACY_SCORE + assert result[f"{key}_threshold"] == ToolCallQualityEvaluator._DEFAULT_TOOL_CALL_QUALITY_SCORE assert f"{key}_reason" in result assert result[f"{key}_reason"] == "Evaluated 2 tool calls with 2 correct calls." 
assert "details" in result def test_evaluate_tools_one_eval_fails(self, mock_model_config): with pytest.raises(EvaluationException) as exc_info: - evaluator = ToolCallAccuracyEvaluator(model_config=mock_model_config) + evaluator = ToolCallQualityEvaluator(model_config=mock_model_config) evaluator._flow = MagicMock(side_effect=flow_side_effect) # Test evaluation with an invalid tool call ID to trigger failure @@ -294,7 +294,7 @@ def test_evaluate_tools_one_eval_fails(self, mock_model_config): assert "Invalid score value" in str(exc_info.value) def test_evaluate_tools_some_not_applicable(self, mock_model_config): - evaluator = ToolCallAccuracyEvaluator(model_config=mock_model_config) + evaluator = ToolCallQualityEvaluator(model_config=mock_model_config) evaluator._flow = MagicMock(side_effect=flow_side_effect) # Test with one function tool and one non-function tool @@ -345,16 +345,16 @@ def test_evaluate_tools_some_not_applicable(self, mock_model_config): ] result = evaluator(query=query, tool_calls=tool_calls, tool_definitions=tool_definitions) - key = ToolCallAccuracyEvaluator._RESULT_KEY + key = ToolCallQualityEvaluator._RESULT_KEY assert result is not None - assert result[key] == ToolCallAccuracyEvaluator._NOT_APPLICABLE_RESULT + assert result[key] == ToolCallQualityEvaluator._NOT_APPLICABLE_RESULT assert result[f"{key}_result"] == "pass" - assert result[f"{key}_threshold"] == ToolCallAccuracyEvaluator._DEFAULT_TOOL_CALL_ACCURACY_SCORE - assert result[f"{key}_reason"] == ToolCallAccuracyEvaluator._TOOL_DEFINITIONS_MISSING_MESSAGE + assert result[f"{key}_threshold"] == ToolCallQualityEvaluator._DEFAULT_TOOL_CALL_QUALITY_SCORE + assert result[f"{key}_reason"] == ToolCallQualityEvaluator._TOOL_DEFINITIONS_MISSING_MESSAGE assert result["details"] == {} def test_evaluate_tools_all_not_applicable(self, mock_model_config): - evaluator = ToolCallAccuracyEvaluator(model_config=mock_model_config) + evaluator = ToolCallQualityEvaluator(model_config=mock_model_config) evaluator._flow = MagicMock(side_effect=flow_side_effect) # Test with only non-function tools @@ -385,16 +385,16 @@ def test_evaluate_tools_all_not_applicable(self, mock_model_config): ] result = evaluator(query=query, tool_calls=tool_calls, tool_definitions=tool_definitions) - key = ToolCallAccuracyEvaluator._RESULT_KEY + key = ToolCallQualityEvaluator._RESULT_KEY assert result is not None - assert result[key] == ToolCallAccuracyEvaluator._NOT_APPLICABLE_RESULT + assert result[key] == ToolCallQualityEvaluator._NOT_APPLICABLE_RESULT assert result[f"{key}_result"] == "pass" - assert result[f"{key}_threshold"] == ToolCallAccuracyEvaluator._DEFAULT_TOOL_CALL_ACCURACY_SCORE - assert result[f"{key}_reason"] == ToolCallAccuracyEvaluator._TOOL_DEFINITIONS_MISSING_MESSAGE + assert result[f"{key}_threshold"] == ToolCallQualityEvaluator._DEFAULT_TOOL_CALL_QUALITY_SCORE + assert result[f"{key}_reason"] == ToolCallQualityEvaluator._TOOL_DEFINITIONS_MISSING_MESSAGE assert result["details"] == {} def test_evaluate_tools_no_tools(self, mock_model_config): - evaluator = ToolCallAccuracyEvaluator(model_config=mock_model_config) + evaluator = ToolCallQualityEvaluator(model_config=mock_model_config) evaluator._flow = MagicMock(side_effect=flow_side_effect) # Test with no tool calls provided @@ -418,16 +418,16 @@ def test_evaluate_tools_no_tools(self, mock_model_config): ] result = evaluator(query=query, tool_calls=tool_calls, tool_definitions=tool_definitions) - key = ToolCallAccuracyEvaluator._RESULT_KEY + key = ToolCallQualityEvaluator._RESULT_KEY 
assert result is not None - assert result[key] == ToolCallAccuracyEvaluator._NOT_APPLICABLE_RESULT + assert result[key] == ToolCallQualityEvaluator._NOT_APPLICABLE_RESULT assert result[f"{key}_result"] == "pass" - assert result[f"{key}_threshold"] == ToolCallAccuracyEvaluator._DEFAULT_TOOL_CALL_ACCURACY_SCORE - assert result[f"{key}_reason"] == ToolCallAccuracyEvaluator._NO_TOOL_CALLS_MESSAGE + assert result[f"{key}_threshold"] == ToolCallQualityEvaluator._DEFAULT_TOOL_CALL_QUALITY_SCORE + assert result[f"{key}_reason"] == ToolCallQualityEvaluator._NO_TOOL_CALLS_MESSAGE assert result["details"] == {} def test_evaluate_bing_custom_search(self, mock_model_config): - evaluator = ToolCallAccuracyEvaluator(model_config=mock_model_config) + evaluator = ToolCallQualityEvaluator(model_config=mock_model_config) evaluator._flow = MagicMock(side_effect=flow_side_effect) # Test relevant bing custom search - converter format @@ -445,13 +445,13 @@ def test_evaluate_bing_custom_search(self, mock_model_config): tool_definitions = [] result = evaluator(query=query, tool_calls=tool_calls, tool_definitions=tool_definitions) - key = ToolCallAccuracyEvaluator._RESULT_KEY + key = ToolCallQualityEvaluator._RESULT_KEY assert result is not None assert result[key] == 5.0 assert result[f"{key}_result"] == "pass" def test_evaluate_bing_grounding(self, mock_model_config): - evaluator = ToolCallAccuracyEvaluator(model_config=mock_model_config) + evaluator = ToolCallQualityEvaluator(model_config=mock_model_config) evaluator._flow = MagicMock(side_effect=flow_side_effect) # Test relevant bing grounding for house prices - converter format @@ -469,13 +469,13 @@ def test_evaluate_bing_grounding(self, mock_model_config): tool_definitions = [] result = evaluator(query=query, tool_calls=tool_calls, tool_definitions=tool_definitions) - key = ToolCallAccuracyEvaluator._RESULT_KEY + key = ToolCallQualityEvaluator._RESULT_KEY assert result is not None assert result[key] == 5.0 assert result[f"{key}_result"] == "pass" def test_evaluate_file_search(self, mock_model_config): - evaluator = ToolCallAccuracyEvaluator(model_config=mock_model_config) + evaluator = ToolCallQualityEvaluator(model_config=mock_model_config) evaluator._flow = MagicMock(side_effect=flow_side_effect) # Test file search for credit card statement - converter format @@ -491,13 +491,13 @@ def test_evaluate_file_search(self, mock_model_config): tool_definitions = [] result = evaluator(query=query, tool_calls=tool_calls, tool_definitions=tool_definitions) - key = ToolCallAccuracyEvaluator._RESULT_KEY + key = ToolCallQualityEvaluator._RESULT_KEY assert result is not None assert result[key] == 5.0 assert result[f"{key}_result"] == "pass" def test_evaluate_azure_ai_search(self, mock_model_config): - evaluator = ToolCallAccuracyEvaluator(model_config=mock_model_config) + evaluator = ToolCallQualityEvaluator(model_config=mock_model_config) evaluator._flow = MagicMock(side_effect=flow_side_effect) # Test Azure AI Search for real estate - converter format @@ -513,13 +513,13 @@ def test_evaluate_azure_ai_search(self, mock_model_config): tool_definitions = [] result = evaluator(query=query, tool_calls=tool_calls, tool_definitions=tool_definitions) - key = ToolCallAccuracyEvaluator._RESULT_KEY + key = ToolCallQualityEvaluator._RESULT_KEY assert result is not None assert result[key] == 5.0 assert result[f"{key}_result"] == "pass" def test_evaluate_fabric_dataagent(self, mock_model_config): - evaluator = ToolCallAccuracyEvaluator(model_config=mock_model_config) + evaluator = 
ToolCallQualityEvaluator(model_config=mock_model_config) evaluator._flow = MagicMock(side_effect=flow_side_effect) # Test Fabric Data Agent for financial analysis - converter format @@ -535,13 +535,13 @@ def test_evaluate_fabric_dataagent(self, mock_model_config): tool_definitions = [] result = evaluator(query=query, tool_calls=tool_calls, tool_definitions=tool_definitions) - key = ToolCallAccuracyEvaluator._RESULT_KEY + key = ToolCallQualityEvaluator._RESULT_KEY assert result is not None assert result[key] == 5.0 assert result[f"{key}_result"] == "pass" def test_evaluate_code_interpreter(self, mock_model_config): - evaluator = ToolCallAccuracyEvaluator(model_config=mock_model_config) + evaluator = ToolCallQualityEvaluator(model_config=mock_model_config) evaluator._flow = MagicMock(side_effect=flow_side_effect) # Test code interpreter for statistical analysis - converter format @@ -559,13 +559,13 @@ def test_evaluate_code_interpreter(self, mock_model_config): tool_definitions = [] result = evaluator(query=query, tool_calls=tool_calls, tool_definitions=tool_definitions) - key = ToolCallAccuracyEvaluator._RESULT_KEY + key = ToolCallQualityEvaluator._RESULT_KEY assert result is not None assert result[key] == 5.0 assert result[f"{key}_result"] == "pass" def test_evaluate_sharepoint_grounding(self, mock_model_config): - evaluator = ToolCallAccuracyEvaluator(model_config=mock_model_config) + evaluator = ToolCallQualityEvaluator(model_config=mock_model_config) evaluator._flow = MagicMock(side_effect=flow_side_effect) # Test SharePoint grounding for document search - converter format @@ -581,13 +581,13 @@ def test_evaluate_sharepoint_grounding(self, mock_model_config): tool_definitions = [] result = evaluator(query=query, tool_calls=tool_calls, tool_definitions=tool_definitions) - key = ToolCallAccuracyEvaluator._RESULT_KEY + key = ToolCallQualityEvaluator._RESULT_KEY assert result is not None assert result[key] == 5.0 assert result[f"{key}_result"] == "pass" def test_evaluate_open_api(self, mock_model_config): - evaluator = ToolCallAccuracyEvaluator(model_config=mock_model_config) + evaluator = ToolCallQualityEvaluator(model_config=mock_model_config) evaluator._flow = MagicMock(side_effect=flow_side_effect) # Test OpenAPI function call for exchange rates - converter format @@ -603,13 +603,13 @@ def test_evaluate_open_api(self, mock_model_config): tool_definitions = [] result = evaluator(query=query, tool_calls=tool_calls, tool_definitions=tool_definitions) - key = ToolCallAccuracyEvaluator._RESULT_KEY + key = ToolCallQualityEvaluator._RESULT_KEY assert result is not None assert result[key] == "not applicable" assert result[f"{key}_result"] == "pass" def test_evaluate_open_api_with_tool_definition(self, mock_model_config): - evaluator = ToolCallAccuracyEvaluator(model_config=mock_model_config) + evaluator = ToolCallQualityEvaluator(model_config=mock_model_config) evaluator._flow = MagicMock(side_effect=flow_side_effect) # Test OpenAPI function call for exchange rates - converter format @@ -680,7 +680,7 @@ def test_evaluate_open_api_with_tool_definition(self, mock_model_config): ] result = evaluator(query=query, tool_calls=tool_calls, tool_definitions=tool_definitions) - key = ToolCallAccuracyEvaluator._RESULT_KEY + key = ToolCallQualityEvaluator._RESULT_KEY assert result is not None assert result[key] == 5.0 assert result[f"{key}_result"] == "pass"
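
The `__init__.py` hunk above keeps the old name importable by binding `ToolCallAccuracyEvaluator` to `ToolCallQualityEvaluator` at module level. A minimal sketch of what that alias implies for callers, assuming a build of `azure-ai-evaluation` with this change applied; the printed value is the `id` attribute defined in the renamed evaluator:

```python
# Both names are exported in __all__, and the module-level alias makes them the
# same class object, so existing code importing ToolCallAccuracyEvaluator keeps working.
from azure.ai.evaluation import ToolCallAccuracyEvaluator, ToolCallQualityEvaluator

assert ToolCallAccuracyEvaluator is ToolCallQualityEvaluator
print(ToolCallQualityEvaluator.id)  # azureai://built-in/evaluators/tool_call_quality
```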
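For completeness, a usage sketch of the renamed evaluator in the style of the updated `[START tool_call_quality_evaluator]` sample: the `model_config` keys follow that sample (the `api_key` entry is assumed, since it falls outside the visible hunk), and the `fetch_weather` tool call and its definition are illustrative payloads rather than content taken from this diff.

```python
import os
from pprint import pprint

from azure.ai.evaluation import ToolCallQualityEvaluator

# Model configuration built from environment variables, as in the updated samples.
model_config = {
    "azure_endpoint": os.environ.get("AZURE_OPENAI_ENDPOINT"),
    "api_key": os.environ.get("AZURE_OPENAI_API_KEY"),  # assumed key name
    "azure_deployment": os.environ.get("AZURE_OPENAI_DEPLOYMENT"),
}

tool_call_quality_evaluator = ToolCallQualityEvaluator(model_config=model_config)

# Illustrative tool call and tool definition (hypothetical fetch_weather tool).
result = tool_call_quality_evaluator(
    query="How is the weather in New York?",
    response="The weather in New York is sunny.",
    tool_calls=[
        {
            "type": "tool_call",
            "name": "fetch_weather",
            "arguments": {"location": "New York"},
        }
    ],
    tool_definitions=[
        {
            "type": "function",
            "name": "fetch_weather",
            "description": "Fetches weather information for a location.",
            "parameters": {
                "type": "object",
                "properties": {
                    "location": {"type": "string", "description": "City name."}
                },
            },
        }
    ],
)

# Per the renamed _RESULT_KEY, the score lands under "tool_call_quality", with
# "tool_call_quality_result", "tool_call_quality_threshold", and
# "tool_call_quality_reason" companions, as asserted by the updated unit tests.
pprint(result)
```

Per the updated constructor signature, the evaluator also accepts an optional `threshold` keyword (defaulting to `_DEFAULT_TOOL_CALL_QUALITY_SCORE`, i.e. 3) and a `credential`.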