Skip to content

Commit 7aeef6f

Browse files
authored
math evaluator update (#37283)
1 parent f7c3b85 commit 7aeef6f

File tree

3 files changed

+12
-14
lines changed

3 files changed

+12
-14
lines changed

sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_common/utils.py

Lines changed: 8 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -6,7 +6,7 @@
66
except ImportError:
77
import constants
88

9-
from typing import List, cast
9+
from typing import List
1010

1111
import nltk
1212
import numpy as np
@@ -45,14 +45,11 @@ def get_harm_severity_level(harm_score: int) -> str:
4545
def nltk_tokenize(text: str) -> List[str]:
4646
"""Tokenize the input text using the NLTK tokenizer."""
4747

48-
is_latin_or_numeric = all(
49-
("\u0020" <= c <= "\u007E") # Basic Latin
50-
or ("\u00A0" <= c <= "\u00FF") # Latin-1 Supplement
51-
or ("0" <= c <= "9") # Digits
52-
for c in text
53-
)
48+
if not text.isascii():
49+
# Use NISTTokenizer for international tokenization
50+
tokens = NISTTokenizer().international_tokenize(text)
51+
else:
52+
# By default, use NLTK word tokenizer
53+
tokens = nltk.word_tokenize(text)
5454

55-
if is_latin_or_numeric:
56-
return cast(List[str], nltk.word_tokenize(text))
57-
58-
return list(NISTTokenizer().international_tokenize(text))
55+
return list(tokens)

sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/evaluators/_bleu/_bleu.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -15,6 +15,7 @@ async def __call__(self, *, answer: str, ground_truth: str, **kwargs):
1515
reference_tokens = nltk_tokenize(ground_truth)
1616
hypothesis_tokens = nltk_tokenize(answer)
1717

18+
# NIST Smoothing
1819
smoothing_function = SmoothingFunction().method4
1920
score = sentence_bleu([reference_tokens], hypothesis_tokens, smoothing_function=smoothing_function)
2021

sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/evaluators/_meteor/_meteor.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,7 @@
11
# ---------------------------------------------------------
22
# Copyright (c) Microsoft Corporation. All rights reserved.
33
# ---------------------------------------------------------
4-
from nltk.translate.meteor_score import single_meteor_score
4+
from nltk.translate.meteor_score import meteor_score
55

66
from promptflow._utils.async_utils import async_run_allowing_running_loop
77
from azure.ai.evaluation._common.utils import nltk_tokenize
@@ -17,8 +17,8 @@ async def __call__(self, *, ground_truth: str, answer: str, **kwargs):
1717
reference_tokens = nltk_tokenize(ground_truth)
1818
hypothesis_tokens = nltk_tokenize(answer)
1919

20-
score = single_meteor_score(
21-
reference_tokens,
20+
score = meteor_score(
21+
[reference_tokens],
2222
hypothesis_tokens,
2323
alpha=self._alpha,
2424
beta=self._beta,

0 commit comments

Comments
 (0)