Skip to content

Commit a1c4c13

Browse files
authored
[SPARKNLP-937] Fixing chunk construction when an entity is found (#14047)
1 parent 37b8d24 commit a1c4c13

File tree

2 files changed

+6
-4
lines changed

2 files changed

+6
-4
lines changed

src/main/scala/com/johnsnowlabs/nlp/annotators/er/AhoCorasickAutomaton.scala

Lines changed: 5 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -123,7 +123,7 @@ class AhoCorasickAutomaton(
123123

124124
if (state == 0 && previousState > 0) {
125125
val node = nodes(previousState).get
126-
if (node.isLeaf) {
126+
if (node.isLeaf && node.entity.nonEmpty) {
127127
val chunkAnnotation = buildAnnotation(chunk, node.entity, node.id, sentence)
128128
chunkAnnotations.append(chunkAnnotation)
129129
chunk.clear()
@@ -135,8 +135,10 @@ class AhoCorasickAutomaton(
135135

136136
if (chunk.nonEmpty) {
137137
val node = nodes(previousState).get
138-
val chunkAnnotation = buildAnnotation(chunk, node.entity, node.id, sentence)
139-
chunkAnnotations.append(chunkAnnotation)
138+
if (node.entity.nonEmpty) {
139+
val chunkAnnotation = buildAnnotation(chunk, node.entity, node.id, sentence)
140+
chunkAnnotations.append(chunkAnnotation)
141+
}
140142
chunk.clear()
141143
}
142144

src/test/scala/com/johnsnowlabs/nlp/annotators/er/EntityRulerTest.scala

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -791,7 +791,7 @@ class EntityRulerTest extends AnyFlatSpec with SparkSessionTest {
791791
AssertAnnotations.assertFields(expectedEntitiesFromText6, actualEntities)
792792
}
793793

794-
it should "work with LightPipeline" in {
794+
it should "work with LightPipeline" taggedAs FastTest in {
795795
val externalResource =
796796
ExternalResource(s"$testPath/keywords_only.json", ReadAs.TEXT, Map("format" -> "json"))
797797
val entityRulerPipeline = getEntityRulerKeywordsPipeline(externalResource, useStorage = false)

0 commit comments

Comments
 (0)