Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file modified .DS_Store
Binary file not shown.
9 changes: 6 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,17 @@ Korean analysis plugin that integrates [open-korean-text](https://github.com/ope

Elasticsearch 4.x 이하의 버전은 지원하지 않습니다.

## Plugin에 사용자 사전을 입력하여 수정이 필요한 경우
https://github.com/Keunyoung-Jung/sns-tokenizer-whole 에 방문하여 사용자 사전을 받을 수 있습니다.

## Install

```shell
$ cd ${ES_HOME}
$ bin/elasticsearch-plugin install {download URL}
```

설치 후 `bin/elasticsearch` 실행 시, `loaded plugin [elasticsearch-analysis-openkoreantext]` 라는 로그가 출력되는지 확인합니다.
설치 후 `bin/elasticsearch` 실행 시, `loaded plugin [analysis-openkoreantext]` 라는 로그가 출력되는지 확인합니다.

**download URL은 아래 [Compatible Versions](#compatible-versions)를 참고하여 Elasticsearch 버전에 맞는 Plugin 버전을 다운로드 받아야 합니다.**

Expand Down Expand Up @@ -112,12 +115,12 @@ Elasticsearch의 default analyzer를 사용했을 경우
## User Dictionary
[기본사전](https://github.com/open-korean-text/open-korean-text/tree/master/src/main/resources/org/openkoreantext/processor/util) 이외에 사용자가 원하는 단어를 추가하여 사용할 수 있습니다. 예를 들어 `말썽쟁이`를 분석하면 `말썽(Noun)`과 `쟁이(suffix)`로 추출되지만, 사전에 `말썽쟁이`를 추가하면 `말썽쟁이(Noun)`로 추출할 수 있습니다.

Analyzer Plugin을 설치하면 `{ES_HOME}/plugins/elasticserach-analysis-openkoreantext` 위치에 `dic/` 디렉토리를 찾을 수 있습니다. 해당 디렉토리 안에 사전 텍스트 파일을 추가하면 됩니다.
Analyzer Plugin을 설치하면 `{ES_HOME}/plugins/analysis-openkoreantext` 위치에 `dic/` 디렉토리를 찾을 수 있습니다. 해당 디렉토리 안에 사전 텍스트 파일을 추가하면 됩니다.

사전 텍스트 파일은 각 단어들을 줄바꿈하여 넣으면 됩니다. (단, 띄어쓰기는 단어로 인식하지 않습니다.)

```plain
# {ES_HOME}/plugins/elasticserach-analysis-openkoreantext/dic/sampledictionary
# {ES_HOME}/plugins/analysis-openkoreantext/dic/sampledictionary
말썽쟁이
뚜쟁이
욕쟁이할머니
Expand Down
58 changes: 29 additions & 29 deletions build.gradle
Original file line number Diff line number Diff line change
@@ -1,66 +1,70 @@
group 'org.openkoreantext'
version '6.5.2.0'
version '9.1.4.0'

apply plugin: 'java'
apply plugin: 'maven'
apply plugin: 'maven-publish'
apply plugin: 'signing'
apply plugin: 'jacoco'
apply plugin: 'idea'

sourceCompatibility = 1.8
java {
sourceCompatibility = JavaVersion.VERSION_21
targetCompatibility = JavaVersion.VERSION_21
}

repositories {
mavenCentral()
jcenter()
}

configurations {
distJars {
extendsFrom runtime
extendsFrom runtimeClasspath
}
}

ext {
elasticsearchVersion = '6.5.2'
openKoreanTextVersion = '2.1.0'
elasticsearchVersion = '9.1.4'
openKoreanTextVersion = '2.3.1'
}

dependencies {
compile group: 'org.openkoreantext', name: 'open-korean-text', version: openKoreanTextVersion
implementation group: 'org.openkoreantext', name: 'open-korean-text', version: openKoreanTextVersion

compileOnly group: 'org.elasticsearch', name: 'elasticsearch', version: elasticsearchVersion

testCompile group: 'org.elasticsearch.test', name: 'framework', version: elasticsearchVersion
testCompile group: 'org.apache.logging.log4j', name: 'log4j-api', version: '2.9.1'
testCompile group: 'org.apache.logging.log4j', name: 'log4j-core', version: '2.9.1'
testImplementation group: 'org.elasticsearch.test', name: 'framework', version: elasticsearchVersion
testImplementation group: 'org.apache.logging.log4j', name: 'log4j-api', version: '2.19.0'
testImplementation group: 'org.apache.logging.log4j', name: 'log4j-core', version: '2.19.0'
}

task makePluginDescriptor(type: Copy) {
from 'src/main/resources'
into 'build/tmp/plugin'
expand([
'descriptor': [
'name': 'elasticsearch-analysis-openkoreantext',
'name': 'analysis-openkoreantext',
'classname': 'org.elasticsearch.plugin.analysis.openkoreantext.AnalysisOpenKoreanTextPlugin',
'description': 'Korean analysis plugin integrates open-korean-text module into elasticsearch.',
'version': '1.0.0',
'javaVersion': sourceCompatibility,
'version': '9.1.4',
'javaVersion': java.sourceCompatibility,
'elasticsearchVersion' : elasticsearchVersion
]
])
}

task buildPluginZip(type: Zip, dependsOn: [':jar', ':makePluginDescriptor']) {
from configurations.distJars
from jar.archivePath
from jar.archiveFile
from 'build/tmp/plugin'
into '.'
classifier = 'plugin'
archiveBaseName = 'elasticsearch-analysis-openkoreantext'
archiveClassifier = 'plugin'
}

build.finalizedBy(buildPluginZip)

task javadocJar(type: Jar) {
classifier = 'javadoc'
archiveClassifier = 'javadoc'
from javadoc
}

Expand All @@ -71,7 +75,7 @@ tasks.withType(Javadoc) {
}

task sourcesJar(type: Jar) {
classifier = 'sources'
archiveClassifier = 'sources'
from sourceSets.main.allSource
}

Expand All @@ -86,14 +90,14 @@ test {
}

jacoco {
toolVersion = '0.7.1.201405082137'
toolVersion = '0.8.12'
}

jacocoTestReport {
reports {
html.enabled = true
xml.enabled = true
csv.enabled = false
html.required = true
xml.required = true
csv.required = false
}
}

Expand All @@ -102,12 +106,8 @@ task jacocoRootReport(type: org.gradle.testing.jacoco.tasks.JacocoReport) {
classDirectories = files(sourceSets.main.output)
executionData = files(jacocoTestReport.executionData)
reports {
html.enabled = true
xml.enabled = true
csv.enabled = false
html.required = true
xml.required = true
csv.required = false
}
}

task wrapper(type: Wrapper) {
gradleVersion = '3.4'
}
Binary file added gradle/.DS_Store
Binary file not shown.
2 changes: 1 addition & 1 deletion gradle/wrapper/gradle-wrapper.properties
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@ distributionBase=GRADLE_USER_HOME
distributionPath=wrapper/dists
zipStoreBase=GRADLE_USER_HOME
zipStorePath=wrapper/dists
distributionUrl=https\://services.gradle.org/distributions/gradle-3.4-all.zip
distributionUrl=https\://services.gradle.org/distributions/gradle-8.10-all.zip
2 changes: 1 addition & 1 deletion settings.gradle
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
rootProject.name = 'elasticsearch-analysis-openkoreantext'
rootProject.name = 'analysis-openkoreantext'

Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@

import org.apache.lucene.analysis.*;
import org.apache.lucene.analysis.miscellaneous.LengthFilter;
import org.apache.lucene.analysis.standard.ClassicFilter;

import java.io.Reader;
import java.util.Arrays;
Expand Down Expand Up @@ -36,7 +35,6 @@ protected TokenStreamComponents createComponents(String fieldName) {

TokenStream tokenStream = new OpenKoreanTextStemmer(tokenizer);
tokenStream = new OpenKoreanTextRedundantFilter(tokenStream);
tokenStream = new ClassicFilter(tokenStream);
tokenStream = new LengthFilter(tokenStream, 0, MAX_TOKEN_LENGTH);
tokenStream = new LowerCaseFilter(tokenStream);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
import java.io.*;
import java.net.URL;
import java.net.URLConnection;
import java.security.AccessControlException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
Expand Down Expand Up @@ -39,7 +38,7 @@ public class UserDictionaryLoader {
if(dicDirectory.isDirectory()) {
dicFiles = dicDirectory.listFiles();
}
} catch (AccessControlException e) {
} catch (SecurityException e) {
logger.error("Can not load dictionary files", e);
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ public class OpenKoreanTextAnalyzerProvider extends AbstractIndexAnalyzerProvide
private final OpenKoreanTextAnalyzer analyzer;

public OpenKoreanTextAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
super(indexSettings, name, settings);
super(name);
analyzer= new OpenKoreanTextAnalyzer();
UserDictionaryLoader.loadDefaultUserDictionaries();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,25 +12,20 @@
/**
* A ES character-filter factory for {@link OpenKoreanTextNormalizer}.
*/
public class OpenKoreanTextNormalizerFactory extends AbstractCharFilterFactory implements MultiTermAwareComponent {
public class OpenKoreanTextNormalizerFactory extends AbstractCharFilterFactory {

public OpenKoreanTextNormalizerFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) {
super(indexSettings, name);
super(name);
}

@Override
public Reader create(Reader reader) {
return new OpenKoreanTextNormalizer(reader);
}

@Override
public Object getMultiTermComponent() {
return this;
}

public static class OpenKoreanTextStemmerFactory extends AbstractTokenFilterFactory {
public OpenKoreanTextStemmerFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) {
super(indexSettings, name, settings);
super(name);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
public class OpenKoreanTextPhraseExtractorFactory extends AbstractTokenFilterFactory {

public OpenKoreanTextPhraseExtractorFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) {
super(indexSettings, name, settings);
super(name);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
public class OpenKoreanTextRedundantFilterFactory extends AbstractTokenFilterFactory {

public OpenKoreanTextRedundantFilterFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) {
super(indexSettings, name, settings);
super(name);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
public class OpenKoreanTextStemmerFactory extends AbstractTokenFilterFactory {

public OpenKoreanTextStemmerFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) {
super(indexSettings, name, settings);
super(name);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
public class OpenKoreanTextTokenizerFactory extends AbstractTokenizerFactory {

public OpenKoreanTextTokenizerFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) {
super(indexSettings, name, settings);
super(name);
UserDictionaryLoader.loadDefaultUserDictionaries();
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

import static org.hamcrest.Matchers.instanceOf;

public class OpenKoreanTextAnalyzerTest extends ESTestCase {
public class TestOpenKoreanTextAnalyzer extends ESTestCase {
public void testDefaultComponentsLoading() throws IOException {
TestAnalysis analysis = createTestAnalysis(new Index("test", "_na_"), Settings.EMPTY, new AnalysisOpenKoreanTextPlugin());

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

import java.io.StringReader;

public class OpenKoreanTextNormalizerTest {
public class TestOpenKoreanTextNormalizer {
@Test
public void testNormalizerCharFilter() throws Exception {
String query = "한국어를 처리하는 예시입니닼ㅋ. 오픈코리안텍스틓ㅎㅎㅎㅎㅎㅎㅎ";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,7 @@
import java.io.IOException;
import java.io.StringReader;

public class OpenKoreanTextPhraseExtractorTest {

public class TestOpenKoreanTextPhraseExtractor {
@Test
public void testBasicUsage() throws IOException {
String query = "한국어를 처리하는 예시입니다ㅋㅋ #한국어";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import java.io.IOException;
import java.io.StringReader;

public class OpenKoreanTextRedundantFilterTest {
public class TestOpenKoreanTextRedundantFilter {
@Test
public void testBasicUsage() throws IOException {
String query = "그리고 이것은 예시, 또는 예로써, 한국어를 처리하기 입니다";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,7 @@
import java.io.IOException;
import java.io.StringReader;

public class OpenKoreanTextStemmerTest {

public class TestOpenKoreanTextStemmer {
@Test
public void testBasicUsage() throws IOException {
String query = "한국어를 처리하는 예시입니다ㅋㅋ";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,12 @@
import java.io.File;
import java.io.IOException;
import java.io.StringReader;
import java.net.URI;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;

public class OpenKoreanTextTokenizerTest {

public class TestOpenKoreanTextTokenizer {
@Test
public void testTokenizer() throws IOException {
String text = "한국어를 처리하는 예시입니다ㅋㅋ";
Expand Down Expand Up @@ -61,13 +61,13 @@ public void testUserDictionaryFromFile() throws IOException {
}

@Test
public void testUserDictionaryFromURL() throws IOException {
public void testUserDictionaryFromURL() throws Exception {
String text = "안비빈비빔밥은 맛있다";
String[] expected = new String[]{"안비빈비빔밥", "은", " ", "맛있다"};

OpenKoreanTextTokenizer tokenizer = new OpenKoreanTextTokenizer();

URL url = new URL("https://raw.githubusercontent.com/open-korean-text/elasticsearch-analysis-openkoreantext/master/src/test/resources/httpdictionary");
URL url = URI.create("https://raw.githubusercontent.com/open-korean-text/elasticsearch-analysis-openkoreantext/master/src/test/resources/httpdictionary").toURL();
UserDictionaryLoader.addUserDictionary(url);

tokenizer.setReader(new StringReader(text));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,10 @@

import org.elasticsearch.action.admin.cluster.node.info.NodeInfo;
import org.elasticsearch.action.admin.cluster.node.info.NodesInfoResponse;
import org.elasticsearch.action.admin.cluster.node.info.PluginsAndModules;
import org.elasticsearch.plugin.analysis.openkoreantext.AnalysisOpenKoreanTextPlugin;
import org.elasticsearch.plugins.Plugin;
import org.elasticsearch.plugins.PluginInfo;
import org.elasticsearch.plugins.PluginRuntimeInfo;
import org.elasticsearch.test.ESIntegTestCase;
import org.junit.Assert;

Expand All @@ -20,13 +21,16 @@ protected Collection<Class<? extends Plugin>> nodePlugins() {
public void testPluginIsLoaded() {
NodesInfoResponse response = client().admin().cluster().prepareNodesInfo().setPlugins(true).get();
for (NodeInfo node : response.getNodes()) {
boolean founded = false;
for (PluginInfo pluginInfo : node.getPlugins().getPluginInfos()) {
if (pluginInfo.getName().equals(AnalysisOpenKoreanTextPlugin.class.getName())) {
founded = true;
boolean found = false;
PluginsAndModules plugins = node.getInfo(PluginsAndModules.class);
if (plugins != null) {
for (PluginRuntimeInfo pluginInfo : plugins.getPluginInfos()) {
if (pluginInfo.descriptor().getClassname().equals(AnalysisOpenKoreanTextPlugin.class.getName())) {
found = true;
}
}
}
Assert.assertTrue(founded);
Assert.assertTrue(found);
}
}
}
}
Loading