@@ -155,20 +155,30 @@ pub fn bert_pre_tokenizer() -> PreTokenizer {
155155#[ napi]
156156pub fn metaspace_pre_tokenizer (
157157 #[ napi( ts_arg_type = "string = '▁'" ) ] replacement : Option < String > ,
158- #[ napi( ts_arg_type = "bool = true" ) ] add_prefix_space : Option < bool > ,
158+ #[ napi( ts_arg_type = "prepend_scheme = 'always'" ) ] prepend_scheme : Option < String > ,
159+ #[ napi( ts_arg_type = "split = true" ) ] split : Option < bool > ,
159160) -> Result < PreTokenizer > {
160- let add_prefix_space = add_prefix_space. unwrap_or ( true ) ;
161+ use tk:: pre_tokenizers:: metaspace:: PrependScheme ;
162+ let split = split. unwrap_or ( true ) ;
161163 let replacement = replacement. unwrap_or ( "▁" . to_string ( ) ) ;
162164 if replacement. chars ( ) . count ( ) != 1 {
163165 return Err ( Error :: from_reason (
164166 "replacement is supposed to be a single char" ,
165167 ) ) ;
166168 }
167169 let replacement = replacement. chars ( ) . next ( ) . unwrap ( ) ;
170+ let prepend_scheme: PrependScheme = match prepend_scheme. unwrap_or ( String :: from ( "always" ) ) . as_str ( ) {
171+ "always" => PrependScheme :: Always ,
172+ "first" => PrependScheme :: First ,
173+ "never" => PrependScheme :: Never ,
174+ _ => { return Err ( Error :: from_reason (
175+ "prepend_scheme is supposed to be either 'always', 'first' or 'never'" ,
176+ ) ) ; }
177+ } ;
168178
169179 Ok ( PreTokenizer {
170180 pretok : Some ( Arc :: new ( RwLock :: new (
171- tk:: pre_tokenizers:: metaspace:: Metaspace :: new ( replacement, add_prefix_space ) . into ( ) ,
181+ tk:: pre_tokenizers:: metaspace:: Metaspace :: new ( replacement, prepend_scheme , split ) . into ( ) ,
172182 ) ) ) ,
173183 } )
174184}
0 commit comments