diff --git a/Rakefile b/Rakefile
index 9681be0d..d09e8194 100644
--- a/Rakefile
+++ b/Rakefile
@@ -86,7 +86,8 @@ end
 JAVA_DIR = "java/src/json/ext"
 JAVA_RAGEL_PATH = "#{JAVA_DIR}/ParserConfig.rl"
 JAVA_PARSER_SRC = "#{JAVA_DIR}/ParserConfig.java"
-JAVA_SOURCES = FileList["#{JAVA_DIR}/*.java"]
+JAVA_SOURCES = FileList["#{JAVA_DIR}/*.java"].exclude("#{JAVA_DIR}/Vectorized*.java")
+JAVA_VEC_SOURCES = FileList["#{JAVA_DIR}/Vectorized*.java"]
 JAVA_CLASSES = []
 JRUBY_PARSER_JAR = File.expand_path("lib/json/ext/parser.jar")
 JRUBY_GENERATOR_JAR = File.expand_path("lib/json/ext/generator.jar")
@@ -142,8 +143,8 @@ if defined?(RUBY_ENGINE) and RUBY_ENGINE == 'jruby'
 
   JRUBY_JAR = File.join(CONFIG["libdir"], "jruby.jar")
   if File.exist?(JRUBY_JAR)
+    classpath = (Dir['java/lib/*.jar'] << 'java/src' << JRUBY_JAR) * path_separator
     JAVA_SOURCES.each do |src|
-      classpath = (Dir['java/lib/*.jar'] << 'java/src' << JRUBY_JAR) * path_separator
       obj = src.sub(/\.java\Z/, '.class')
       file obj => src do
         if File.exist?(File.join(ENV['JAVA_HOME'], "lib", "modules"))
@@ -154,6 +155,20 @@ if defined?(RUBY_ENGINE) and RUBY_ENGINE == 'jruby'
       end
       JAVA_CLASSES << obj
     end
+
+    JAVA_VEC_SOURCES.each do |src|
+      obj = src.sub(/\.java\Z/, '.class')
+      file obj => src do
+        sh 'javac', '--add-modules', 'jdk.incubator.vector', '-classpath', classpath, '--release', '16', src do |success, status|
+          if success
+            puts "*** 'jdk.incubator.vector' support enabled ***"
+          else
+            puts "*** 'jdk.incubator.vector' support disabled ***"
+          end
+        end
+      end
+      JAVA_CLASSES << obj
+    end
   else
     warn "WARNING: Cannot find jruby in path => Cannot build jruby extension!"
   end
@@ -199,11 +214,13 @@ if defined?(RUBY_ENGINE) and RUBY_ENGINE == 'jruby'
       generator_classes = FileList[
         "json/ext/*ByteList*.class",
         "json/ext/OptionsReader*.class",
+        "json/ext/EscapeScanner*.class",
         "json/ext/Generator*.class",
         "json/ext/RuntimeInfo*.class",
         "json/ext/*StringEncoder*.class",
         "json/ext/Utils*.class"
       ]
+      puts "Creating generator jar with classes: #{generator_classes.join(', ')}"
       sh 'jar', 'cf', File.basename(JRUBY_GENERATOR_JAR), *generator_classes
       mv File.basename(JRUBY_GENERATOR_JAR), File.dirname(JRUBY_GENERATOR_JAR)
     end
diff --git a/java/src/json/ext/SWARBasicStringEncoder.java b/java/src/json/ext/SWARBasicStringEncoder.java
index a6695d99..bd0d1a70 100644
--- a/java/src/json/ext/SWARBasicStringEncoder.java
+++ b/java/src/json/ext/SWARBasicStringEncoder.java
@@ -71,7 +71,7 @@ void encode(ByteList src) throws IOException {
         }
     }
 
-    private boolean skipChunk(long x) {
+    boolean skipChunk(long x) {
         long is_ascii = 0x8080808080808080L & ~x;
         long xor2 = x ^ 0x0202020202020202L;
         long lt32_or_eq34 = xor2 - 0x2121212121212121L;
@@ -80,7 +80,7 @@ private boolean skipChunk(long x) {
         return ((lt32_or_eq34 | eq92) & is_ascii) == 0;
     }
 
-    private boolean skipChunk(int x) {
+    boolean skipChunk(int x) {
         int is_ascii = 0x80808080 & ~x;
         int xor2 = x ^ 0x02020202;
         int lt32_or_eq34 = xor2 - 0x21212121;
diff --git a/java/src/json/ext/StringEncoder.java b/java/src/json/ext/StringEncoder.java
index 7f75476d..7c9603c1 100644
--- a/java/src/json/ext/StringEncoder.java
+++ b/java/src/json/ext/StringEncoder.java
@@ -7,6 +7,8 @@
 
 import java.io.IOException;
 import java.io.OutputStream;
+import java.lang.reflect.Constructor;
+import java.lang.reflect.InvocationTargetException;
 import java.nio.charset.StandardCharsets;
 
 import org.jcodings.Encoding;
@@ -114,11 +116,39 @@ class StringEncoder extends ByteListTranscoder {
 
     protected final byte[] escapeTable;
 
+    private static final String VECTORIZED_STRING_ENCODER_CLASS = "json.ext.VectorizedStringEncoder";
+    private static final String USE_VECTORIZED_BASIC_ENCODER_PROP = "jruby.json.useVectorizedBasicEncoder";
+    private static final String USE_VECTORIZED_BASIC_ENCODER_DEFAULT = "false";
+    private static final boolean USE_VECTORIZED_BASIC_ENCODER;
+    private static final StringEncoder VECTORIZED_SCANNER;
+
     private static final String USE_SWAR_BASIC_ENCODER_PROP = "jruby.json.useSWARBasicEncoder";
     private static final String USE_SWAR_BASIC_ENCODER_DEFAULT = "true";
     private static final boolean USE_BASIC_SWAR_ENCODER;
 
     static {
+        String enableVectorizedScanner = System.getProperty(USE_VECTORIZED_BASIC_ENCODER_PROP, USE_VECTORIZED_BASIC_ENCODER_DEFAULT);
+        if ("true".equalsIgnoreCase(enableVectorizedScanner) || "1".equalsIgnoreCase(enableVectorizedScanner)) {
+            StringEncoder scanner;
+            try {
+                Class<?> vectorizedStringEncoderClass = StringEncoder.class.getClassLoader().loadClass(VECTORIZED_STRING_ENCODER_CLASS);
+                Constructor<?> vectorizedStringEncoderConstructor = vectorizedStringEncoderClass.getDeclaredConstructor();
+                scanner = (StringEncoder) vectorizedStringEncoderConstructor.newInstance();
+                // Loaded by name so this class has no compile-time reference to the vectorized encoder.
+            } catch (ClassNotFoundException | NoSuchMethodException | InstantiationException | IllegalAccessException | InvocationTargetException | LinkageError e) {
+                // Fall back to the non-vectorized encoders if we cannot load the VectorizedStringEncoder.
+                // LinkageError covers e.g. a JVM started without the jdk.incubator.vector module or one
+                // too old for the class file; uncaught, it would escape this static initializer.
+                scanner = null;
+            }
+            VECTORIZED_SCANNER = scanner;
+            USE_VECTORIZED_BASIC_ENCODER = scanner != null;
+        } else {
+            // Disabled is the default, so stay silent instead of writing to stderr on every class load.
+            VECTORIZED_SCANNER = null;
+            USE_VECTORIZED_BASIC_ENCODER = false;
+        }
+
         USE_BASIC_SWAR_ENCODER = Boolean.parseBoolean(
             System.getProperty(USE_SWAR_BASIC_ENCODER_PROP, USE_SWAR_BASIC_ENCODER_DEFAULT));
         // XXX Is there a logger we can use here?
@@ -149,8 +179,16 @@ class StringEncoder extends ByteListTranscoder {
         this.escapeTable = escapeTable;
     }
 
+    /** Returns a new encoder constructed with this encoder's escape table. */
+    @Override
+    public StringEncoder clone() {
+        return new StringEncoder(escapeTable);
+    }
+
     static StringEncoder createBasicEncoder() {
-        if (USE_BASIC_SWAR_ENCODER) {
+        if (USE_VECTORIZED_BASIC_ENCODER) {
+            return VECTORIZED_SCANNER.clone();
+        } else if (USE_BASIC_SWAR_ENCODER) {
             return new SWARBasicStringEncoder();
         } else {
             return new StringEncoder(false);
diff --git a/java/src/json/ext/VectorizedStringEncoder.java b/java/src/json/ext/VectorizedStringEncoder.java
new file mode 100644
index 00000000..14b3c8d7
--- /dev/null
+++ b/java/src/json/ext/VectorizedStringEncoder.java
@@ -0,0 +1,128 @@
+package json.ext;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+
+import org.jruby.util.ByteList;
+
+import jdk.incubator.vector.ByteVector;
+import jdk.incubator.vector.VectorMask;
+import jdk.incubator.vector.VectorOperators;
+import jdk.incubator.vector.VectorSpecies;
+
+/**
+ * Basic JSON string encoder that scans the input one preferred-species
+ * vector at a time using the incubating Vector API, then finishes the tail
+ * with the inherited SWAR chunk checks and a byte-by-byte loop.
+ */
+class VectorizedStringEncoder extends SWARBasicStringEncoder {
+    private static final VectorSpecies<Byte> SP = ByteVector.SPECIES_PREFERRED;
+    private static final ByteVector ZERO = ByteVector.zero(SP);
+    private static final ByteVector TWO = ByteVector.broadcast(SP, 2);
+    private static final ByteVector THIRTY_THREE = ByteVector.broadcast(SP, 33);
+    private static final ByteVector BACKSLASH = ByteVector.broadcast(SP, '\\');
+
+    /** Returns a new, independent encoder instance. */
+    @Override
+    public StringEncoder clone() {
+        return new VectorizedStringEncoder();
+    }
+
+    /**
+     * Writes {@code src} to the output, escaping the bytes flagged by the
+     * vector comparison, the SWAR check, or {@code ESCAPE_TABLE}.
+     * <p>
+     * {@code pos} and {@code beg} are offsets relative to the start of the
+     * string; {@code ptr} is added only when indexing the backing array.
+     *
+     * @param src the bytes to encode
+     * @throws IOException as declared by the overridden {@code encode}
+     */
+    @Override
+    void encode(ByteList src) throws IOException {
+        byte[] ptrBytes = src.unsafeBytes();
+        int ptr = src.begin();
+        int len = src.realSize();
+        int beg = 0;
+        // Relative offset: starting at ptr would skip the first ptr bytes of
+        // a string whose begin offset is non-zero (e.g. a shared substring).
+        int pos = 0;
+
+        while ((pos + SP.length() <= len)) {
+            ByteVector chunk = ByteVector.fromArray(SP, ptrBytes, ptr + pos);
+            // bytes are signed in java, so we need to remove negative values
+            VectorMask<Byte> negative = chunk.lt(ZERO);
+            VectorMask<Byte> tooLowOrDblQuote = chunk.lanewise(VectorOperators.XOR, TWO).lt(THIRTY_THREE).andNot(negative);
+            VectorMask<Byte> needsEscape = chunk.eq(BACKSLASH).or(tooLowOrDblQuote);
+            if (needsEscape.anyTrue()) {
+                int chunkStart = pos;
+                long mask = needsEscape.toLong();
+
+                // toLong() sets bit 63 for lane 63 of a 64-lane species, which
+                // makes the mask negative; compare against zero, not "> 0".
+                while (mask != 0) {
+                    // nextMatch inlined
+                    int index = Long.numberOfTrailingZeros(mask);
+                    mask &= (mask - 1);
+                    pos = chunkStart + index;
+                    int ch = Byte.toUnsignedInt(ptrBytes[ptr + pos]);
+
+                    beg = pos = flushPos(pos, beg, ptrBytes, ptr, 1);
+                    escapeAscii(ch, aux, HEX);
+                }
+
+                // Skip over any remaining characters in the current chunk
+                pos = chunkStart + SP.length();
+                continue;
+            }
+
+            pos += SP.length();
+        }
+
+        // Absolute gets on a wrapped buffer index from the start of the array,
+        // so the reads below use ptr + pos.
+        ByteBuffer bb = ByteBuffer.wrap(ptrBytes, ptr, len);
+        if (pos + 8 <= len) {
+            long x = bb.getLong(ptr + pos);
+            if (skipChunk(x)) {
+                pos += 8;
+            } else {
+                // pos is string-relative, so the chunk bound must not include ptr.
+                int chunkEnd = pos + 8;
+                while (pos < chunkEnd) {
+                    int ch = Byte.toUnsignedInt(ptrBytes[ptr + pos]);
+                    int ch_len = ESCAPE_TABLE[ch];
+                    if (ch_len > 0) {
+                        beg = pos = flushPos(pos, beg, ptrBytes, ptr, 1);
+                        escapeAscii(ch, aux, HEX);
+                    } else {
+                        pos++;
+                    }
+                }
+            }
+        }
+
+        if (pos + 4 <= len) {
+            int x = bb.getInt(ptr + pos);
+            if (skipChunk(x)) {
+                pos += 4;
+            }
+        }
+
+        while (pos < len) {
+            int ch = Byte.toUnsignedInt(ptrBytes[ptr + pos]);
+            int ch_len = ESCAPE_TABLE[ch];
+            if (ch_len > 0) {
+                beg = pos = flushPos(pos, beg, ptrBytes, ptr, 1);
+                escapeAscii(ch, aux, HEX);
+            } else {
+                pos++;
+            }
+        }
+
+        // Append the bytes from beg up to the end of the string.
+        if (beg < len) {
+            append(ptrBytes, ptr + beg, len - beg);
+        }
+    }
+}
diff --git a/test/json/json_encoding_test.rb b/test/json/json_encoding_test.rb
index 2789e94b..7ac06b2a 100644
--- a/test/json/json_encoding_test.rb
+++ b/test/json/json_encoding_test.rb
@@ -37,6 +37,12 @@ def test_generate_shared_string
     assert_equal '"234567890"', JSON.dump(s[2..-1])
     s = '01234567890123456789"a"b"c"d"e"f"g"h'
     assert_equal '"\"a\"b\"c\"d\"e\"f\"g\""', JSON.dump(s[20, 15])
+    s = "0123456789001234567890012345678900123456789001234567890"
+    assert_equal '"23456789001234567890012345678900123456789001234567890"', JSON.dump(s[2..-1])
+    s = "0123456789001234567890012345678900123456789001234567890"
+    assert_equal '"567890012345678900123456789001234567890012345678"', JSON.dump(s[5..-3])
+    s = '01"3456789001234567890012345678900123456789001234567890'
+    assert_equal '"\"3456789001234567890012345678900123456789001234567890"', JSON.dump(s[2..-1])
   end
 
   def test_unicode