diff --git a/src/node.cc b/src/node.cc index 1bee586033515c..22c4a9b1547e60 100644 --- a/src/node.cc +++ b/src/node.cc @@ -139,9 +139,6 @@ static uv_async_t dispatch_debug_messages_async; static Isolate* node_isolate = nullptr; -int WRITE_UTF8_FLAGS = v8::String::HINT_MANY_WRITES_EXPECTED | - v8::String::NO_NULL_TERMINATION; - class ArrayBufferAllocator : public ArrayBuffer::Allocator { public: // Impose an upper limit to avoid out of memory errors that bring down @@ -3819,11 +3816,6 @@ static void StartNodeInstance(void* arg) { int Start(int argc, char** argv) { PlatformInit(); - const char* replace_invalid = secure_getenv("NODE_INVALID_UTF8"); - - if (replace_invalid == nullptr) - WRITE_UTF8_FLAGS |= String::REPLACE_INVALID_UTF8; - CHECK_GT(argc, 0); // Hack around with the argv pointer. Used for process.title = "blah". diff --git a/src/node_buffer.cc b/src/node_buffer.cc index 2d00ca97cc0f58..fa77c0779762a6 100644 --- a/src/node_buffer.cc +++ b/src/node_buffer.cc @@ -415,9 +415,6 @@ void StringWrite(const FunctionCallbackInfo& args) { if (max_length == 0) return args.GetReturnValue().Set(0); - if (encoding == UCS2) - max_length = max_length / 2; - if (offset >= obj_length) return env->ThrowRangeError("Offset is out of bounds"); diff --git a/src/string_bytes.cc b/src/string_bytes.cc index 1df6879a6a0346..4f896ace3fb693 100644 --- a/src/string_bytes.cc +++ b/src/string_bytes.cc @@ -28,8 +28,7 @@ class ExternString: public ResourceType { public: ~ExternString() override { delete[] data_; - int64_t change_in_bytes = -static_cast(length_); - isolate()->AdjustAmountOfExternalAllocatedMemory(change_in_bytes); + isolate()->AdjustAmountOfExternalAllocatedMemory(-byte_length()); } const TypeName* data() const override { @@ -40,6 +39,10 @@ class ExternString: public ResourceType { return length_; } + int64_t byte_length() const { + return length() * sizeof(*data()); + } + static Local NewFromCopy(Isolate* isolate, const TypeName* data, size_t length) { @@ -69,7 +72,7 @@ class ExternString: public ResourceType { data, length); Local str = String::NewExternal(isolate, h_str); - isolate->AdjustAmountOfExternalAllocatedMemory(length); + isolate->AdjustAmountOfExternalAllocatedMemory(h_str->byte_length()); return scope.Escape(str); } @@ -260,7 +263,7 @@ bool StringBytes::GetExternalParts(Isolate* isolate, const String::ExternalStringResource* ext; ext = str->GetExternalStringResource(); *data = reinterpret_cast(ext->data()); - *len = ext->length(); + *len = ext->length() * sizeof(*ext->data()); return true; } @@ -276,82 +279,83 @@ size_t StringBytes::Write(Isolate* isolate, int* chars_written) { HandleScope scope(isolate); const char* data = nullptr; - size_t len = 0; - bool is_extern = GetExternalParts(isolate, val, &data, &len); - size_t extlen = len; + size_t nbytes = 0; + const bool is_extern = GetExternalParts(isolate, val, &data, &nbytes); + const size_t external_nbytes = nbytes; CHECK(val->IsString() == true); Local str = val.As(); - len = len < buflen ? len : buflen; - int flags = String::NO_NULL_TERMINATION | - String::HINT_MANY_WRITES_EXPECTED; + if (nbytes > buflen) + nbytes = buflen; + + int flags = String::HINT_MANY_WRITES_EXPECTED | + String::NO_NULL_TERMINATION | + String::REPLACE_INVALID_UTF8; switch (encoding) { case ASCII: case BINARY: case BUFFER: - if (is_extern) - memcpy(buf, data, len); - else - len = str->WriteOneByte(reinterpret_cast(buf), - 0, - buflen, - flags); + if (is_extern && str->IsOneByte()) { + memcpy(buf, data, nbytes); + } else { + uint8_t* const dst = reinterpret_cast(buf); + nbytes = str->WriteOneByte(dst, 0, buflen, flags); + } if (chars_written != nullptr) - *chars_written = len; + *chars_written = nbytes; break; case UTF8: - if (is_extern) - // TODO(tjfontaine) should this validate invalid surrogate pairs as - // well? - memcpy(buf, data, len); - else - len = str->WriteUtf8(buf, buflen, chars_written, WRITE_UTF8_FLAGS); + nbytes = str->WriteUtf8(buf, buflen, chars_written, flags); break; - case UCS2: - if (is_extern) - memcpy(buf, data, len * 2); - else - len = str->Write(reinterpret_cast(buf), 0, buflen, flags); + case UCS2: { + uint16_t* const dst = reinterpret_cast(buf); + size_t nchars; + if (is_extern && !str->IsOneByte()) { + memcpy(buf, data, nbytes); + nchars = nbytes / sizeof(*dst); + } else { + nchars = buflen / sizeof(*dst); + nchars = str->Write(dst, 0, nchars, flags); + nbytes = nchars * sizeof(*dst); + } if (IsBigEndian()) { // Node's "ucs2" encoding wants LE character data stored in // the Buffer, so we need to reorder on BE platforms. See // http://nodejs.org/api/buffer.html regarding Node's "ucs2" // encoding specification - uint16_t* buf16 = reinterpret_cast(buf); - for (size_t i = 0; i < len; i++) { - buf16[i] = (buf16[i] << 8) | (buf16[i] >> 8); - } + for (size_t i = 0; i < nchars; i++) + dst[i] = dst[i] << 8 | dst[i] >> 8; } if (chars_written != nullptr) - *chars_written = len; - len = len * sizeof(uint16_t); + *chars_written = nchars; break; + } case BASE64: if (is_extern) { - len = base64_decode(buf, buflen, data, extlen); + nbytes = base64_decode(buf, buflen, data, external_nbytes); } else { String::Value value(str); - len = base64_decode(buf, buflen, *value, value.length()); + nbytes = base64_decode(buf, buflen, *value, value.length()); } if (chars_written != nullptr) { - *chars_written = len; + *chars_written = nbytes; } break; case HEX: if (is_extern) { - len = hex_decode(buf, buflen, data, extlen); + nbytes = hex_decode(buf, buflen, data, external_nbytes); } else { String::Value value(str); - len = hex_decode(buf, buflen, *value, value.length()); + nbytes = hex_decode(buf, buflen, *value, value.length()); } if (chars_written != nullptr) { - *chars_written = len * 2; + *chars_written = nbytes; } break; @@ -360,7 +364,7 @@ size_t StringBytes::Write(Isolate* isolate, break; } - return len; + return nbytes; } @@ -754,21 +758,17 @@ Local StringBytes::Encode(Isolate* isolate, Local StringBytes::Encode(Isolate* isolate, const uint16_t* buf, size_t buflen) { - const uint16_t* src = buf; - Local val; + if (buflen < EXTERN_APEX) { val = String::NewFromTwoByte(isolate, - src, + buf, String::kNormalString, buflen); } else { - val = ExternTwoByteString::NewFromCopy(isolate, src, buflen); + val = ExternTwoByteString::NewFromCopy(isolate, buf, buflen); } - if (src != buf) - delete[] src; - return val; } diff --git a/src/string_bytes.h b/src/string_bytes.h index 424d9245aad4dc..2fcfedaa098b67 100644 --- a/src/string_bytes.h +++ b/src/string_bytes.h @@ -10,8 +10,6 @@ namespace node { -extern int WRITE_UTF8_FLAGS; - class StringBytes { public: class InlineDecoder { diff --git a/src/util.cc b/src/util.cc index 1c57a976e1164f..f382b3d565a8cf 100644 --- a/src/util.cc +++ b/src/util.cc @@ -1,36 +1,28 @@ #include "util.h" - #include "string_bytes.h" namespace node { Utf8Value::Utf8Value(v8::Isolate* isolate, v8::Handle value) - : length_(0), str_(nullptr) { + : length_(0), str_(str_st_) { if (value.IsEmpty()) return; - v8::Local val_ = value->ToString(isolate); - if (val_.IsEmpty()) + v8::Local string = value->ToString(isolate); + if (string.IsEmpty()) return; // Allocate enough space to include the null terminator - size_t len = StringBytes::StorageSize(val_, UTF8) + 1; - - char* str; - if (len > kStorageSize) - str = static_cast(malloc(len)); - else - str = str_st_; - CHECK_NE(str, NULL); - - int flags = WRITE_UTF8_FLAGS; - - length_ = val_->WriteUtf8(str, - len, - 0, - flags); - str[length_] = '\0'; - - str_ = reinterpret_cast(str); + size_t len = StringBytes::StorageSize(string, UTF8) + 1; + if (len > sizeof(str_st_)) { + str_ = static_cast(malloc(len)); + CHECK_NE(str_, nullptr); + } + + const int flags = + v8::String::NO_NULL_TERMINATION | v8::String::REPLACE_INVALID_UTF8; + length_ = string->WriteUtf8(str_, len, 0, flags); + str_[length_] = '\0'; } + } // namespace node diff --git a/src/util.h b/src/util.h index 5742252688111b..ea17a155745993 100644 --- a/src/util.h +++ b/src/util.h @@ -190,10 +190,9 @@ class Utf8Value { }; private: - static const int kStorageSize = 1024; size_t length_; - char str_st_[kStorageSize]; char* str_; + char str_st_[1024]; }; } // namespace node diff --git a/test/parallel/test-stringbytes-external.js b/test/parallel/test-stringbytes-external.js index 331897286ad165..5bc4c945e87705 100644 --- a/test/parallel/test-stringbytes-external.js +++ b/test/parallel/test-stringbytes-external.js @@ -15,15 +15,10 @@ assert.equal(b[0], 0x61); assert.equal(b[1], 0); assert.equal(ucs2_control, c); - -// grow the strings to proper length -while (write_str.length <= EXTERN_APEX) { - write_str += write_str; - ucs2_control += ucs2_control; -} -write_str += write_str.substr(0, EXTERN_APEX - write_str.length); -ucs2_control += ucs2_control.substr(0, EXTERN_APEX * 2 - ucs2_control.length); - +// now create big strings +var size = 1 + (1 << 20); +write_str = Array(size).join(write_str); +ucs2_control = Array(size).join(ucs2_control); // check resultant buffer and output string var b = new Buffer(write_str, 'ucs2'); @@ -111,3 +106,16 @@ var PRE_3OF4_APEX = Math.ceil((EXTERN_APEX / 4) * 3) - RADIOS; } } })(); + +// https://github.com/iojs/io.js/issues/1024 +(function() { + var a = Array(1 << 20).join('x'); + var b = Buffer(a, 'ucs2').toString('ucs2'); + var c = Buffer(b, 'utf8').toString('utf8'); + + assert.equal(a.length, b.length); + assert.equal(b.length, c.length); + + assert.equal(a, b); + assert.equal(b, c); +})();