Skip to content

Commit

Permalink
util: add fast path for Latin1 decoding
Browse files Browse the repository at this point in the history
  • Loading branch information
Mert Can Altin committed Oct 5, 2024
1 parent e1852b5 commit 2dd0714
Show file tree
Hide file tree
Showing 3 changed files with 46 additions and 0 deletions.
5 changes: 5 additions & 0 deletions lib/internal/encoding.js
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ const {
encodeIntoResults,
encodeUtf8String,
decodeUTF8,
decodeLatin1,
} = binding;

const { Buffer } = require('buffer');
Expand Down Expand Up @@ -443,6 +444,10 @@ function makeTextDecoderICU() {
return decodeUTF8(input, this[kIgnoreBOM], this[kFatal]);
}

if (this[kEncoding] === 'windows-1252') {
return decodeLatin1(input);
}

this.#prepareConverter();

validateObject(options, 'options', kValidateObjectAllowObjectsAndNull);
Expand Down
40 changes: 40 additions & 0 deletions src/encoding_binding.cc
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#include "encoding_binding.h"
#include "ada.h"
#include "env-inl.h"
#include "node_buffer.h"
#include "node_errors.h"
#include "node_external_reference.h"
#include "simdutf.h"
Expand Down Expand Up @@ -226,6 +227,7 @@ void BindingData::CreatePerIsolateProperties(IsolateData* isolate_data,
SetMethodNoSideEffect(isolate, target, "decodeUTF8", DecodeUTF8);
SetMethodNoSideEffect(isolate, target, "toASCII", ToASCII);
SetMethodNoSideEffect(isolate, target, "toUnicode", ToUnicode);
SetMethodNoSideEffect(isolate, target, "decodeLatin1", DecodeLatin1);
}

void BindingData::CreatePerContextProperties(Local<Object> target,
Expand All @@ -243,6 +245,44 @@ void BindingData::RegisterTimerExternalReferences(
registry->Register(DecodeUTF8);
registry->Register(ToASCII);
registry->Register(ToUnicode);
registry->Register(DecodeLatin1);
}

void BindingData::DecodeLatin1(const FunctionCallbackInfo<Value>& args) {
Environment* env = Environment::GetCurrent(args);

CHECK_GE(args.Length(), 1);
if (!(args[0]->IsArrayBuffer() || args[0]->IsSharedArrayBuffer() ||
args[0]->IsArrayBufferView())) {
return node::THROW_ERR_INVALID_ARG_TYPE(
env->isolate(),
"The \"input\" argument must be an instance of ArrayBuffer, "
"SharedArrayBuffer, or ArrayBufferView.");
}

ArrayBufferViewContents<uint8_t> buffer(args[0]);
const uint8_t* data = buffer.data();
size_t length = buffer.length();

if (length == 0) {
return args.GetReturnValue().SetEmptyString();
}

std::string result(length * 2, '\0');

size_t written = simdutf::convert_latin1_to_utf8(
reinterpret_cast<const char*>(data), length, &result[0]);

if (written == 0) {
return node::THROW_ERR_ENCODING_INVALID_ENCODED_DATA(
env->isolate(), "The encoded data was not valid for encoding latin1");
}

result.resize(written);

Local<Object> buffer_result =
node::Buffer::Copy(env, result.c_str(), result.length()).ToLocalChecked();
args.GetReturnValue().Set(buffer_result);
}

} // namespace encoding_binding
Expand Down
1 change: 1 addition & 0 deletions src/encoding_binding.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ class BindingData : public SnapshotableObject {
static void EncodeInto(const v8::FunctionCallbackInfo<v8::Value>& args);
static void EncodeUtf8String(const v8::FunctionCallbackInfo<v8::Value>& args);
static void DecodeUTF8(const v8::FunctionCallbackInfo<v8::Value>& args);
static void DecodeLatin1(const v8::FunctionCallbackInfo<v8::Value>& args);

static void ToASCII(const v8::FunctionCallbackInfo<v8::Value>& args);
static void ToUnicode(const v8::FunctionCallbackInfo<v8::Value>& args);
Expand Down

0 comments on commit 2dd0714

Please sign in to comment.