Skip to content

Commit

Permalink
codeswitch: change string encoding from ucs-32 to utf-8
Browse files Browse the repository at this point in the history
Fixes #35
  • Loading branch information
jayconrod committed Feb 18, 2017
1 parent 8e1f9d8 commit 08b2f15
Show file tree
Hide file tree
Showing 14 changed files with 107 additions and 311 deletions.
22 changes: 8 additions & 14 deletions codeswitch/src/interpreter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -866,21 +866,21 @@ void Interpreter::handleBuiltin(BuiltinId id) {
break;
}

case BUILTIN_STRING_FROM_CODE_POINTS_ID: {
case BUILTIN_STRING_FROM_UTF8_ID: {
String* result = nullptr;
Object* exn = nullptr;
try {
GCSafeScope gcSafe(this);
HandleScope handleScope(vm_);
auto array = handle(mem<Object*>(stack_->sp() + kPrepareForGCSize));
auto clas = handle(array->meta()->clas());
if (!clas->elementType() || !clas->elementType()->isI32()) {
if (!clas->elementType() || !clas->elementType()->isI8()) {
auto exnClass = handle(
vm_->roots()->getBuiltinClass(BUILTIN_ILLEGAL_ARGUMENT_EXCEPTION_CLASS_ID));
auto exnMeta = Class::ensureInstanceMeta(exnClass);
exn = *Object::create(vm_->heap(), exnMeta);
} else {
auto chars = reinterpret_cast<const u32*>(array->elementsBase());
auto chars = reinterpret_cast<const char*>(array->elementsBase());
result = *String::create(vm_->heap(), array->elementsLength(), chars);
}
} catch (AllocationError& e) {
Expand Down Expand Up @@ -991,14 +991,14 @@ void Interpreter::handleBuiltin(BuiltinId id) {
try {
GCSafeScope gcSafe(this);
HandleScope handleScope(vm_);
if (!cin.good()) {
if (!cin.good() || stlString.size() > kMaxLength) {
auto clas = handle(vm_->roots()->getBuiltinClass(BUILTIN_EXCEPTION_CLASS_ID));
auto meta = Class::ensureInstanceMeta(clas);
exn = *Object::create(vm_->heap(), meta);
} else {
result = *String::fromUtf8String(vm_->heap(),
reinterpret_cast<const u8*>(stlString.data()),
stlString.length());
stlString.data(),
static_cast<length_t>(stlString.length()));
}
} catch (AllocationError& e) {
doThrow(threadBindle_->takeOutOfMemoryException());
Expand Down Expand Up @@ -1337,10 +1337,7 @@ void Interpreter::intToString() {
stringstream stream;
stream << value;
auto stlString = stream.str();
auto size = stlString.length(); // same as length since these should be ascii chars
result = *String::fromUtf8String(vm_->heap(),
reinterpret_cast<const u8*>(stlString.data()),
size, size);
result = *String::fromUtf8String(vm_->heap(), stlString);
} catch (AllocationError& e) {
doThrow(threadBindle_->takeOutOfMemoryException());
return;
Expand All @@ -1361,10 +1358,7 @@ void Interpreter::floatToString() {
stringstream stream;
stream << value;
auto stlString = stream.str();
auto size = stlString.length();
result = *String::fromUtf8String(vm_->heap(),
reinterpret_cast<const u8*>(stlString.data()),
size, size);
result = *String::fromUtf8String(vm_->heap(), stlString);
} catch (AllocationError& e) {
doThrow(threadBindle_->takeOutOfMemoryException());
return;
Expand Down
7 changes: 3 additions & 4 deletions codeswitch/src/package.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -939,9 +939,8 @@ ostream& operator << (ostream& os, const PackageDependency* dep) {

Local<String> Loader::readString() {
auto length = readLengthVbn();
auto size = readLengthVbn();
vector<u8> utf8Chars = readData(size);
return String::fromUtf8String(heap(), utf8Chars.data(), length, size);
auto utf8Chars = readData(length);
return String::fromUtf8String(heap(), utf8Chars.data(), length);
}


Expand Down Expand Up @@ -1405,7 +1404,7 @@ Persistent<Package> PackageLoader::load() {
throw Error("package file is corrupt");
auto majorVersion = readValue<u16>();
auto minorVersion = readValue<u16>();
if (majorVersion != 0 || minorVersion != 22)
if (majorVersion != 0 || minorVersion != 23)
throw Error("package file has wrong format version");

auto flags = readValue<u64>();
Expand Down
2 changes: 1 addition & 1 deletion codeswitch/src/roots.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -220,7 +220,7 @@ void Roots::initialize(Heap* heap) {
stringMeta->blockType_ = STRING_BLOCK_TYPE;
basicRoots_[STRING_META_ROOT_INDEX] = stringMeta;

auto emptyString = new(heap, 0) String(nullptr);
auto emptyString = new(heap, 0) String("");
basicRoots_[EMPTY_STRING_ROOT_INDEX] = emptyString;

auto trueString = String::rawFromUtf8CString(heap, "true");
Expand Down
130 changes: 49 additions & 81 deletions codeswitch/src/string.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ namespace internal {

word_t String::sizeForLength(length_t length) {
ASSERT(length <= kMaxLength);
return elementsOffset(sizeof(String), sizeof(u32)) + length * sizeof(u32);
return elementsOffset(sizeof(String), sizeof(u8)) + length;
}


Expand All @@ -35,17 +35,28 @@ void* String::operator new (size_t, Heap* heap, length_t length) {
}


String::String(const u32* chars)
String::String(const u8* chars)
: Object(STRING_BLOCK_TYPE) {
copy_n(chars, length_, chars_);
}


// Constructs a string whose bytes are copied from a `char` buffer.
// Exactly `length_` bytes are read from `chars` and written to `chars_`.
// NOTE(review): `length_` is not assigned here; presumably the placement
// `operator new` stores it before the constructor runs — confirm.
String::String(const char* chars)
    : Object(STRING_BLOCK_TYPE) {
  auto bytes = reinterpret_cast<const u8*>(chars);
  copy_n(bytes, length_, chars_);
}


// Default constructor: initializes only the Object base with
// STRING_BLOCK_TYPE and writes no character data.
String::String() : Object(STRING_BLOCK_TYPE) {}


Local<String> String::create(Heap* heap, length_t length, const u32* chars) {
Local<String> String::create(Heap* heap, length_t length, const u8* chars) {
RETRY_WITH_GC(heap, return Local<String>(new(heap, length) String(chars)));
}


// Creates a String from `chars` using the placement form `new(heap, length)`
// and the `const char*` constructor, returning it wrapped in a Local handle.
// The allocation statement is handed to RETRY_WITH_GC together with `heap`.
// NOTE(review): presumably the macro re-runs the statement after a collection
// if the first allocation attempt fails — confirm against its definition.
// NOTE(review): assumes `chars` has at least `length` readable bytes — confirm.
Local<String> String::create(Heap* heap, length_t length, const char* chars) {
RETRY_WITH_GC(heap, return Local<String>(new(heap, length) String(chars)));
}

Expand All @@ -55,100 +66,52 @@ Local<String> String::create(Heap* heap, length_t length) {
}


String* String::rawFromUtf8CString(Heap* heap, const char* utf8Chars) {
word_t size = strlen(utf8Chars);
length_t length = 0;
auto p = reinterpret_cast<const u8*>(utf8Chars);
auto end = p + size;
while (p != end) {
auto ch = utf8Decode(&p, end);
if (ch == UTF8_DECODE_ERROR)
throw Error("invalid utf8 string");
length++;
}
auto string = new(heap, length) String;
auto chars = string->chars_;
p = reinterpret_cast<const u8*>(utf8Chars);
for (length_t i = 0; i < length; i++) {
auto ch = utf8Decode(&p, end);
ASSERT(ch != UTF8_DECODE_ERROR);
chars[i] = ch;
}
return string;
String* String::rawFromUtf8CString(Heap* heap, const u8* chars) {
length_t length = strlen(reinterpret_cast<const char*>(chars));
ASSERT(length <= kMaxLength);
return new(heap, length) String(chars);
}


Local<String> String::fromUtf8String(Heap* heap, const std::string& stlString) {
const u8* chars = reinterpret_cast<const u8*>(stlString.data());
word_t size = stlString.size();
return fromUtf8String(heap, chars, size);
String* String::rawFromUtf8CString(Heap* heap, const char* chars) {
return rawFromUtf8CString(heap, reinterpret_cast<const u8*>(chars));
}


Local<String> String::fromUtf8String(Heap* heap, const u8* utf8Chars,
length_t length, word_t size) {
auto string = String::create(heap, length);
u32* chars = string->chars_;
auto end = utf8Chars + size;
length_t i;
for (i = 0; i < length; i++) {
auto ch = utf8Decode(&utf8Chars, end);
if (ch == UTF8_DECODE_ERROR)
throw Error("invalid utf8 string");
chars[i] = ch;
}
if (utf8Chars != end)
throw Error("invalid utf8 string");
return string;
Local<String> String::fromUtf8CString(Heap* heap, const u8* chars) {
length_t length = strnlen(reinterpret_cast<const char*>(chars), kMaxLength);
return create(heap, length, chars);
}


Local<String> String::fromUtf8CString(Heap* heap, const char* utf8Chars) {
word_t size = strlen(utf8Chars);
return fromUtf8String(heap, reinterpret_cast<const u8*>(utf8Chars), size);
Local<String> String::fromUtf8String(Heap* heap, const std::string& stlString) {
ASSERT(stlString.size() <= kMaxLength);
return create(heap, stlString.size(), stlString.data());
}


Local<String> String::fromUtf8String(Heap* heap, const u8* utf8Chars, word_t size) {
length_t length = 0;
auto p = utf8Chars;
auto end = p + size;
while (p != end) {
auto ch = utf8Decode(&p, end);
if (ch == UTF8_DECODE_ERROR)
throw Error("invalid utf8 string");
length++;
}
return fromUtf8String(heap, utf8Chars, length, size);
Local<String> String::fromUtf8String(Heap* heap, const u8* chars, length_t length) {
return create(heap, length, chars);
}


word_t String::utf8EncodedSize() const {
word_t size = 0;
for (length_t i = 0; i < length(); i++) {
size += utf8EncodeSize(get(i));
}
return size;
Local<String> String::fromUtf8CString(Heap* heap, const char* utf8Chars) {
return fromUtf8CString(heap, reinterpret_cast<const u8*>(utf8Chars));
}


vector<u8> String::toUtf8StlVector() const {
word_t size = utf8EncodedSize();
vector<u8> utf8Chars(size);
auto p = utf8Chars.data();
auto end = p + size;
for (length_t i = 0; i < length(); i++) {
utf8Encode(get(i), &p);
}
ASSERT(p == end);
return utf8Chars;
// Convenience overload for `char` buffers: reinterprets `utf8Chars` as a
// `u8` pointer and forwards, with `heap` and `length` unchanged, to the
// `const u8*` overload of fromUtf8String.
Local<String> String::fromUtf8String(Heap* heap, const char* utf8Chars, length_t length) {
  auto bytes = reinterpret_cast<const u8*>(utf8Chars);
  return fromUtf8String(heap, bytes, length);
}


// Returns a vector holding a copy of the `length_` bytes that start at
// `chars_`. The bytes are copied as stored; no transcoding is performed.
vector<u8> String::toUtf8StlVector() const {
  vector<u8> bytes(chars_, chars_ + length_);
  return bytes;
}


string String::toUtf8StlString() const {
vector<u8> utf8Chars = toUtf8StlVector();
return string(reinterpret_cast<char*>(utf8Chars.data()), utf8Chars.size());
return string(reinterpret_cast<const char*>(chars_), length_);
}


Expand All @@ -163,16 +126,21 @@ bool String::equals(const String* other) const {
}


bool String::equals(const char* other) const {
bool String::equals(const u8* other) const {
length_t i;
for (i = 0; i < length() && other[i] != '\0'; i++) {
if (get(i) != static_cast<u32>(other[i]))
if (get(i) != other[i])
return false;
}
return other[i] == '\0';
}


bool String::equals(const char* other) const {
return equals(reinterpret_cast<const u8*>(other));
}


int String::compare(String* other) const {
auto minLength = min(length(), other->length());
int cmp;
Expand Down Expand Up @@ -232,7 +200,7 @@ Local<String> String::substring(const Handle<String>& string,
}


length_t String::find(u32 needle, length_t start) const {
length_t String::find(u8 needle, length_t start) const {
ASSERT(start <= length());

for (length_t i = start; i < length(); i++) {
Expand Down Expand Up @@ -261,7 +229,7 @@ length_t String::find(String* needle, length_t start) const {
}


length_t String::count(u32 needle) const {
length_t String::count(u8 needle) const {
length_t pos = 0;
length_t count = 0;
while ((pos = find(needle, pos)) != kIndexNotSet) {
Expand Down Expand Up @@ -289,7 +257,7 @@ length_t String::count(String* needle) const {
}


Local<BlockArray<String>> String::split(Heap* heap, const Handle<String>& string, u32 sep) {
Local<BlockArray<String>> String::split(Heap* heap, const Handle<String>& string, u8 sep) {
auto count = string->count(sep);
auto pieces = BlockArray<String>::create(heap, count + 1);
length_t pos = 0;
Expand Down Expand Up @@ -383,7 +351,7 @@ bool String::tryToI32(i32* n) const {
i64 limit = sign < 0 ? -static_cast<i64>(INT32_MIN) : INT32_MAX;
for (length_t i = start; i < length(); i++) {
auto d = get(i);
if (!inRange<u32>(d, '0', '9'))
if (!inRange<u8>(d, '0', '9'))
return false;
auto v = d - '0';
value = 10 * value + v;
Expand All @@ -396,7 +364,7 @@ bool String::tryToI32(i32* n) const {
}


u32 String::iterator::operator * () const {
u8 String::iterator::operator * () const {
return str_->get(index_);
}

Expand Down
Loading

0 comments on commit 08b2f15

Please sign in to comment.