Skip to content

Commit

Permalink
feat: utf8Len, utf8Codepoints, utf8Valid
Browse files Browse the repository at this point in the history
closes #39
  • Loading branch information
giann committed Aug 30, 2023
1 parent 81c9c72 commit 7c90254
Show file tree
Hide file tree
Showing 4 changed files with 105 additions and 7 deletions.
64 changes: 63 additions & 1 deletion src/builtin/str.zig
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,69 @@ pub fn trim(ctx: *NativeCtx) c_int {
/// Native implementation of the string `len` method.
///
/// Reads the value on top of the VM stack (`peek(0)`), treats it as an
/// `ObjString` and pushes the length of its backing `string` slice as an
/// integer value.
///
/// Returns 1 (presumably the number of values left on the stack for the
/// caller; confirm against the native function calling convention).
pub fn len(ctx: *NativeCtx) c_int {
// `.?` asserts the receiver really is a string object.
const str: *ObjString = ObjString.cast(ctx.vm.peek(0).obj()).?;

// NOTE(review): the two `push` lines below look like the removed and the
// added side of a single diff hunk (the explicit `@as(i32, ...)` was dropped
// in favour of result-type inference); only one push is expected in the
// actual file. Confirm before relying on this listing.
ctx.vm.push(Value.fromInteger(@as(i32, @intCast(str.string.len))));
ctx.vm.push(Value.fromInteger(@intCast(str.string.len)));

return 1;
}

/// Native implementation of the string `utf8Len` method.
///
/// Reads the string on top of the VM stack and pushes the number of UTF-8
/// codepoints it contains. When the bytes cannot be counted as UTF-8
/// (`utf8CountCodepoints` returns an error) the pushed value is 0.
///
/// Returns 1 (presumably the number of values pushed for the caller; confirm
/// against the native function calling convention).
pub fn utf8Len(ctx: *NativeCtx) c_int {
    const receiver: *ObjString = ObjString.cast(ctx.vm.peek(0).obj()).?;

    // Invalid UTF-8 is reported as a length of 0 rather than as an error.
    const codepoint_count = std.unicode.utf8CountCodepoints(receiver.string) catch 0;

    ctx.vm.push(Value.fromInteger(@intCast(codepoint_count)));

    return 1;
}

/// Native implementation of the string `utf8Valid` method.
///
/// Reads the string on top of the VM stack and pushes a boolean telling
/// whether its bytes form valid UTF-8, as decided by
/// `std.unicode.utf8ValidateSlice`.
///
/// Returns 1 (presumably the number of values pushed for the caller; confirm
/// against the native function calling convention).
pub fn utf8Valid(ctx: *NativeCtx) c_int {
    const receiver: *ObjString = ObjString.cast(ctx.vm.peek(0).obj()).?;
    const is_valid = std.unicode.utf8ValidateSlice(receiver.string);

    ctx.vm.push(Value.fromBoolean(is_valid));

    return 1;
}

/// Native implementation of the string `utf8Codepoints` method.
///
/// Reads the string on top of the VM stack and pushes a new list of strings
/// holding one entry per UTF-8 codepoint, in order of appearance. If the
/// string is not valid UTF-8 the pushed list is empty.
///
/// Panics if the list type, the list object or any codepoint string cannot
/// be created.
///
/// Returns 1 (presumably the number of values pushed for the caller; confirm
/// against the native function calling convention).
pub fn utf8Codepoints(ctx: *NativeCtx) c_int {
    const str: *ObjString = ObjString.cast(ctx.vm.peek(0).obj()).?;

    // Build the type definition of a list whose items are strings.
    const list_def: ObjList.ListDef = ObjList.ListDef.init(
        ctx.vm.gc.allocator,
        ctx.vm.gc.type_registry.getTypeDef(.{ .def_type = .String }) catch @panic("Could not create list"),
    );

    const list_def_union: ObjTypeDef.TypeUnion = .{
        .List = list_def,
    };

    const list_def_type: *ObjTypeDef = ctx.vm.gc.type_registry.getTypeDef(ObjTypeDef{
        .def_type = .List,
        .optional = false,
        .resolved_type = list_def_union,
    }) catch @panic("Could not create list");

    const list = ctx.vm.gc.allocateObject(
        ObjList,
        ObjList.init(ctx.vm.gc.allocator, list_def_type),
    ) catch @panic("Could not create list");

    // NOTE(review): `list` is only pushed on the VM stack after the loop;
    // confirm that the allocations made by `copyString` / `rawAppend` cannot
    // trigger a collection that reclaims it in the meantime.

    // `Utf8View.init` validates the bytes itself, so a single pass is enough
    // and no `catch unreachable` is needed.
    if (std.unicode.Utf8View.init(str.string)) |view| {
        var it = view.iterator();
        while (it.nextCodepointSlice()) |codepoint| {
            const codepoint_str = ctx.vm.gc.copyString(codepoint) catch @panic("Could not get codepoints");

            list.rawAppend(ctx.vm.gc, codepoint_str.toValue()) catch @panic("Could not get codepoints");
        }
    } else |_| {
        // Invalid UTF-8: deliberately leave the list empty.
    }

    ctx.vm.push(list.toValue());

    return 1;
}
Expand Down
24 changes: 22 additions & 2 deletions src/obj.zig
Original file line number Diff line number Diff line change
Expand Up @@ -505,6 +505,9 @@ pub const ObjString = struct {
NativeFn,
.{
.{ "len", buzz_builtin.str.len },
.{ "utf8Len", buzz_builtin.str.utf8Len },
.{ "utf8Valid", buzz_builtin.str.utf8Valid },
.{ "utf8Codepoints", buzz_builtin.str.utf8Codepoints },
.{ "trim", buzz_builtin.str.trim },
.{ "byte", buzz_builtin.str.byte },
.{ "indexOf", buzz_builtin.str.indexOf },
Expand Down Expand Up @@ -557,8 +560,25 @@ pub const ObjString = struct {
try parser.gc.objstring_memberDefs.put("len", native_type);

return native_type;
}
if (mem.eql(u8, method, "trim")) {
} else if (mem.eql(u8, method, "utf8Len")) {
const native_type = try parser.parseTypeDefFrom("extern Function utf8Len() > int");

try parser.gc.objstring_memberDefs.put("utf8Len", native_type);

return native_type;
} else if (mem.eql(u8, method, "utf8Valid")) {
const native_type = try parser.parseTypeDefFrom("extern Function utf8Valid() > bool");

try parser.gc.objstring_memberDefs.put("utf8Valid", native_type);

return native_type;
} else if (mem.eql(u8, method, "utf8Codepoints")) {
const native_type = try parser.parseTypeDefFrom("extern Function utf8Codepoints() > [str]");

try parser.gc.objstring_memberDefs.put("utf8Codepoints", native_type);

return native_type;
} else if (mem.eql(u8, method, "trim")) {
const native_type = try parser.parseTypeDefFrom("extern Function trim() > str");

try parser.gc.objstring_memberDefs.put("trim", native_type);
Expand Down
9 changes: 5 additions & 4 deletions tests/058-ffi.buzz
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,11 @@ import "ffi";

| TODO: one zdef for one lib and multiple declarations
| TODO: Allow multiple declarations in one string
zdef("tests/utils/libforeign", "fn acos(value: f64) f64;");
| `[*:0]const u8` is the only pointer type that will be handled with an ObjString
zdef("tests/utils/libforeign", "fn fprint(msg: [*:0]const u8) void;");
zdef("tests/utils/libforeign", "fn sum(values: [*]i32, len: i32) i32;");
zdef("tests/utils/libforeign", `
fn acos(value: f64) f64;
fn fprint(msg: [*:0]const u8) void;
fn sum(values: [*]i32, len: i32) i32;
`);

test "scalar type" {
assert(acos(0.12) == 1.4505064444001086, message: "Could call FFI function with scalar arguments");
Expand Down
15 changes: 15 additions & 0 deletions tests/061-utf8.buzz
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
import "std";

test "utf8" {
    | A multi-byte emoji must count as a single codepoint
    str greeting = "hello 🔥 buzz !";

    assert(greeting.utf8Len() == 14, message: "Could get length of utf8 string");
    assert(greeting.utf8Valid(), message: "Could validate utf8 string");

    | The escaped byte is expected to make the string invalid utf8
    str malformed = "hello \232 world!";

    assert(!malformed.utf8Valid(), message: "Could not validate invalid utf8 string");

    | Index 4 is the first codepoint after `I'm `
    [str] pieces = "I'm 🦇-man so 🔥 !".utf8Codepoints();
    assert(pieces[4] == "🦇", message: "Could get utf8 string codepoints");
}

0 comments on commit 7c90254

Please sign in to comment.