Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Minimum viable type validation #403

Merged
merged 15 commits into from
Jul 17, 2020
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Remove ControlFrame::stack_height
gumb0 committed Jul 17, 2020
commit fdda407eb1dde84481a0563d174ce3e20311aa63
121 changes: 54 additions & 67 deletions lib/fizzy/parser_expr.cpp
Original file line number Diff line number Diff line change
@@ -45,13 +45,8 @@ struct ControlFrame
size_t immediates_offset{0};

/// The frame stack height of the parent frame.
/// TODO: Storing this is not strictly required, as the parent frame is available
/// in the control_stack.
const int parent_stack_height{0};

/// The frame stack height.
int stack_height{0};

/// Whether the remainder of the block is unreachable (used to handle stack-polymorphic typing
/// after branches).
bool unreachable{false};
@@ -65,8 +60,7 @@ struct ControlFrame
type{_type},
code_offset{_code_offset},
immediates_offset{_immediates_offset},
parent_stack_height{_parent_stack_height},
stack_height{_parent_stack_height}
parent_stack_height{_parent_stack_height}
{}
};

@@ -89,27 +83,16 @@ parser_result<std::optional<ValType>> parse_blocktype(const uint8_t* pos, const
return {validate_valtype(type), pos};
}

void update_operand_stack(ControlFrame& frame, Stack<ValType>& operand_stack,
void update_operand_stack(const ControlFrame& frame, Stack<ValType>& operand_stack,
span<const ValType> inputs, span<const ValType> outputs)
{
const auto frame_stack_height = static_cast<int>(operand_stack.size());
const auto inputs_size = static_cast<int>(inputs.size());
const auto outputs_size = static_cast<int>(outputs.size());

// Update frame.stack_height.
if (frame.stack_height < frame.parent_stack_height + inputs_size)
{
// Stack is polymorphic after unreachable instruction: underflow is ignored,
// but we need to count stack growth to detect extra values at the end of the block.
if (frame.unreachable)
{
// Add instruction's/call's output values only.
frame.stack_height = frame.parent_stack_height + outputs_size;
}
else
throw validation_error{"stack underflow"};
}
else
frame.stack_height += outputs_size - inputs_size;
// Stack is polymorphic after unreachable instruction: underflow is ignored,
// but we need to count stack growth to detect extra values at the end of the block.
if (!frame.unreachable && frame_stack_height < frame.parent_stack_height + inputs_size)
throw validation_error{"stack underflow"};

// Update operand_stack.
for (auto it = inputs.rbegin(); it != inputs.rend(); ++it)
@@ -129,20 +112,22 @@ void update_operand_stack(ControlFrame& frame, Stack<ValType>& operand_stack,
operand_stack.push(output_type);
}

void validate_result_count(const ControlFrame& frame)
void validate_result_count(const ControlFrame& frame, const Stack<ValType>& operand_stack)
{
const auto frame_stack_height = static_cast<int>(operand_stack.size());

// This is checked by "stack underflow".
assert(frame.stack_height >= frame.parent_stack_height);
assert(frame_stack_height >= frame.parent_stack_height);

const auto arity = frame.type.has_value() ? 1 : 0;

if (frame.stack_height > frame.parent_stack_height + arity)
if (frame_stack_height > frame.parent_stack_height + arity)
throw validation_error{"too many results"};

if (frame.unreachable)
return;

if (frame.stack_height < frame.parent_stack_height + arity)
if (frame_stack_height < frame.parent_stack_height + arity)
throw validation_error{"missing result"};
}

@@ -155,14 +140,16 @@ inline uint8_t get_branch_arity(const ControlFrame& frame) noexcept
return frame.instruction == Instr::loop ? 0 : arity;
}

inline void validate_branch_stack_height(
const ControlFrame& current_frame, const ControlFrame& branch_frame)
inline void validate_branch_stack_height(const ControlFrame& current_frame,
const ControlFrame& branch_frame, const Stack<ValType>& operand_stack)
{
assert(current_frame.stack_height >= current_frame.parent_stack_height);
const auto current_frame_stack_height = static_cast<int>(operand_stack.size());

assert(current_frame_stack_height >= current_frame.parent_stack_height);

const auto arity = get_branch_arity(branch_frame);
if (!current_frame.unreachable &&
(current_frame.stack_height < current_frame.parent_stack_height + arity))
(current_frame_stack_height < current_frame.parent_stack_height + arity))
throw validation_error{"branch stack underflow"};
}

@@ -183,22 +170,18 @@ void push_branch_immediates(const ControlFrame& branch_frame, int stack_height,
push(immediates, arity);
}

inline void mark_frame_unreachable(ControlFrame& frame) noexcept
inline void mark_frame_unreachable(ControlFrame& frame, Stack<ValType>& operand_stack) noexcept
{
frame.unreachable = true;
frame.stack_height = frame.parent_stack_height;
operand_stack.shrink(static_cast<size_t>(frame.parent_stack_height));
}

inline void drop_operand(
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In the "Validation Algorithm" this is a pop_opd(). Is the returned type ever needed?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I can rename it to pop_operand.

Though it's not exactly the same thing as pop_opd() in Validation Algorithm, where it's a low-level primitive used to build all other checks. There return value is needed to check against the expected value and also return value can be Unknown.
We never push unknown values into stack, so here return value would always be equal to expected_type input value.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You don't have to rename it. I was just curious.

ControlFrame& frame, Stack<ValType>& operand_stack, std::optional<ValType> expected_type)
const ControlFrame& frame, Stack<ValType>& operand_stack, std::optional<ValType> expected_type)
{
if (frame.stack_height < frame.parent_stack_height + 1)
{
if (!frame.unreachable)
throw validation_error{"stack underflow"};
}
else
--frame.stack_height;
if (!frame.unreachable &&
static_cast<int>(operand_stack.size()) < frame.parent_stack_height + 1)
throw validation_error{"stack underflow"};

if (frame.unreachable && static_cast<int>(operand_stack.size()) == frame.parent_stack_height)
return;
@@ -208,9 +191,8 @@ inline void drop_operand(
throw validation_error{"type mismatch"};
}

inline void push_operand(ControlFrame& frame, Stack<ValType>& operand_stack, ValType type)
inline void push_operand(Stack<ValType>& operand_stack, ValType type)
{
++frame.stack_height;
operand_stack.push(type);
}

@@ -276,7 +258,8 @@ parser_result<Code> parse_expr(const uint8_t* pos, const uint8_t* end, FuncIdx f
// already popped/reset), but it does not matter, as these instructions do not modify
// stack height anyway.
if (!frame.unreachable)
code.max_stack_height = std::max(code.max_stack_height, frame.stack_height);
code.max_stack_height =
std::max(code.max_stack_height, static_cast<int>(operand_stack.size()));

update_operand_stack(frame, operand_stack, type.inputs, type.outputs);

@@ -369,7 +352,7 @@ parser_result<Code> parse_expr(const uint8_t* pos, const uint8_t* end, FuncIdx f
break;

case Instr::unreachable:
mark_frame_unreachable(frame);
mark_frame_unreachable(frame, operand_stack);
break;

case Instr::drop:
@@ -448,7 +431,7 @@ parser_result<Code> parse_expr(const uint8_t* pos, const uint8_t* end, FuncIdx f
std::tie(block_type, pos) = parse_blocktype(pos, end);

// Push label with immediates offset after arity.
control_stack.emplace(Instr::block, block_type, frame.stack_height,
control_stack.emplace(Instr::block, block_type, static_cast<int>(operand_stack.size()),
code.instructions.size(), code.immediates.size());
break;
}
@@ -458,7 +441,7 @@ parser_result<Code> parse_expr(const uint8_t* pos, const uint8_t* end, FuncIdx f
std::optional<ValType> loop_type;
std::tie(loop_type, pos) = parse_blocktype(pos, end);

control_stack.emplace(Instr::loop, loop_type, frame.stack_height,
control_stack.emplace(Instr::loop, loop_type, static_cast<int>(operand_stack.size()),
code.instructions.size(), code.immediates.size());
break;
}
@@ -468,8 +451,8 @@ parser_result<Code> parse_expr(const uint8_t* pos, const uint8_t* end, FuncIdx f
std::optional<ValType> if_type;
std::tie(if_type, pos) = parse_blocktype(pos, end);

control_stack.emplace(Instr::if_, if_type, frame.stack_height, code.instructions.size(),
code.immediates.size());
control_stack.emplace(Instr::if_, if_type, static_cast<int>(operand_stack.size()),
code.instructions.size(), code.immediates.size());

// Placeholders for immediate values, filled at the matching end or else instructions.
push(code.immediates, uint32_t{0}); // Diff to the else instruction
@@ -483,10 +466,9 @@ parser_result<Code> parse_expr(const uint8_t* pos, const uint8_t* end, FuncIdx f
if (frame.instruction != Instr::if_)
throw parser_error{"unexpected else instruction (if instruction missing)"};

validate_result_count(frame); // else is the end of if.
validate_result_count(frame, operand_stack); // else is the end of if.

// Reset frame after if. The if result type validation not implemented yet.
frame.stack_height = frame.parent_stack_height;
frame.unreachable = false;
const auto if_imm_offset = frame.immediates_offset;
frame.immediates_offset = code.immediates.size();
@@ -511,7 +493,7 @@ parser_result<Code> parse_expr(const uint8_t* pos, const uint8_t* end, FuncIdx f

case Instr::end:
{
validate_result_count(frame);
validate_result_count(frame, operand_stack);

if (frame.instruction != Instr::loop) // If end of block/if/else instruction.
{
@@ -545,12 +527,13 @@ parser_result<Code> parse_expr(const uint8_t* pos, const uint8_t* end, FuncIdx f
}
}
const auto frame_type = frame.type;
operand_stack.shrink(static_cast<size_t>(frame.parent_stack_height));
control_stack.pop(); // Pop the current frame.

if (control_stack.empty())
continue_parsing = false;
else if (frame_type.has_value())
control_stack.top().stack_height += 1; // The results of the popped frame.
operand_stack.push(*frame_type);
break;
}

@@ -565,15 +548,16 @@ parser_result<Code> parse_expr(const uint8_t* pos, const uint8_t* end, FuncIdx f

auto& branch_frame = control_stack[label_idx];

validate_branch_stack_height(frame, branch_frame);
validate_branch_stack_height(frame, branch_frame, operand_stack);

// Remember this br immediates offset to fill it at end instruction.
branch_frame.br_immediate_offsets.push_back(code.immediates.size());

push_branch_immediates(branch_frame, frame.stack_height, code.immediates);
push_branch_immediates(
branch_frame, static_cast<int>(operand_stack.size()), code.immediates);

if (instr == Instr::br)
mark_frame_unreachable(frame);
mark_frame_unreachable(frame, operand_stack);

break;
}
@@ -597,7 +581,7 @@ parser_result<Code> parse_expr(const uint8_t* pos, const uint8_t* end, FuncIdx f
auto& default_branch_frame = control_stack[default_label_idx];
const auto default_branch_arity = get_branch_arity(default_branch_frame);

validate_branch_stack_height(frame, default_branch_frame);
validate_branch_stack_height(frame, default_branch_frame, operand_stack);

// Remember immediates offset for all br items to fill them at end instruction.
push(code.immediates, static_cast<uint32_t>(label_indices.size()));
@@ -609,13 +593,15 @@ parser_result<Code> parse_expr(const uint8_t* pos, const uint8_t* end, FuncIdx f
throw validation_error{"br_table labels have inconsistent types"};

branch_frame.br_immediate_offsets.push_back(code.immediates.size());
push_branch_immediates(branch_frame, frame.stack_height, code.immediates);
push_branch_immediates(
branch_frame, static_cast<int>(operand_stack.size()), code.immediates);
}

default_branch_frame.br_immediate_offsets.push_back(code.immediates.size());
push_branch_immediates(default_branch_frame, frame.stack_height, code.immediates);
push_branch_immediates(
default_branch_frame, static_cast<int>(operand_stack.size()), code.immediates);

mark_frame_unreachable(frame);
mark_frame_unreachable(frame, operand_stack);

break;
}
@@ -628,13 +614,14 @@ parser_result<Code> parse_expr(const uint8_t* pos, const uint8_t* end, FuncIdx f

auto& branch_frame = control_stack[label_idx];

validate_branch_stack_height(frame, branch_frame);
validate_branch_stack_height(frame, branch_frame, operand_stack);

branch_frame.br_immediate_offsets.push_back(code.immediates.size());

push_branch_immediates(control_stack[label_idx], frame.stack_height, code.immediates);
push_branch_immediates(
control_stack[label_idx], static_cast<int>(operand_stack.size()), code.immediates);

mark_frame_unreachable(frame);
mark_frame_unreachable(frame, operand_stack);
break;
}

@@ -683,7 +670,7 @@ parser_result<Code> parse_expr(const uint8_t* pos, const uint8_t* end, FuncIdx f
LocalIdx local_idx;
std::tie(local_idx, pos) = leb128u_decode<uint32_t>(pos, end);

push_operand(frame, operand_stack, find_local_type(func_inputs, locals, local_idx));
push_operand(operand_stack, find_local_type(func_inputs, locals, local_idx));

push(code.immediates, local_idx);
break;
@@ -705,7 +692,7 @@ parser_result<Code> parse_expr(const uint8_t* pos, const uint8_t* end, FuncIdx f

const auto local_type = find_local_type(func_inputs, locals, local_idx);
drop_operand(frame, operand_stack, local_type);
push_operand(frame, operand_stack, local_type);
push_operand(operand_stack, local_type);

push(code.immediates, local_idx);
break;
@@ -719,7 +706,7 @@ parser_result<Code> parse_expr(const uint8_t* pos, const uint8_t* end, FuncIdx f
if (global_idx >= module.get_global_count())
throw validation_error{"accessing global with invalid index"};

push_operand(frame, operand_stack, module.get_global_type(global_idx).value_type);
push_operand(operand_stack, module.get_global_type(global_idx).value_type);

push(code.immediates, global_idx);
break;