Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: avoid crash #29

Merged
merged 1 commit into from
Apr 18, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions binding.gyp
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,13 @@
],
"cflags_c": [
"-std=c99",
"-fexceptions"
],
"defines": [
"TREE_SITTER_MARKDOWN_AVOID_CRASH"
],
"conditions": [
["OS=='mac'", { "xcode_settings": { "GCC_ENABLE_CPP_EXCEPTIONS": "YES" } }]
]
}
]
Expand Down
1 change: 1 addition & 0 deletions bindings/rust/build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ fn main() {
cpp_config.cpp(true);
cpp_config.include(&src_dir);
cpp_config
.define("TREE_SITTER_MARKDOWN_AVOID_CRASH", None)
.flag_if_supported("-Wno-unused-parameter")
.flag_if_supported("-Wno-unused-but-set-variable");
let scanner_path = src_dir.join("scanner.cc");
Expand Down
1 change: 1 addition & 0 deletions bindings/rust/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -48,5 +48,6 @@ mod tests {
parser
.set_language(super::language())
.expect("Error loading markdown language");
assert_eq!(format!("{:?}", parser.parse("abc", None).unwrap()), "{Tree {Node document (0, 0) - (0, 3)}}")
}
}
48 changes: 31 additions & 17 deletions src/scanner.cc
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,12 @@

#include "./tree_sitter_markdown/token_type.h"

// Invariant check used by the scanner sources.
//
// With TREE_SITTER_MARKDOWN_AVOID_CRASH defined, a failed check throws (the
// int value 1) so that a caller can catch it; otherwise the check is a plain
// assert().
//
// The throwing variant is wrapped in do { ... } while (0) so the expansion is
// exactly one statement that still needs the caller's trailing semicolon.
// This keeps `if (x) TREE_SITTER_MARKDOWN_ASSERT(c); else ...` well-formed and
// stops the `else` from binding to the macro's internal `if`.
#ifdef TREE_SITTER_MARKDOWN_AVOID_CRASH
#define TREE_SITTER_MARKDOWN_ASSERT(condition) do { if (!(condition)) throw 1; } while (0)
#else
#define TREE_SITTER_MARKDOWN_ASSERT(condition) assert(condition)
#endif

// tree-sitter does not support multiple files for external scanner
#include "./tree_sitter_markdown/block_context.cc"
#include "./tree_sitter_markdown/block_delimiter.cc"
Expand Down Expand Up @@ -47,7 +53,7 @@ struct Scanner {
i += blk_ctx_stk_.serialize(&buffer[i]);
buffer[i++] = has_opt_wsp_ind_;

assert(i <= TREE_SITTER_SERIALIZATION_BUFFER_SIZE);
TREE_SITTER_MARKDOWN_ASSERT(i <= TREE_SITTER_SERIALIZATION_BUFFER_SIZE);

return i;
}
Expand All @@ -70,7 +76,7 @@ struct Scanner {
i += blk_ctx_stk_.deserialize(&buffer[i]);
has_opt_wsp_ind_ = buffer[i++];

assert(i == length);
TREE_SITTER_MARKDOWN_ASSERT(i == length);
}
}

Expand All @@ -79,7 +85,7 @@ struct Scanner {
lxr_.mrk_end();

if (!min_inl_dlms_.empty() && is_inl_cls_mrk_sym(min_inl_dlms_.front().sym())) {
assert(min_inl_dlms_.front().len() == 0);
TREE_SITTER_MARKDOWN_ASSERT(min_inl_dlms_.front().len() == 0);
TokenType rlt_sym = min_inl_dlms_.front().tkn_typ(lxr_.cur_chr(), lxr_.lka_chr());
if (rlt_sym != TKN_NOT_FOUND) {
min_inl_dlms_.pop_front();
Expand Down Expand Up @@ -147,7 +153,7 @@ struct Scanner {
has_opt_wsp_ind_ = false;
}
} else if (is_blk_cls_sym(dlm.sym())) {
assert(is_paired_blk_syms(blk_ctx_stk_.back().sym(), dlm.sym()));
TREE_SITTER_MARKDOWN_ASSERT(is_paired_blk_syms(blk_ctx_stk_.back().sym(), dlm.sym()));
blk_ctx_stk_.pop();
has_opt_wsp_ind_ = false;
} else {
Expand Down Expand Up @@ -178,7 +184,7 @@ struct Scanner {
bool has_txt = false;
while (!min_inl_dlms_.empty() && !is_eol_chr(lxr_.lka_chr())) {
if (is_wht_chr(lxr_.lka_chr()) && valid_symbols[TKN_WRD]) {
assert(has_txt);
TREE_SITTER_MARKDOWN_ASSERT(has_txt);
break;
}

Expand All @@ -187,7 +193,7 @@ struct Scanner {

if (rlt_sym == TKN_NOT_FOUND) {
if (is_wsp_chr(lxr_.lka_chr())) {
assert(has_txt);
TREE_SITTER_MARKDOWN_ASSERT(has_txt);
lxr_.mrk_end();
lxr_.adv_rpt(is_wsp_chr);
} else {
Expand All @@ -210,7 +216,7 @@ struct Scanner {
min_inl_dlms_.pop_front();

if (rlt_sym == TKN_HRD_LBK) {
assert(blk_dlms_.front().sym() == SYM_LIT_LBK);
TREE_SITTER_MARKDOWN_ASSERT(blk_dlms_.front().sym() == SYM_LIT_LBK);
lxr_.adv_len(blk_dlms_.front().len());
blk_dlms_.pop_front();
}
Expand All @@ -227,32 +233,32 @@ struct Scanner {
return lxr_.ret_sym(TKN_WSP);
}

assert(has_txt);
TREE_SITTER_MARKDOWN_ASSERT(has_txt);
if (!is_wsp_chr(lxr_.cur_chr())) lxr_.mrk_end();
return lxr_.ret_sym(valid_symbols[TKN_WRD] ? TKN_WRD : TKN_TXT);
}

assert(min_inl_dlms_.empty());
assert(inl_dlms_.empty());
assert(inl_ctx_stk_.empty());
TREE_SITTER_MARKDOWN_ASSERT(min_inl_dlms_.empty());
TREE_SITTER_MARKDOWN_ASSERT(inl_dlms_.empty());
TREE_SITTER_MARKDOWN_ASSERT(inl_ctx_stk_.empty());

if (blk_ctx_stk_.empty() && is_eof_chr(lxr_.lka_chr())) {
assert(blk_dlms_.empty());
TREE_SITTER_MARKDOWN_ASSERT(blk_dlms_.empty());
if (valid_symbols[TKN_EOF]) return lxr_.ret_sym(TKN_EOF);
return false;
}

if (is_eol_chr(lxr_.lka_chr())) {
assert(blk_dlms_.empty());
TREE_SITTER_MARKDOWN_ASSERT(blk_dlms_.empty());
scn_eol(lxr_, blk_dlms_, blk_ctx_stk_);
assert(!blk_dlms_.empty());
TREE_SITTER_MARKDOWN_ASSERT(!blk_dlms_.empty());
return lxr_.ret_sym(TKN_LKA);
}

if (valid_symbols[TKN_IND_COD_BGN_PFX] || valid_symbols[TKN_LST_ITM_CNT_BGN_MKR]) {
assert(blk_dlms_.empty());
TREE_SITTER_MARKDOWN_ASSERT(blk_dlms_.empty());
scn_blk(lxr_, blk_dlms_, blk_ctx_stk_, lxr_.cur_ind() - has_opt_wsp_ind_);
assert(!blk_dlms_.empty());
TREE_SITTER_MARKDOWN_ASSERT(!blk_dlms_.empty());
return lxr_.ret_sym(TKN_LKA);
}

Expand All @@ -263,7 +269,7 @@ struct Scanner {
do lxr_.mrk_end();
while (!is_wht_chr(lxr_.lka_chr()) && scn_inl(lxr_, inl_dlms_, inl_ctx_stk_, blk_dlms_, blk_ctx_stk_) == SYM_TXT);
} else if (sym != SYM_BLK_TXT) {
assert(!inl_dlms_.empty());
TREE_SITTER_MARKDOWN_ASSERT(!inl_dlms_.empty());
}

inl_dlms_.transfer_to(min_inl_dlms_);
Expand Down Expand Up @@ -302,7 +308,15 @@ void tree_sitter_markdown_external_scanner_deserialize(void *payload, const char

bool tree_sitter_markdown_external_scanner_scan(void *payload, TSLexer *lexer, const bool *valid_symbols) {
Scanner *scanner = static_cast<Scanner *>(payload);
#ifdef TREE_SITTER_MARKDOWN_AVOID_CRASH
try {
return scanner->scan(lexer, valid_symbols);
} catch (...) {
return false;
}
#else
return scanner->scan(lexer, valid_symbols);
#endif
}

}
12 changes: 6 additions & 6 deletions src/tree_sitter_markdown/block_context.cc
Original file line number Diff line number Diff line change
Expand Up @@ -6,18 +6,18 @@ bool BlockContext::has_fst_ctn() const { return has_fst_ctn_; }
// Returns the symbol stored in this context (sym_).
Symbol BlockContext::sym() const {
  return sym_;
}

// Returns the stored length value (len_).
LexedLength BlockContext::len() const {
  return len_;
}

// Returns the stored column value (ind_).
LexedColumn BlockContext::ind() const {
  return ind_;
}
ParseState BlockContext::pst() const { assert(pst_ != PST_INVALID); return pst_; }
ParseState BlockContext::pst() const { TREE_SITTER_MARKDOWN_ASSERT(pst_ != PST_INVALID); return pst_; }

// Sets the has_fst_ctn_ flag; nothing in this class's visible code clears it.
void BlockContext::mrk_has_fst_ctn() {
  has_fst_ctn_ = true;
}

// Default constructor: builds a placeholder context with SYM_TXT, the maximum
// length/column sentinels and PST_INVALID as its parse state.
BlockContext::BlockContext()
    : has_fst_ctn_(false),
      sym_(SYM_TXT),
      len_(LEXED_LENGTH_MAX),
      ind_(LEXED_COLUMN_MAX),
      pst_(PST_INVALID) {}

// Builds a context for `sym` with the given length and column; the parse
// state is derived from the symbol via blk_sym_pst(). has_fst_ctn_ starts
// out false.
BlockContext::BlockContext(const Symbol sym, const LexedLength len, const LexedColumn ind)
    : has_fst_ctn_(false),
      sym_(sym),
      len_(len),
      ind_(ind),
      pst_(blk_sym_pst(sym)) {}

unsigned BlockContext::serialize(unsigned char *buffer) const {
assert(is_blk_sym(sym_));
assert(sym_ <= 0b1111111);
assert(len_ <= 0b11111111);
assert(ind_ <= 0b11111111);
TREE_SITTER_MARKDOWN_ASSERT(is_blk_sym(sym_));
TREE_SITTER_MARKDOWN_ASSERT(sym_ <= 0b1111111);
TREE_SITTER_MARKDOWN_ASSERT(len_ <= 0b11111111);
TREE_SITTER_MARKDOWN_ASSERT(ind_ <= 0b11111111);
buffer[0] = (sym_ << 1) | has_fst_ctn_;
buffer[1] = len_;
buffer[2] = ind_;
Expand Down Expand Up @@ -75,7 +75,7 @@ void BlockContextStack::push(const BlockContext &ctx) {
stk_.push_back(ctx);
}
void BlockContextStack::pop() {
assert(!empty());
TREE_SITTER_MARKDOWN_ASSERT(!empty());
stk_.pop_back();
}

Expand Down
14 changes: 7 additions & 7 deletions src/tree_sitter_markdown/block_delimiter.cc
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@ Symbol BlockDelimiter::sym() const { return sym_; }
// Returns the stored length value (len_).
LexedLength BlockDelimiter::len() const {
  return len_;
}

// Returns the stored column value (ind_).
LexedColumn BlockDelimiter::ind() const {
  return ind_;
}

// Returns the has_pos_ flag.
bool BlockDelimiter::has_pos() const {
  return has_pos_;
}
const LexedPosition &BlockDelimiter::pos() const { assert(has_pos_); return pos_; }
const LexedPosition &BlockDelimiter::end_pos() const { assert(has_pos_); return end_pos_; }
const LexedPosition &BlockDelimiter::pos() const { TREE_SITTER_MARKDOWN_ASSERT(has_pos_); return pos_; }
const LexedPosition &BlockDelimiter::end_pos() const { TREE_SITTER_MARKDOWN_ASSERT(has_pos_); return end_pos_; }

void BlockDelimiter::set_len(const LexedLength len) {
len_ = len;
Expand All @@ -21,7 +21,7 @@ void BlockDelimiter::set_pos(const LexedPosition &pos, const LexedPosition &end_
pos_.set(pos);
end_pos_.set(end_pos);
has_pos_ = true;
assert(pos_.dist(end_pos_) == len_);
TREE_SITTER_MARKDOWN_ASSERT(pos_.dist(end_pos_) == len_);
}
void BlockDelimiter::drop_pos() {
has_pos_ = false;
Expand All @@ -34,10 +34,10 @@ BlockDelimiter::BlockDelimiter(const Symbol sym, const LexedPosition &pos, const
sym_(sym), len_(pos.dist(end_pos)), ind_(ind), has_pos_(true), pos_(pos), end_pos_(end_pos) {}

unsigned BlockDelimiter::serialize(unsigned char *buffer) const {
assert(is_blk_sym(sym_));
assert(sym_ < 0b11111111);
assert(len_ < 0b11111111);
assert(ind_ < 0b11111111);
TREE_SITTER_MARKDOWN_ASSERT(is_blk_sym(sym_));
TREE_SITTER_MARKDOWN_ASSERT(sym_ < 0b11111111);
TREE_SITTER_MARKDOWN_ASSERT(len_ < 0b11111111);
TREE_SITTER_MARKDOWN_ASSERT(ind_ < 0b11111111);
buffer[0] = sym_;
buffer[1] = len_;
buffer[2] = ind_;
Expand Down
30 changes: 15 additions & 15 deletions src/tree_sitter_markdown/block_scan.cc
Original file line number Diff line number Diff line change
Expand Up @@ -78,13 +78,13 @@ void scn_blk(Lexer &lxr, BlockDelimiterList &blk_dlms, const BlockContextStack &

if (!is_tbl) tmp_blk_dlms.push_back(BlockDelimiter(SYM_PGH_BGN_MKR, 0));
}
assert(!tmp_blk_dlms.empty());
TREE_SITTER_MARKDOWN_ASSERT(!tmp_blk_dlms.empty());
push_lst_nod_mkr_if_necessary(blk_dlms, tmp_blk_dlms.front(), ind, blk_ctx_stk.empty() ? SYM_NOT_FOUND : blk_ctx_stk.back().sym());
tmp_blk_dlms.transfer_to(blk_dlms);
}

bool /*is_interrupted*/ scn_eol(Lexer &lxr, BlockDelimiterList &blk_dlms, BlockContextStack &blk_ctx_stk) {
assert(is_eol_chr(lxr.lka_chr()));
TREE_SITTER_MARKDOWN_ASSERT(is_eol_chr(lxr.lka_chr()));

LexedPosition bgn_pos = lxr.cur_pos();

Expand Down Expand Up @@ -153,7 +153,7 @@ bool /*is_interrupted*/ scn_eol(Lexer &lxr, BlockDelimiterList &blk_dlms, BlockC
&& !blk_ctx_stk.empty()
&& (blk_ctx_stk.back().sym() == SYM_BTK_FEN_COD_BGN || blk_ctx_stk.back().sym() == SYM_TLD_FEN_COD_BGN)
) {
assert(!has_blk_lbk);
TREE_SITTER_MARKDOWN_ASSERT(!has_blk_lbk);
has_end_mkr = true;
tmp_blk_dlms.push_back(BlockDelimiter(get_blk_cls_sym(blk_ctx_stk.back().sym()), 0));
break;
Expand All @@ -168,7 +168,7 @@ bool /*is_interrupted*/ scn_eol(Lexer &lxr, BlockDelimiterList &blk_dlms, BlockC
const bool is_eol = is_eol_chr(lxr.lka_chr());
if (is_pas_all_blk_ctx && is_eol) {
if (blk_ctx_stk.empty() || blk_ctx_stk.back().sym() == SYM_BQT_BGN) {
assert(!has_blk_lbk);
TREE_SITTER_MARKDOWN_ASSERT(!has_blk_lbk);
has_blk_lbk = true;
tmp_blk_dlms.push_back(BlockDelimiter(SYM_BNK_LBK, bgn_pos, lst_non_wsp_end_pos));
break;
Expand All @@ -180,7 +180,7 @@ bool /*is_interrupted*/ scn_eol(Lexer &lxr, BlockDelimiterList &blk_dlms, BlockC
|| blk_ctx_stk.back().sym() == SYM_TBL_DLM_ROW_BGN_MKR
|| blk_ctx_stk.back().sym() == SYM_TBL_DAT_ROW_BGN_MKR
) {
assert(!has_blk_lbk);
TREE_SITTER_MARKDOWN_ASSERT(!has_blk_lbk);
has_end_mkr = true;
tmp_blk_dlms.push_back(BlockDelimiter(get_blk_cls_sym(blk_ctx_stk.back().sym()), 0));
break;
Expand All @@ -203,7 +203,7 @@ bool /*is_interrupted*/ scn_eol(Lexer &lxr, BlockDelimiterList &blk_dlms, BlockC
}
} else if (is_pas_all_blk_ctx) {
if (blk_ctx_stk.empty() || blk_ctx_stk.back().sym() == SYM_BQT_BGN) {
assert(!has_blk_lbk);
TREE_SITTER_MARKDOWN_ASSERT(!has_blk_lbk);
tmp_blk_dlms.push_back(BlockDelimiter(SYM_LIT_LBK, bgn_pos, lst_non_wsp_end_pos));
has_opn_mkr = true;
scn_blk(lxr, tmp_blk_dlms, blk_ctx_stk, cur_ind);
Expand All @@ -216,7 +216,7 @@ bool /*is_interrupted*/ scn_eol(Lexer &lxr, BlockDelimiterList &blk_dlms, BlockC
break;
}
if (blk_ctx_stk.back().sym() == SYM_HTM_BLK_DIV_BGN_MKR || blk_ctx_stk.back().sym() == SYM_HTM_BLK_CMP_BGN_MKR) {
assert(!has_blk_lbk);
TREE_SITTER_MARKDOWN_ASSERT(!has_blk_lbk);
LexedLength ind_chr_cnt;
LexedLength vrt_spc_cnt = lxr.clc_vtr_spc_cnt(cur_ind, 0, ind_chr_cnt);
tmp_blk_dlms.push_back(BlockDelimiter(SYM_LIT_LBK, bgn_pos.dist(lst_non_wsp_end_pos) + ind_chr_cnt));
Expand Down Expand Up @@ -311,7 +311,7 @@ bool /*is_interrupted*/ scn_eol(Lexer &lxr, BlockDelimiterList &blk_dlms, BlockC
}
break;
}
assert(
TREE_SITTER_MARKDOWN_ASSERT(
!has_blk_lbk
&& (blk_ctx_stk.back().sym() == SYM_PGH_BGN_MKR
|| blk_ctx_stk.back().sym() == SYM_TBL_DLM_ROW_BGN_MKR
Expand All @@ -335,7 +335,7 @@ bool /*is_interrupted*/ scn_eol(Lexer &lxr, BlockDelimiterList &blk_dlms, BlockC
}
break;
} else if (is_eol) {
assert(!blk_ctx_stk.empty());
TREE_SITTER_MARKDOWN_ASSERT(!blk_ctx_stk.empty());
const BlockContext *fst_bqt_ctx = NULL_PTR;
for (
BlockContextStack::ConstIterator cur_ctx_itr = fst_failed_ctx_itr;
Expand Down Expand Up @@ -367,7 +367,7 @@ bool /*is_interrupted*/ scn_eol(Lexer &lxr, BlockDelimiterList &blk_dlms, BlockC
|| blk_ctx_stk.back().sym() == SYM_TBL_DLM_ROW_BGN_MKR
|| blk_ctx_stk.back().sym() == SYM_TBL_DAT_ROW_BGN_MKR
) {
assert(!has_blk_lbk);
TREE_SITTER_MARKDOWN_ASSERT(!has_blk_lbk);
has_end_mkr = true;
tmp_blk_dlms.push_back(BlockDelimiter(get_blk_cls_sym(blk_ctx_stk.back().sym()), 0));
break;
Expand All @@ -378,9 +378,9 @@ bool /*is_interrupted*/ scn_eol(Lexer &lxr, BlockDelimiterList &blk_dlms, BlockC
break;
}
} else {
assert(!blk_ctx_stk.empty());
TREE_SITTER_MARKDOWN_ASSERT(!blk_ctx_stk.empty());
if (blk_ctx_stk.back().sym() == SYM_BTK_FEN_COD_BGN || blk_ctx_stk.back().sym() == SYM_TLD_FEN_COD_BGN) {
assert(!has_blk_lbk);
TREE_SITTER_MARKDOWN_ASSERT(!has_blk_lbk);
has_end_mkr = true;
tmp_blk_dlms.push_back(BlockDelimiter(get_blk_cls_sym(blk_ctx_stk.back().sym()), 0));
break;
Expand Down Expand Up @@ -413,7 +413,7 @@ bool /*is_interrupted*/ scn_eol(Lexer &lxr, BlockDelimiterList &blk_dlms, BlockC
}
break;
}
assert(blk_ctx_stk.back().sym() == SYM_PGH_BGN_MKR && !has_blk_lbk);
TREE_SITTER_MARKDOWN_ASSERT(blk_ctx_stk.back().sym() == SYM_PGH_BGN_MKR && !has_blk_lbk);
if (BSR_ACCEPT == scn_blk_nod(lxr, tmp_blk_dlms, cur_ind, is_pas_all_blk_ctx, /*is_pgh_cont_ln*/ true)) {
has_opn_mkr = true;
BlockContextStack::ConstReverseIterator cur_ctx_itr = blk_ctx_stk.rbegin();
Expand Down Expand Up @@ -457,7 +457,7 @@ bool /*is_interrupted*/ scn_eol(Lexer &lxr, BlockDelimiterList &blk_dlms, BlockC
continue;
}

assert(is_eof_chr(lxr.lka_chr()) && !has_opn_mkr && !has_end_mkr);
TREE_SITTER_MARKDOWN_ASSERT(is_eof_chr(lxr.lka_chr()) && !has_opn_mkr && !has_end_mkr);
has_blk_lbk = false;
tmp_blk_dlms.clear();
for (
Expand All @@ -471,7 +471,7 @@ bool /*is_interrupted*/ scn_eol(Lexer &lxr, BlockDelimiterList &blk_dlms, BlockC
if (!has_opn_mkr && !has_end_mkr && !blk_ctx_stk.empty() && !is_eof_chr(lbk_nxt_chr)) {
BlockContext &ctx = blk_ctx_stk.back();
if (!ctx.has_fst_ctn() && (ctx.sym() == SYM_BTK_FEN_COD_BGN || ctx.sym() == SYM_TLD_FEN_COD_BGN)) {
assert(tmp_blk_dlms.front().sym() == SYM_LIT_LBK || tmp_blk_dlms.front().sym() == SYM_BNK_LBK);
TREE_SITTER_MARKDOWN_ASSERT(tmp_blk_dlms.front().sym() == SYM_LIT_LBK || tmp_blk_dlms.front().sym() == SYM_BNK_LBK);
tmp_blk_dlms.transfer_to(blk_dlms, 1);
blk_dlms.push_back(BlockDelimiter(SYM_FEN_COD_CTN_BGN_MKR, 0));
}
Expand Down
Loading