Skip to content

Commit

Permalink
Merge pull request #74794 from lawnjelly/gdscript_parser_hashtable
Browse files Browse the repository at this point in the history
[3.x] Use hash table for GDScript parsing
  • Loading branch information
akien-mga authored Mar 12, 2023
2 parents 16df341 + 19f2006 commit 26a5841
Show file tree
Hide file tree
Showing 3 changed files with 114 additions and 60 deletions.
4 changes: 4 additions & 0 deletions modules/gdscript/gdscript.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2095,6 +2095,8 @@ GDScriptWarning::Code GDScriptWarning::get_code_from_name(const String &p_name)
#endif // DEBUG_ENABLED

GDScriptLanguage::GDScriptLanguage() {
GDScriptTokenizer::initialize();

calls = 0;
ERR_FAIL_COND(singleton);
singleton = this;
Expand Down Expand Up @@ -2139,6 +2141,8 @@ GDScriptLanguage::GDScriptLanguage() {
}

GDScriptLanguage::~GDScriptLanguage() {
GDScriptTokenizer::terminate();

if (_call_stack) {
memdelete_arr(_call_stack);
}
Expand Down
155 changes: 96 additions & 59 deletions modules/gdscript/gdscript_tokenizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,8 @@
#include "core/print_string.h"
#include "gdscript_functions.h"

// Shared lookup table mapping reserved identifier text to an integer id.
// Allocated by GDScriptTokenizer::initialize() and released by
// GDScriptTokenizer::terminate(); it is null outside that window.
OAHashMap<String, int> *GDScriptTokenizer::token_hashtable = nullptr;

const char *GDScriptTokenizer::token_names[TK_MAX] = {
"Empty",
"Identifier",
Expand Down Expand Up @@ -235,6 +237,96 @@ static const _kws _keyword_list[] = {
{ GDScriptTokenizer::TK_ERROR, nullptr }
};

// Prepare the hash table for parsing as a one-off at startup.
//
// Every reserved word is inserted with an integer id whose range identifies
// its category:
//   0..2                                  -> the constants null / true / false
//   TOKEN_HASH_TABLE_TYPE_START + i       -> _type_list[i]
//   TOKEN_HASH_TABLE_BUILTIN_START + j    -> GDScriptFunctions::Function(j)
//   TOKEN_HASH_TABLE_KEYWORD_START + k    -> _keyword_list[k]
//
// Calling this again while a table already exists is a no-op, so the existing
// allocation is never overwritten (and leaked).
void GDScriptTokenizer::initialize() {
	if (token_hashtable) {
		// Already built; replacing the pointer would leak the previous table.
		return;
	}

	token_hashtable = memnew((OAHashMap<String, int>));

	// Literal constants occupy the ids below TOKEN_HASH_TABLE_TYPE_START.
	token_hashtable->insert("null", 0);
	token_hashtable->insert("true", 1);
	token_hashtable->insert("false", 2);

	// _type_list (terminated by an entry whose text is null).
	// NOTE(review): the ranges only stay disjoint while _type_list has at most
	// Variant::VARIANT_MAX named entries - confirm if the list is extended.
	int id = TOKEN_HASH_TABLE_TYPE_START;
	for (int idx = 0; _type_list[idx].text; idx++) {
		token_hashtable->insert(_type_list[idx].text, id++);
	}

	// built in funcs
	id = TOKEN_HASH_TABLE_BUILTIN_START;
	for (int j = 0; j < GDScriptFunctions::FUNC_MAX; j++) {
		token_hashtable->insert(GDScriptFunctions::get_func_name(GDScriptFunctions::Function(j)), id++);
	}

	// keywords (terminated by an entry whose text is null).
	id = TOKEN_HASH_TABLE_KEYWORD_START;
	for (int idx = 0; _keyword_list[idx].text; idx++) {
		token_hashtable->insert(_keyword_list[idx].text, id++);
	}
}

// Frees the table created by initialize() and resets the pointer to null.
// Does nothing when no table is currently allocated.
void GDScriptTokenizer::terminate() {
	if (token_hashtable == nullptr) {
		return;
	}

	memdelete(token_hashtable);
	token_hashtable = nullptr;
}

// Looks p_str up in the reserved-word table and, when present, emits the
// matching token (constant, built-in type, built-in function or keyword).
//
// Returns true when p_str is in the table, false otherwise; on false nothing
// is emitted and the caller is expected to handle p_str itself.
bool GDScriptTokenizerText::_parse_identifier(const String &p_str) {
	// N.B. GDScriptTokenizer::initialize() must have been called before using this function,
	// else token_hashtable will be NULL.
	DEV_ASSERT(token_hashtable);

	const int *found = token_hashtable->lookup_ptr(p_str);
	if (!found) {
		// not found
		return false;
	}

	const int id = *found;

	// Ids below the type range are the literal constants.
	if (id < TOKEN_HASH_TABLE_TYPE_START) {
		switch (id) {
			case 0: {
				_make_constant(Variant());
			} break;
			case 1: {
				_make_constant(true);
			} break;
			case 2: {
				_make_constant(false);
			} break;
			default: {
				// Only ids 0..2 are ever inserted below the type range.
				DEV_ASSERT(0);
			} break;
		}
		return true;
	}

	// type list
	if (id < TOKEN_HASH_TABLE_BUILTIN_START) {
		_make_type(_type_list[id - TOKEN_HASH_TABLE_TYPE_START].type);
		return true;
	}

	// built in func
	if (id < TOKEN_HASH_TABLE_KEYWORD_START) {
		_make_built_in_func(GDScriptFunctions::Function(id - TOKEN_HASH_TABLE_BUILTIN_START));
		return true;
	}

	// keyword
	_make_token(_keyword_list[id - TOKEN_HASH_TABLE_KEYWORD_START].token);
	return true;
}

const char *GDScriptTokenizer::get_token_name(Token p_token) {
ERR_FAIL_INDEX_V(p_token, TK_MAX, "<error>");
return token_names[p_token];
Expand Down Expand Up @@ -977,68 +1069,13 @@ void GDScriptTokenizerText::_advance() {
i++;
}

bool identifier = false;

if (str == "null") {
_make_constant(Variant());

} else if (str == "true") {
_make_constant(true);

} else if (str == "false") {
_make_constant(false);
} else {
bool found = false;

{
int idx = 0;

while (_type_list[idx].text) {
if (str == _type_list[idx].text) {
_make_type(_type_list[idx].type);
found = true;
break;
}
idx++;
}
}

if (!found) {
//built in func?

for (int j = 0; j < GDScriptFunctions::FUNC_MAX; j++) {
if (str == GDScriptFunctions::get_func_name(GDScriptFunctions::Function(j))) {
_make_built_in_func(GDScriptFunctions::Function(j));
found = true;
break;
}
}
}

if (!found) {
//keyword
// Detect preset keywords / functions using hashtable.
bool found = _parse_identifier(str);

int idx = 0;
found = false;

while (_keyword_list[idx].text) {
if (str == _keyword_list[idx].text) {
_make_token(_keyword_list[idx].token);
found = true;
break;
}
idx++;
}
}

if (!found) {
identifier = true;
}
}

if (identifier) {
if (!found) {
_make_identifier(str);
}

INCPOS(str.length());
return;
}
Expand Down
15 changes: 14 additions & 1 deletion modules/gdscript/gdscript_tokenizer.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
#ifndef GDSCRIPT_TOKENIZER_H
#define GDSCRIPT_TOKENIZER_H

#include "core/oa_hash_map.h"
#include "core/pair.h"
#include "core/string_name.h"
#include "core/ustring.h"
Expand Down Expand Up @@ -154,9 +155,20 @@ class GDScriptTokenizer {

static const char *token_names[TK_MAX];

// Base ids for the values stored in token_hashtable. Ids 0..2 are used for
// the constants null, true and false; each range below starts where the
// previous one ends.
enum {
// First id of the built-in type names.
TOKEN_HASH_TABLE_TYPE_START = 3,
// First id of the built-in functions; leaves Variant::VARIANT_MAX ids for type names.
TOKEN_HASH_TABLE_BUILTIN_START = TOKEN_HASH_TABLE_TYPE_START + Variant::VARIANT_MAX,
// First id of the keywords; leaves GDScriptFunctions::FUNC_MAX ids for built-in functions.
TOKEN_HASH_TABLE_KEYWORD_START = TOKEN_HASH_TABLE_BUILTIN_START + GDScriptFunctions::FUNC_MAX,
};

static OAHashMap<String, int> *token_hashtable;

public:
static const char *get_token_name(Token p_token);

static void initialize();
static void terminate();

bool is_token_literal(int p_offset = 0, bool variable_safe = false) const;
StringName get_token_literal(int p_offset = 0) const;

Expand All @@ -177,7 +189,7 @@ class GDScriptTokenizer {
virtual bool is_ignoring_warnings() const = 0;
#endif // DEBUG_ENABLED

virtual ~GDScriptTokenizer(){};
virtual ~GDScriptTokenizer() {}
};

class GDScriptTokenizerText : public GDScriptTokenizer {
Expand Down Expand Up @@ -230,6 +242,7 @@ class GDScriptTokenizerText : public GDScriptTokenizer {
#endif // DEBUG_ENABLED

void _advance();
bool _parse_identifier(const String &p_str);

public:
void set_code(const String &p_code);
Expand Down

0 comments on commit 26a5841

Please sign in to comment.