Skip to content

Commit

Permalink
Prism::StringQuery
Browse files Browse the repository at this point in the history
Introduce StringQuery to provide methods to access some metadata
about the Ruby lexer.
  • Loading branch information
kddnewton committed Oct 11, 2024
1 parent b08e569 commit 2dd296f
Show file tree
Hide file tree
Showing 7 changed files with 400 additions and 4 deletions.
67 changes: 67 additions & 0 deletions ext/prism/extension.c
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ VALUE rb_cPrismResult;
VALUE rb_cPrismParseResult;
VALUE rb_cPrismLexResult;
VALUE rb_cPrismParseLexResult;
VALUE rb_cPrismStringQuery;

VALUE rb_cPrismDebugEncoding;

Expand Down Expand Up @@ -1133,6 +1134,67 @@ parse_file_failure_p(int argc, VALUE *argv, VALUE self) {
return RTEST(parse_file_success_p(argc, argv, self)) ? Qfalse : Qtrue;
}

/******************************************************************************/
/* String query methods */
/******************************************************************************/

/**
 * Convert the result of a string query into a Ruby boolean.
 *
 * Returns Qtrue for PM_STRING_QUERY_TRUE and Qfalse for PM_STRING_QUERY_FALSE.
 * Raises ArgumentError for PM_STRING_QUERY_ERROR.
 */
static VALUE
string_query(pm_string_query_t result) {
    switch (result) {
        case PM_STRING_QUERY_ERROR:
            rb_raise(rb_eArgError, "Invalid or non ascii-compatible encoding");
            // rb_raise does not return; this only keeps every case visibly
            // terminated.
            return Qfalse;
        case PM_STRING_QUERY_FALSE:
            return Qfalse;
        case PM_STRING_QUERY_TRUE:
            return Qtrue;
    }

    // Every enumerator is handled above, but a C enum can still carry an
    // out-of-range value. Without this return, control would fall off the end
    // of a non-void function, which is undefined behavior and triggers
    // -Wreturn-type on some compilers.
    return Qfalse;
}

/**
 * call-seq:
 *   Prism::StringQuery::local?(string) -> bool
 *
 * Returns true if the string constitutes a valid local variable name. Note that
 * this means the names that can be set through Binding#local_variable_set, not
 * necessarily the ones that can be set through a local variable assignment.
 */
static VALUE
string_query_local_p(VALUE self, VALUE string) {
    // check_string runs first so that the length and encoding are only read
    // from an argument that has already been checked.
    const uint8_t *source = (const uint8_t *) check_string(string);
    size_t length = (size_t) RSTRING_LEN(string);
    const char *encoding_name = rb_enc_get(string)->name;

    return string_query(pm_string_query_local(source, length, encoding_name));
}

/**
 * call-seq:
 *   Prism::StringQuery::constant?(string) -> bool
 *
 * Returns true if the string constitutes a valid constant name. Note that this
 * means the names that can be set through Module#const_set, not necessarily the
 * ones that can be set through a constant assignment.
 */
static VALUE
string_query_constant_p(VALUE self, VALUE string) {
    // check_string runs first so that the length and encoding are only read
    // from an argument that has already been checked.
    const uint8_t *source = (const uint8_t *) check_string(string);
    size_t length = (size_t) RSTRING_LEN(string);
    const char *encoding_name = rb_enc_get(string)->name;

    return string_query(pm_string_query_constant(source, length, encoding_name));
}

/**
 * call-seq:
 *   Prism::StringQuery::method_name?(string) -> bool
 *
 * Returns true if the string constitutes a valid method name.
 */
static VALUE
string_query_method_name_p(VALUE self, VALUE string) {
    // check_string runs first so that the length and encoding are only read
    // from an argument that has already been checked.
    const uint8_t *source = (const uint8_t *) check_string(string);
    size_t length = (size_t) RSTRING_LEN(string);
    const char *encoding_name = rb_enc_get(string)->name;

    return string_query(pm_string_query_method_name(source, length, encoding_name));
}

/******************************************************************************/
/* Initialization of the extension */
/******************************************************************************/
Expand Down Expand Up @@ -1170,6 +1232,7 @@ Init_prism(void) {
rb_cPrismParseResult = rb_define_class_under(rb_cPrism, "ParseResult", rb_cPrismResult);
rb_cPrismLexResult = rb_define_class_under(rb_cPrism, "LexResult", rb_cPrismResult);
rb_cPrismParseLexResult = rb_define_class_under(rb_cPrism, "ParseLexResult", rb_cPrismResult);
rb_cPrismStringQuery = rb_define_class_under(rb_cPrism, "StringQuery", rb_cObject);

// Intern all of the IDs eagerly that we support so that we don't have to do
// it every time we parse.
Expand Down Expand Up @@ -1211,6 +1274,10 @@ Init_prism(void) {
rb_define_singleton_method(rb_cPrism, "dump_file", dump_file, -1);
#endif

rb_define_singleton_method(rb_cPrismStringQuery, "local?", string_query_local_p, 1);
rb_define_singleton_method(rb_cPrismStringQuery, "constant?", string_query_constant_p, 1);
rb_define_singleton_method(rb_cPrismStringQuery, "method_name?", string_query_method_name_p, 1);

// Next, initialize the other APIs.
Init_prism_api_node();
Init_prism_pack();
Expand Down
47 changes: 47 additions & 0 deletions include/prism.h
Original file line number Diff line number Diff line change
Expand Up @@ -234,6 +234,53 @@ PRISM_EXPORTED_FUNCTION void pm_dump_json(pm_buffer_t *buffer, const pm_parser_t

#endif

/**
 * Represents the result of a string query (one of the pm_string_query_*
 * functions).
 */
typedef enum {
/** Returned if the encoding given to a string query was invalid. */
PM_STRING_QUERY_ERROR = -1,

/** Returned if the result of the string query is false. */
PM_STRING_QUERY_FALSE,

/** Returned if the result of the string query is true. */
PM_STRING_QUERY_TRUE
} pm_string_query_t;

/**
 * Check that the given string is a valid local variable name.
 *
 * @param source The source to check.
 * @param length The length of the source, in bytes.
 * @param encoding_name The name of the encoding of the source.
 * @return PM_STRING_QUERY_TRUE if the query is true, PM_STRING_QUERY_FALSE if
 *     the query is false, and PM_STRING_QUERY_ERROR if the encoding was invalid.
 */
PRISM_EXPORTED_FUNCTION pm_string_query_t pm_string_query_local(const uint8_t *source, size_t length, const char *encoding_name);

/**
 * Check that the given string is a valid constant name.
 *
 * @param source The source to check.
 * @param length The length of the source, in bytes.
 * @param encoding_name The name of the encoding of the source.
 * @return PM_STRING_QUERY_TRUE if the query is true, PM_STRING_QUERY_FALSE if
 *     the query is false, and PM_STRING_QUERY_ERROR if the encoding was invalid.
 */
PRISM_EXPORTED_FUNCTION pm_string_query_t pm_string_query_constant(const uint8_t *source, size_t length, const char *encoding_name);

/**
 * Check that the given string is a valid method name.
 *
 * @param source The source to check.
 * @param length The length of the source, in bytes.
 * @param encoding_name The name of the encoding of the source.
 * @return PM_STRING_QUERY_TRUE if the query is true, PM_STRING_QUERY_FALSE if
 *     the query is false, and PM_STRING_QUERY_ERROR if the encoding was invalid.
 */
PRISM_EXPORTED_FUNCTION pm_string_query_t pm_string_query_method_name(const uint8_t *source, size_t length, const char *encoding_name);

/**
* @mainpage
*
Expand Down
9 changes: 5 additions & 4 deletions lib/prism.rb
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ module Prism
autoload :Pattern, "prism/pattern"
autoload :Reflection, "prism/reflection"
autoload :Serialize, "prism/serialize"
autoload :StringQuery, "prism/string_query"
autoload :Translation, "prism/translation"
autoload :Visitor, "prism/visitor"

Expand Down Expand Up @@ -75,13 +76,13 @@ def self.load(source, serialized)
# it's going to require the built library. Otherwise, it's going to require a
# module that uses FFI to call into the library.
if RUBY_ENGINE == "ruby" and !ENV["PRISM_FFI_BACKEND"]
require "prism/prism"

# The C extension is the default backend on CRuby.
Prism::BACKEND = :CEXT
else
require_relative "prism/ffi"

require "prism/prism"
else
# The FFI backend is used on other Ruby implementations.
Prism::BACKEND = :FFI

require_relative "prism/ffi"
end
4 changes: 4 additions & 0 deletions lib/prism/ffi.rb
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@ def self.load_exported_functions_from(header, *functions, callbacks)

callback :pm_parse_stream_fgets_t, [:pointer, :int, :pointer], :pointer
enum :pm_string_init_result_t, %i[PM_STRING_INIT_SUCCESS PM_STRING_INIT_ERROR_GENERIC PM_STRING_INIT_ERROR_DIRECTORY]
enum :pm_string_query_t, [:PM_STRING_QUERY_ERROR, -1, :PM_STRING_QUERY_FALSE, :PM_STRING_QUERY_TRUE]

load_exported_functions_from(
"prism.h",
Expand All @@ -83,6 +84,9 @@ def self.load_exported_functions_from(header, *functions, callbacks)
"pm_serialize_lex",
"pm_serialize_parse_lex",
"pm_parse_success_p",
"pm_string_query_local",
"pm_string_query_constant",
"pm_string_query_method_name",
[:pm_parse_stream_fgets_t]
)

Expand Down
62 changes: 62 additions & 0 deletions lib/prism/string_query.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
# frozen_string_literal: true

module Prism
  # Wraps a string and answers questions about where that string could be
  # valid within a Ruby syntax tree (as a local variable name, a constant
  # name, or a method name).
  class StringQuery
    # The string being queried.
    attr_reader :string

    # Build a query object around the given string.
    def initialize(string)
      @string = string
    end

    # True if the wrapped string is a valid local variable name.
    def local?
      StringQuery.local?(string)
    end

    # True if the wrapped string is a valid constant name.
    def constant?
      StringQuery.constant?(string)
    end

    # True if the wrapped string is a valid method name.
    def method_name?
      StringQuery.method_name?(string)
    end

    # The class-level query methods are only defined here when the FFI backend
    # is in use, in which case they call into the shared library through
    # LibRubyParser.
    if BACKEND == :FFI
      # Mirrors the C extension's StringQuery::local? method.
      def self.local?(string)
        result = LibRubyParser.pm_string_query_local(string, string.bytesize, string.encoding.name)
        query(result)
      end

      # Mirrors the C extension's StringQuery::constant? method.
      def self.constant?(string)
        result = LibRubyParser.pm_string_query_constant(string, string.bytesize, string.encoding.name)
        query(result)
      end

      # Mirrors the C extension's StringQuery::method_name? method.
      def self.method_name?(string)
        result = LibRubyParser.pm_string_query_method_name(string, string.bytesize, string.encoding.name)
        query(result)
      end

      # Translate the enum symbol returned by the library into a boolean,
      # raising ArgumentError when the library reports an error.
      def self.query(result)
        case result
        when :PM_STRING_QUERY_ERROR
          raise ArgumentError, "Invalid or non ascii-compatible encoding"
        when :PM_STRING_QUERY_FALSE
          false
        when :PM_STRING_QUERY_TRUE
          true
        end
      end
      private_class_method :query
    end
  end
end
Loading

0 comments on commit 2dd296f

Please sign in to comment.