Skip to content

Commit

Permalink
Merge pull request #1 from jeremyevans/master
Browse files Browse the repository at this point in the history
Fix handling of escaped backslashes, Refactor the parser
  • Loading branch information
Dan McClain committed Jul 12, 2012
2 parents 94681cc + e887f5e commit ad4987d
Show file tree
Hide file tree
Showing 2 changed files with 104 additions and 65 deletions.
124 changes: 70 additions & 54 deletions ext/pg_array_parser/pg_array_parser.c
Original file line number Diff line number Diff line change
@@ -1,90 +1,109 @@
#include <ruby.h>

/* Handle to the PgArrayParser module; assigned in Init_pg_array_parser. */
VALUE PgArrayParser = Qnil;

/*
 * Prototypes.
 *
 * read_array parses one array level of `string`, starting at *index, and
 * advances *index as it consumes input. `length` is passed by value, and
 * `word` is a caller-supplied scratch buffer used to accumulate the current
 * element.
 */
VALUE read_array(int *index, char *string, int length, char *word);
VALUE parse_pg_array(VALUE self, VALUE pg_array_string);

/*
 * Implementation of the Ruby method parse_pg_array(pg_array_string).
 *
 * Converts the argument to a C string, allocates a scratch buffer and hands
 * both to read_array(), which builds the resulting Ruby Array.
 *
 * self            - receiver (unused).
 * pg_array_string - Ruby String holding the array literal. StringValueCStr
 *                   raises if it cannot be converted to a String or if it
 *                   contains an embedded NUL byte.
 *
 * Returns the Array produced by read_array().
 * Raises ArgumentError when the input is too long to be indexed with an int,
 * and NoMemoryError (via ALLOC_N) when the scratch buffer cannot be allocated.
 */
VALUE parse_pg_array(VALUE self, VALUE pg_array_string) {
  char *c_pg_array_string = StringValueCStr(pg_array_string);
  long raw_length = RSTRING_LEN(pg_array_string);
  int array_string_length = (int)raw_length;
  /* Parsing starts at index 1; read_array never looks at the character at
   * index 0 (presumably the opening '{' of the outermost array). */
  int index = 1;
  char *word;
  VALUE return_value;

  /* read_array indexes with int; refuse input whose length does not survive
   * the narrowing instead of parsing a truncated prefix. */
  if ((long)array_string_length != raw_length) {
    rb_raise(rb_eArgError, "array string is too long to parse");
  }

  /* Scratch buffer as long as the input (plus one byte): no single element
   * can be longer than the whole string. ALLOC_N raises on failure instead
   * of returning NULL. */
  word = ALLOC_N(char, (long)array_string_length + 1);

  return_value = read_array(&index, c_pg_array_string, array_string_length, word);
  xfree(word);

  /* Keep the (possibly converted) String reachable while its C pointer was
   * being read; read_array allocates Ruby objects and may trigger GC. */
  RB_GC_GUARD(pg_array_string);
  return return_value;
}

VALUE read_array(int *index, char *c_pg_array_string, int *array_string_length, char *word)
VALUE read_array(int *index, char *c_pg_array_string, int array_string_length, char *word)
{
// Return value: array
/* Return value: array */
VALUE array;
array = rb_ary_new();
int word_index = 0;

/* The current character in the input string. */
char c;

/* 0: Currently outside a quoted string, current word never quoted
* 1: Currently inside a quoted string
* -1: Currently outside a quoted string, current word previously quoted */
int openQuote = 0;
for(;(*index) < (*array_string_length); ++(*index))

/* Inside quoted input means the next character should be treated literally,
* instead of being treated as a metacharacter.
* Outside of quoted input, means that the word shouldn't be pushed to the array,
* used when the last entry was a subarray (which adds to the array itself). */
int escapeNext = 0;

array = rb_ary_new();

/* Special case the empty array, so it doesn't need to be handled manually inside
* the loop. */
if(((*index) < array_string_length) && c_pg_array_string[(*index)] == '}')
{
return array;
}

for(;(*index) < array_string_length; ++(*index))
{
if(!openQuote && (c_pg_array_string[*index] == ','))
c = c_pg_array_string[*index];
if(openQuote < 1)
{
if(c_pg_array_string[(*index) - 1] != '"' && c_pg_array_string[(*index) - 1] != '}')
if(c == ',' || c == '}')
{
word[word_index] = '\0';
if (word_index == 4 && !strcmp(word,"NULL"))
if(!escapeNext)
{
rb_ary_push(array, Qnil);
if(openQuote == 0 && word_index == 4 && !strncmp(word, "NULL", word_index))
{
rb_ary_push(array, Qnil);
}
else
{
rb_ary_push(array, rb_str_new(word, word_index));
}
}
else
if(c == '}')
{
rb_ary_push(array, rb_str_new2(word));
return array;
}
escapeNext = 0;
openQuote = 0;
word_index = 0;
}
}
else if(!openQuote && c_pg_array_string[*index] == '}')
{
if(word_index > 0 && c_pg_array_string[(*index) - 1] != '"')
else if(c == '"')
{
word[word_index] = '\0';
if (word_index == 4 && !strcmp(word,"NULL"))
{
rb_ary_push(array, Qnil);
}
else
{
rb_ary_push(array, rb_str_new2(word));
}
word_index = 0;
openQuote = 1;
}
else if(c == '{')
{
(*index)++;
rb_ary_push(array, read_array(index, c_pg_array_string, array_string_length, word));
escapeNext = 1;
}
else
{
word[word_index] = c;
word_index++;
}
return array;
}
else if (openQuote && c_pg_array_string[*index] == '"' && c_pg_array_string[(*index) - 1] == '\\')
{
word[word_index - 1] = '"';
}
else if (openQuote && c_pg_array_string[*index] == '"' && c_pg_array_string[(*index) - 1] != '\\')
{
word[word_index] = '\0';
word_index = 0;
openQuote = 0;
rb_ary_push(array, rb_str_new2(word));
else if (escapeNext) {
word[word_index] = c;
word_index++;
escapeNext = 0;
}
else if(c_pg_array_string[*index] == '"')
else if (c == '\\')
{
openQuote = 1;
escapeNext = 1;
}
else if(!openQuote && c_pg_array_string[*index] == '{')
else if (c == '"')
{
(*index)++;
rb_ary_push(array, read_array(index, c_pg_array_string, array_string_length, word));
openQuote = -1;
}
else
{
word[word_index] = c_pg_array_string[*index];
word[word_index] = c;
word_index++;
}
}
Expand All @@ -93,9 +112,6 @@ VALUE read_array(int *index, char *c_pg_array_string, int *array_string_length,
}

/*
 * Extension entry point (Ruby calls Init_<name> when the compiled library
 * is loaded).
 *
 * Defines the PgArrayParser module, stores its handle in the PgArrayParser
 * global, and registers parse_pg_array as an instance method taking one
 * argument. The module and method are registered exactly once.
 */
void Init_pg_array_parser(void) {
  PgArrayParser = rb_define_module("PgArrayParser");
  rb_define_method(PgArrayParser, "parse_pg_array", parse_pg_array, 1);
}


45 changes: 34 additions & 11 deletions spec/parser_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -9,59 +9,82 @@ class Parser

# Examples for #parse_pg_array: each one feeds a PostgreSQL array literal to
# the parser and checks the resulting (possibly nested) Ruby array.
# `parser` is provided by the enclosing spec group (defined outside this block).
#
# Note on escaping: inside %[...] every `\\` is ONE literal backslash, so
# %[{1,"2\\",3",4}] is the input {1,"2\",3",4}.
describe '#parse_pg_array' do
  context 'one dimensional arrays' do
    context 'empty' do
      it 'returns an empty array' do
        parser.parse_pg_array(%[{}]).should == []
      end
    end

    context 'no strings' do
      it 'returns an array of strings' do
        parser.parse_pg_array(%[{1,2,3}]).should == ['1','2','3']
      end
    end

    context 'NULL values' do
      it 'returns an array of strings, with nils replacing NULL characters' do
        parser.parse_pg_array(%[{1,NULL,NULL}]).should == ['1',nil,nil]
      end
    end

    context 'quoted NULL' do
      # A quoted "NULL" is the four-character string, not a SQL NULL.
      it 'returns an array with the word NULL' do
        parser.parse_pg_array(%[{1,"NULL",3}]).should == ['1','NULL','3']
      end
    end

    context 'strings' do
      it 'returns an array of strings when containing commas in a quoted string' do
        parser.parse_pg_array(%[{1,"2,3",4}]).should == ['1','2,3','4']
      end

      it 'returns an array of strings when containing an escaped quote' do
        parser.parse_pg_array(%[{1,"2\\",3",4}]).should == ['1','2",3','4']
      end

      it 'returns an array of strings when containing an escaped backslash' do
        # An escaped backslash right before the closing quote must not be
        # mistaken for an escaped quote.
        parser.parse_pg_array(%[{1,"2\\\\",3,4}]).should == ['1','2\\','3','4']
        # Escaped backslash followed by an escaped quote inside one element.
        parser.parse_pg_array(%[{1,"2\\\\\\",3",4}]).should == ['1','2\\",3','4']
      end
    end
  end

  context 'two dimensional arrays' do
    context 'empty' do
      it 'returns an empty array' do
        parser.parse_pg_array(%[{{}}]).should == [[]]
        parser.parse_pg_array(%[{{},{}}]).should == [[],[]]
      end
    end

    context 'no strings' do
      it 'returns an array of strings with a sub array' do
        parser.parse_pg_array(%[{1,{2,3},4}]).should == ['1',['2','3'],'4']
      end
    end

    context 'strings' do
      it 'returns an array of strings with a sub array' do
        parser.parse_pg_array(%[{1,{"2,3"},4}]).should == ['1',['2,3'],'4']
      end

      it 'returns an array of strings with a sub array and a quoted }' do
        parser.parse_pg_array(%[{1,{"2,}3",NULL},4}]).should == ['1',['2,}3',nil],'4']
      end

      it 'returns an array of strings with a sub array and a quoted {' do
        parser.parse_pg_array(%[{1,{"2,{3"},4}]).should == ['1',['2,{3'],'4']
      end

      it 'returns an array of strings with a sub array and a quoted { and escaped quote' do
        parser.parse_pg_array(%[{1,{"2\\",{3"},4}]).should == ['1',['2",{3'],'4']
      end
    end
  end

  context 'three dimensional arrays' do
    context 'empty' do
      it 'returns an empty array' do
        parser.parse_pg_array(%[{{{}}}]).should == [[[]]]
        parser.parse_pg_array(%[{{{},{}},{{},{}}}]).should == [[[],[]],[[],[]]]
      end
    end

    it 'returns an array of strings with sub arrays' do
      parser.parse_pg_array(%[{1,{2,{3,4}},{NULL,6},7}]).should == ['1',['2',['3','4']],[nil,'6'],'7']
    end
  end
end
Expand Down

0 comments on commit ad4987d

Please sign in to comment.