Skip to content

Commit 17b0704

Browse files
committed
Further optimize Dataset#map by populating array in C instead of yielding each entry to Ruby
This avoids C -> Ruby calls, which are more expensive when a JIT is used. It is also faster because the array is allocated with the necessary capacity up front, instead of having Ruby resize it as entries are appended. A similar approach is already used for as_hash and to_hash_groups.
1 parent ae408cf commit 17b0704

File tree

3 files changed

+42
-3
lines changed

3 files changed

+42
-3
lines changed

CHANGELOG

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
=== master
22

3+
* Further optimize Dataset#map by populating array in C instead of yielding to Ruby (jeremyevans)
4+
35
* Optimize Dataset#as_set and #select_set in Sequel 5.99+ (jeremyevans)
46

57
=== 1.17.2 (2025-03-14)

ext/sequel_pg/sequel_pg.c

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,8 @@
6565
#define SPG_YIELD_MKV_HASH_GROUPS 11
6666
#define SPG_YIELD_KMV_HASH_GROUPS 12
6767
#define SPG_YIELD_MKMV_HASH_GROUPS 13
68+
#define SPG_YIELD_COLUMN_ARRAY 14
69+
#define SPG_YIELD_COLUMNS_ARRAY 15
6870

6971
/* External functions defined by ruby-pg */
7072
PGconn* pg_get_pgconn(VALUE);
@@ -91,6 +93,7 @@ static VALUE spg_vmasks6;
9193
static VALUE spg_sym_utc;
9294
static VALUE spg_sym_local;
9395
static VALUE spg_sym_map;
96+
static VALUE spg_sym_map_array;
9497
static VALUE spg_sym_first;
9598
static VALUE spg_sym_array;
9699
static VALUE spg_sym_hash;
@@ -1405,6 +1408,12 @@ static VALUE spg_yield_hash_rows_internal(VALUE self, PGresult *res, int enc_ind
14051408
} else if (rb_type(pg_value) == T_ARRAY) {
14061409
type = SPG_YIELD_COLUMNS;
14071410
}
1411+
} else if (pg_type == spg_sym_map_array) {
1412+
if (SYMBOL_P(pg_value)) {
1413+
type = SPG_YIELD_COLUMN_ARRAY;
1414+
} else if (rb_type(pg_value) == T_ARRAY) {
1415+
type = SPG_YIELD_COLUMNS_ARRAY;
1416+
}
14081417
} else if (pg_type == spg_sym_first) {
14091418
type = SPG_YIELD_FIRST;
14101419
} else if (pg_type == spg_sym_array) {
@@ -1463,6 +1472,35 @@ static VALUE spg_yield_hash_rows_internal(VALUE self, PGresult *res, int enc_ind
14631472
rb_yield(spg__col_values(self, h, colsyms, nfields, res, i, colconvert, enc_index));
14641473
}
14651474
break;
1475+
case SPG_YIELD_COLUMN_ARRAY:
1476+
/* Array containing single column */
1477+
{
1478+
VALUE ary = rb_ary_new2(ntuples);
1479+
j = spg__field_id(pg_value, colsyms, nfields);
1480+
if (j == -1) {
1481+
for(i=0; i<ntuples; i++) {
1482+
rb_ary_store(ary, i, Qnil);
1483+
}
1484+
}
1485+
else {
1486+
for(i=0; i<ntuples; i++) {
1487+
rb_ary_store(ary, i, spg__col_value(self, res, i, j, colconvert, enc_index));
1488+
}
1489+
}
1490+
rb_yield(ary);
1491+
}
1492+
break;
1493+
case SPG_YIELD_COLUMNS_ARRAY:
1494+
/* Array containing arrays of columns */
1495+
{
1496+
VALUE ary = rb_ary_new2(ntuples);
1497+
h = spg__field_ids(pg_value, colsyms, nfields);
1498+
for(i=0; i<ntuples; i++) {
1499+
rb_ary_store(ary, i, spg__col_values(self, h, colsyms, nfields, res, i, colconvert, enc_index));
1500+
}
1501+
rb_yield(ary);
1502+
}
1503+
break;
14661504
case SPG_YIELD_FIRST:
14671505
/* First column */
14681506
for(i=0; i<ntuples; i++) {
@@ -1918,6 +1956,7 @@ void Init_sequel_pg(void) {
19181956
spg_sym_utc = ID2SYM(rb_intern("utc"));
19191957
spg_sym_local = ID2SYM(rb_intern("local"));
19201958
spg_sym_map = ID2SYM(rb_intern("map"));
1959+
spg_sym_map_array = ID2SYM(rb_intern("map_array"));
19211960
spg_sym_first = ID2SYM(rb_intern("first"));
19221961
spg_sym_array = ID2SYM(rb_intern("array"));
19231962
spg_sym_hash = ID2SYM(rb_intern("hash"));

lib/sequel_pg/sequel_pg.rb

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -38,9 +38,7 @@ def map(sym=nil)
3838
if block_given?
3939
super
4040
else
41-
rows = []
42-
clone(:_sequel_pg_type=>:map, :_sequel_pg_value=>sym).fetch_rows(sql){|s| rows << s}
43-
rows
41+
clone(:_sequel_pg_type=>:map_array, :_sequel_pg_value=>sym).fetch_rows(sql){|a| return a}
4442
end
4543
else
4644
super

0 commit comments

Comments
 (0)