Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 3 additions & 5 deletions gc.c
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@
#include "internal/object.h"
#include "internal/proc.h"
#include "internal/rational.h"
#include "internal/re.h"
#include "internal/sanitizers.h"
#include "internal/struct.h"
#include "internal/symbol.h"
Expand Down Expand Up @@ -2710,11 +2711,8 @@ obj_free(rb_objspace_t *objspace, VALUE obj)
}
break;
case T_REGEXP:
if (RANY(obj)->as.regexp.ptr) {
onig_free(RANY(obj)->as.regexp.ptr);
RB_DEBUG_COUNTER_INC(obj_regexp_ptr);
}
break;
rb_reg_free(obj);
break;
case T_DATA:
if (DATA_PTR(obj)) {
int free_immediately = FALSE;
Expand Down
1 change: 1 addition & 0 deletions internal/re.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@

/* re.c */
VALUE rb_reg_compile(VALUE str, int options, const char *sourcefile, int sourceline);
void rb_reg_free(VALUE re);
VALUE rb_reg_check_preprocess(VALUE);
long rb_reg_search0(VALUE, VALUE, long, int, int);
VALUE rb_reg_match_p(VALUE re, VALUE str, long pos);
Expand Down
1 change: 1 addition & 0 deletions internal/vm.h
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,7 @@ int rb_vm_check_optimizable_mid(VALUE mid);
VALUE rb_yield_refine_block(VALUE refinement, VALUE refinements);
MJIT_STATIC VALUE ruby_vm_special_exception_copy(VALUE);
PUREFUNC(st_table *rb_vm_fstring_table(void));
PUREFUNC(st_table *rb_vm_regexp_literals_table(void));

MJIT_SYMBOL_EXPORT_BEGIN
VALUE vm_exec(struct rb_execution_context_struct *, int); /* used in JIT-ed code */
Expand Down
129 changes: 93 additions & 36 deletions re.c
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,15 @@

#include <ctype.h>

#include "debug_counter.h"
#include "encindex.h"
#include "gc.h"
#include "internal.h"
#include "internal/error.h"
#include "internal/hash.h"
#include "internal/imemo.h"
#include "internal/re.h"
#include "internal/vm.h"
#include "regint.h"
#include "ruby/encoding.h"
#include "ruby/re.h"
Expand Down Expand Up @@ -2956,36 +2959,43 @@ rb_reg_new(const char *s, long len, int options)
return rb_enc_reg_new(s, len, rb_ascii8bit_encoding(), options);
}

VALUE
rb_reg_compile(VALUE str, int options, const char *sourcefile, int sourceline)
static int
reg_lit_update_callback(st_data_t *key, st_data_t *value, st_data_t arg, int existing)
{
VALUE re = rb_reg_alloc();
onig_errmsg_buffer err = "";
VALUE *new_re = (VALUE *)arg;
VALUE re = (VALUE)*key;

if (!str) str = rb_str_new(0,0);
if (rb_reg_initialize_str(re, str, options, err, sourcefile, sourceline) != 0) {
rb_set_errinfo(rb_reg_error_desc(str, options, err));
return Qnil;
if (existing) {
/* because of lazy sweep, str may be unmarked already and swept
* at next time */

if (rb_objspace_garbage_object_p(re)) {
*new_re = Qundef;
return ST_DELETE;
}

*new_re = re;
return ST_STOP;
} else {
FL_SET(re, REG_LITERAL);
rb_obj_freeze(re);

*key = *new_re = re;
*value = RREGEXP_SRC(re);
return ST_CONTINUE;
}
FL_SET(re, REG_LITERAL);
rb_obj_freeze(re);
return re;
}

static VALUE reg_cache;

VALUE
rb_reg_regcomp(VALUE str)
static st_index_t
reg_hash(VALUE re)
{
if (reg_cache && RREGEXP_SRC_LEN(reg_cache) == RSTRING_LEN(str)
&& ENCODING_GET(reg_cache) == ENCODING_GET(str)
&& memcmp(RREGEXP_SRC_PTR(reg_cache), RSTRING_PTR(str), RSTRING_LEN(str)) == 0)
return reg_cache;

return reg_cache = rb_reg_new_str(str, 0);
st_index_t hashval;
hashval = RREGEXP_PTR(re)->options;
hashval = rb_hash_uint(hashval, rb_memhash(RREGEXP_SRC_PTR(re), RREGEXP_SRC_LEN(re)));
return rb_hash_end(hashval);
}

static st_index_t reg_hash(VALUE re);
/*
* call-seq:
* rxp.hash -> integer
Expand All @@ -2998,21 +3008,75 @@ static st_index_t reg_hash(VALUE re);
static VALUE
rb_reg_hash(VALUE re)
{
rb_reg_check(re);
st_index_t hashval = reg_hash(re);
return ST2FIX(hashval);
}

static st_index_t
reg_hash(VALUE re)
VALUE
rb_reg_compile(VALUE str, int options, const char *sourcefile, int sourceline)
{
st_index_t hashval;
VALUE re, ret;
onig_errmsg_buffer err = "";
st_table *regexp_literals = rb_vm_regexp_literals_table();

rb_reg_check(re);
hashval = RREGEXP_PTR(re)->options;
hashval = rb_hash_uint(hashval, rb_memhash(RREGEXP_SRC_PTR(re), RREGEXP_SRC_LEN(re)));
return rb_hash_end(hashval);
if (!str) str = rb_str_new(0,0);
re = rb_reg_alloc();
if (rb_reg_initialize_str(re, str, options, err, sourcefile, sourceline) != 0) {
rb_set_errinfo(rb_reg_error_desc(str, options, err));
return Qnil;
}

do {
ret = re;
st_update(regexp_literals, (st_data_t)re,
reg_lit_update_callback, (st_data_t)&ret);
} while (ret == Qundef);
return ret;
}

void
rb_reg_free(VALUE re) {
if (FL_TEST(re, REG_LITERAL)) {
st_data_t regexp_literal = (st_data_t)re;
st_delete(rb_vm_regexp_literals_table(), &regexp_literal, NULL);
}

onig_free(RREGEXP_PTR(re));
RB_DEBUG_COUNTER_INC(obj_regexp_ptr);
}

static VALUE reg_cache;

VALUE
rb_reg_regcomp(VALUE str)
{
if (reg_cache && RREGEXP_SRC_LEN(reg_cache) == RSTRING_LEN(str)
&& ENCODING_GET(reg_cache) == ENCODING_GET(str)
&& memcmp(RREGEXP_SRC_PTR(reg_cache), RSTRING_PTR(str), RSTRING_LEN(str)) == 0)
return reg_cache;

return reg_cache = rb_reg_new_str(str, 0);
}

static int
reg_cmp(VALUE re1, VALUE re2)
{
if (re1 == re2) return 0;

if (!RB_TYPE_P(re2, T_REGEXP)) return 1;
if (FL_TEST(re1, KCODE_FIXED) != FL_TEST(re2, KCODE_FIXED)) return 1;
if (RREGEXP_PTR(re1)->options != RREGEXP_PTR(re2)->options) return 1;
if (RREGEXP_SRC_LEN(re1) != RREGEXP_SRC_LEN(re2)) return 1;
if (ENCODING_GET(re1) != ENCODING_GET(re2)) return 1;

return memcmp(RREGEXP_SRC_PTR(re1), RREGEXP_SRC_PTR(re2), RREGEXP_SRC_LEN(re1));
}

const struct st_hash_type rb_regexp_literal_hash_type = {
reg_cmp,
reg_hash,
};

/*
* call-seq:
Expand All @@ -3035,14 +3099,7 @@ rb_reg_equal(VALUE re1, VALUE re2)
if (re1 == re2) return Qtrue;
if (!RB_TYPE_P(re2, T_REGEXP)) return Qfalse;
rb_reg_check(re1); rb_reg_check(re2);
if (FL_TEST(re1, KCODE_FIXED) != FL_TEST(re2, KCODE_FIXED)) return Qfalse;
if (RREGEXP_PTR(re1)->options != RREGEXP_PTR(re2)->options) return Qfalse;
if (RREGEXP_SRC_LEN(re1) != RREGEXP_SRC_LEN(re2)) return Qfalse;
if (ENCODING_GET(re1) != ENCODING_GET(re2)) return Qfalse;
if (memcmp(RREGEXP_SRC_PTR(re1), RREGEXP_SRC_PTR(re2), RREGEXP_SRC_LEN(re1)) == 0) {
return Qtrue;
}
return Qfalse;
return reg_cmp(re1, re2) == 0 ? Qtrue : Qfalse;
}

/*
Expand Down
7 changes: 7 additions & 0 deletions test/ruby/test_regexp.rb
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,13 @@ def test_assert_normal_exit
Regexp.union("a", "a")
end

def test_literal_deduplication
assert_same(/a/, /a/)
refute_same(/a/, /a/m)
refute_same(/a/, Regexp.new('a'))
assert_equal(/a/, Regexp.new('a'))
end

def test_to_s
assert_equal '(?-mix:\x00)', Regexp.new("\0").to_s

Expand Down
15 changes: 14 additions & 1 deletion vm.c
Original file line number Diff line number Diff line change
Expand Up @@ -2241,6 +2241,7 @@ rb_vm_update_references(void *ptr)
if (ptr) {
rb_vm_t *vm = ptr;
rb_gc_update_tbl_refs(vm->frozen_strings);
rb_gc_update_tbl_refs(vm->regexp_literals);
}
}

Expand Down Expand Up @@ -2295,7 +2296,7 @@ rb_vm_mark(void *ptr)
rb_hook_list_mark(&vm->global_hooks);

rb_gc_mark_values(RUBY_NSIG, vm->trap_list.cmd);

rb_mark_tbl_no_pin(vm->regexp_literals);
mjit_mark();
}

Expand Down Expand Up @@ -2354,6 +2355,10 @@ ruby_vm_destruct(rb_vm_t *vm)
st_free_table(vm->frozen_strings);
vm->frozen_strings = 0;
}
if (vm->regexp_literals) {
st_free_table(vm->regexp_literals);
vm->regexp_literals = 0;
}
rb_vm_gvl_destroy(vm);
RB_ALTSTACK_FREE(vm->main_altstack);
if (objspace) {
Expand Down Expand Up @@ -3310,6 +3315,7 @@ rb_vm_set_progname(VALUE filename)
}

extern const struct st_hash_type rb_fstring_hash_type;
extern const struct st_hash_type rb_regexp_literal_hash_type;

void
Init_BareVM(void)
Expand Down Expand Up @@ -3345,6 +3351,7 @@ Init_vm_objects(void)
vm->mark_object_ary = rb_ary_tmp_new(128);
vm->loading_table = st_init_strtable();
vm->frozen_strings = st_init_table_with_size(&rb_fstring_hash_type, 1000);
vm->regexp_literals = st_init_table_with_size(&rb_regexp_literal_hash_type, 1000);
}

/* top self */
Expand Down Expand Up @@ -3406,6 +3413,12 @@ rb_vm_fstring_table(void)
return GET_VM()->frozen_strings;
}

st_table *
rb_vm_regexp_literals_table(void)
{
return GET_VM()->regexp_literals;
}

#if VM_COLLECT_USAGE_DETAILS

#define HASH_ASET(h, k, v) rb_hash_aset((h), (st_data_t)(k), (st_data_t)(v))
Expand Down
1 change: 1 addition & 0 deletions vm_core.h
Original file line number Diff line number Diff line change
Expand Up @@ -662,6 +662,7 @@ typedef struct rb_vm_struct {

VALUE *defined_strings;
st_table *frozen_strings;
st_table *regexp_literals;

const struct rb_builtin_function *builtin_function_table;
int builtin_inline_index;
Expand Down