Ordered/unordered st_table

e$B$J$+$@$G$9!#e(B

e$B$D$$$G$K!"e(Bst_tablee$B$N=g=x$NM-L5$r@ZBX$($k%Q%C%A$G$9!#e(B

Index: hash.c

— hash.c (revision 13339)
+++ hash.c (working copy)
@@ -233,4 +233,5 @@ rb_hash_tbl(VALUE hash)
if (!RHASH(hash)->ntbl) {
RHASH(hash)->ntbl = st_init_table(&objhash);

  • st_orderize(RHASH(hash)->ntbl);
    }
    return RHASH(hash)->ntbl;
    @@ -402,4 +403,5 @@ rb_hash_rehash(VALUE hash)
    return hash;
    tbl = st_init_table_with_size(RHASH(hash)->ntbl->type,
    RHASH(hash)->ntbl->num_entries);
  • st_orderize(tbl);
    rb_hash_foreach(hash, rb_hash_rehash_i, (st_data_t)tbl);
    st_free_table(RHASH(hash)->ntbl);
    Index: st.c
    ===================================================================
    — st.c (revision 13339)
    +++ st.c (working copy)
    @@ -7,4 +7,5 @@
    #include <stdlib.h>
    #endif
    +#include <stddef.h>
    #include <string.h>

@@ -28,4 +29,10 @@ struct st_table_entry {
};

+#define ST_UNORDER_MARK (st_table_entry *)-1
+#define ST_ORDERED_P(table) ((table)->head != ST_UNORDER_MARK)
+#define SIZEOF_ORDERED_ENTRY sizeof(st_table_entry)
+#define SIZEOF_UNORDERED_ENTRY offsetof(st_table_entry, fore)
+#define SIZEOF_ENTRY(table) (ST_ORDERED_P(table) ? SIZEOF_ORDERED_ENTRY
: SIZEOF_UNORDERED_ENTRY)
+
#define ST_DEFAULT_MAX_DENSITY 5
#define ST_DEFAULT_INIT_TABLE_SIZE 11
@@ -168,5 +175,5 @@ st_init_table_with_size(const struct st_
tbl->num_bins = size;
tbl->bins = (st_table_entry *)Calloc(size,
sizeof(st_table_entry
));

  • tbl->head = 0;
  • tbl->head = ST_UNORDER_MARK;

    return tbl;
    @@ -203,4 +210,13 @@ st_init_strtable_with_size(int size)
    }

+int
+st_orderize(st_table *table)
+{

  • if (ST_ORDERED_P(table)) return 0;
  • if (table->num_entries) return -1;
  • table->head = 0;
  • return 0;
    +}

void
st_clear(st_table *table)
@@ -224,5 +240,5 @@ st_clear(st_table *table)
}
table->num_entries = 0;

  • table->head = 0;
  • if (ST_ORDERED_P(table)) table->head = 0;
    }

@@ -293,5 +309,5 @@ do {
}
\

  • entry = alloc(st_table_entry);\
  • entry = malloc(SIZEOF_ENTRY(table));

    entry->hash = hash_val;
    @@ -299,5 +315,7 @@ do {
    entry->record = value;
    entry->next = table->bins[bin_pos];\
  • if ((head = table->head) != 0) {\
  • if (!ST_ORDERED_P(table)) {\
  • }\
  • else if ((head = table->head) != 0) {
    entry->fore = head;
    (entry->back = head->back)->fore = entry;
    @@ -392,21 +410,39 @@ rehash(register st_table *table)
    {
    register st_table_entry *ptr, **new_bins;
  • int i, new_num_bins;
  • unsigned int i, old_num_bins = table->num_bins, new_num_bins;
    unsigned int hash_val;
  • int size = new_size(table->num_bins+1);
  • new_num_bins = new_size(table->num_bins+1);
  • new_bins = (st_table_entry**)
  • xrealloc(table->bins, new_num_bins * sizeof(st_table_entry*));
  • for (i = 0; i < new_num_bins; ++i) new_bins[i] = 0;
  • table->num_bins = new_num_bins;
  • table->bins = new_bins;
  • if (size < 0) return;
  • new_num_bins = (unsigned int)size;
  • if (!ST_ORDERED_P(table)) {
  • new_bins = calloc(new_num_bins, sizeof(st_table_entry*));
  • for (i = 0; i < old_num_bins; i++) {
  •  ptr = table->bins[i];
    
  •  while (ptr != 0) {
    
  • st_table_entry *next = ptr->next;
  • hash_val = ptr->hash % new_num_bins;
  • ptr->next = new_bins[hash_val];
  • new_bins[hash_val] = ptr;
  • ptr = next;
  •  }
    
  • }
  • free(table->bins);
  • }
  • else {
  • new_bins = xrealloc(table->bins, new_num_bins *
    sizeof(st_table_entry*));
  • for (i = 0; i < new_num_bins; ++i) new_bins[i] = 0;
  • if ((ptr = table->head) != 0) {
  • do {
  •  hash_val = ptr->hash % new_num_bins;
    
  •  ptr->next = new_bins[hash_val];
    
  •  new_bins[hash_val] = ptr;
    
  • } while ((ptr = ptr->fore) != table->head);
  • if ((ptr = table->head) != 0) {
  •  do {
    
  • hash_val = ptr->hash % new_num_bins;
  • ptr->next = new_bins[hash_val];
  • new_bins[hash_val] = ptr;
  •  } while ((ptr = ptr->fore) != table->head);
    
  • }
    }
  • table->num_bins = new_num_bins;
  • table->bins = new_bins;
    }

@@ -438,9 +474,28 @@ st_copy(st_table *old_table)
}

  • if ((ptr = old_table->head) != 0) {
  • if (!ST_ORDERED_P(old_table)) {
  • unsigned int i;
  • for (i = 0; i < num_bins; i++) {
  •  ptr = old_table->bins[i];
    
  •  tail = &new_table->bins[i];
    
  •  while (ptr != 0) {
    
  • entry = malloc(SIZEOF_UNORDERED_ENTRY);
  • if (entry == 0) {
  •    st_free_table(new_table);
    
  •    return 0;
    
  • }
  • *entry = *ptr;
  • entry->next = 0;
  • *tail = entry;
  • tail = &entry->next;
  • ptr = ptr->next;
  •  }
    
  • }
  • }
  • else if ((ptr = old_table->head) != 0) {
    prev = 0;
    tail = &new_table->head;
    do {
  •  entry = alloc(st_table_entry);
    
  •  entry = malloc(SIZEOF_ORDERED_ENTRY);
     if (entry == 0) {
    
    st_free_table(new_table);
    @@ -465,5 +520,8 @@ st_copy(st_table *old_table)
    #define REMOVE_ENTRY(table, ptr) do
    { \
  • if (ptr == ptr->fore) { \
  • if (!ST_ORDERED_P(table)) { \
  •  /* do nothing  */            \
    
  • } \
  • else if (ptr == ptr->fore) {
    table->head = 0;
    }
    @@ -601,5 +659,37 @@ st_foreach(st_table *table, int (*func)(
    }
  • if ((ptr = table->head) != 0) {
  • if (!ST_ORDERED_P(table)) {

  • for (i = 0; i < table->num_bins; i++) {

  •  for (ptr = *(last = &table->bins[i]); ptr != 0;) {
    
  • retval = (*func)(ptr->key, ptr->record, arg);

  • switch (retval) {

  •  case ST_CHECK:  /* check if hash is modified during iteration */
    
  •    tmp = 0;
    
  •    if (i < table->num_bins) {
    
  •  for (tmp = table->bins[i]; tmp; tmp = tmp->next) {
    
  •      if (tmp == ptr) break;
    
  •  }
    
  •    }
    
  •    if (!tmp) {
    
  •  /* call func with error notice */
    
  •  retval = (*func)(0, 0, arg, 1);
    
  •  return 1;
    
  •    }
    
  •    /* fall through */
    
  •  case ST_CONTINUE:
    
  •    ptr = *(last = &ptr->next);
    
  •    break;
    
  •  case ST_STOP:
    
  •    return 0;
    
  •  case ST_DELETE:
    
  •    tmp = ptr;
    
  •    ptr = *last = ptr->next;
    
  •    free(tmp);
    
  •    table->num_entries--;
    
  • }

  •  }
    
  • }

  • }

  • else if ((ptr = table->head) != 0) {
    do {
    end = ptr->fore == table->head;
    @@ -647,4 +737,8 @@ st_reverse_foreach(st_table *table, int
    int i, end;

  • if (!ST_ORDERED_P(table)) {

  • return st_foreach(table, func, arg);

  • }

  • if (table->entries_packed) {
    for (i = table->num_entries-1; 0 <= i; i–) {
    Index: include/ruby/st.h
    ===================================================================
    — include/ruby/st.h (revision 13339)
    +++ include/ruby/st.h (working copy)
    @@ -72,4 +72,5 @@ st_table *st_init_numtable_with_size(int
    st_table *st_init_strtable(void);
    st_table *st_init_strtable_with_size(int);
    +int st_orderize(st_table *);
    int st_delete(st_table *, st_data_t *, st_data_t *);
    int st_delete_safe(st_table *, st_data_t *, st_data_t *, st_data_t);

In article [email protected],
Nobuyoshi N. [email protected] writes:

e$B$D$$$G$K!"e(Bst_tablee$B$N=g=x$NM-L5$r@ZBX$($k%Q%C%A$G$9!#e(B

r13412 e$B$K%Q%C%A$rEv$F$FB,$C$F$_$^$7$?!#e(B

e$BMWAG$NB?$$e(B st_table (e$B$?$@$7e(B Hash e$B0J30e(B)
e$B$,8z$/$O$:$J$N$G!"$^e(B
e$B$:$O%7%s%%k$rBgNL$K@8@.$7$F$_$^$9!#e(B

e$B%Q%C%AA0e(B:
% time ./ruby -e ‘s = “a”; 1000000.times { s.intern; s.succ! }
print File.read(“/proc/#{$$}/status”)[/^VmSize.*\n/]’
VmSize: 95048 kB
./ruby -e 6.52s user 0.12s system 99% cpu 6.667 total

e$B%Q%C%A8ee(B:
% time ./ruby -e ‘s = “a”; 1000000.times { s.intern; s.succ! }
print File.read(“/proc/#{$$}/status”)[/^VmSize.*\n/]’
VmSize: 79336 kB
./ruby -e 6.92s user 0.12s system 98% cpu 7.169 total

95Mbytes e$B$+$ie(B 79Mbytes e$B$H!"$=$l$J$j$K8:$C$F$$$^$9!#e(B16%
e$B$/$ie(B
e$B$$$G$9$+!#e(B

e$B$^$?!"e(B(rdoc e$B$GLdBj$K$J$C$?e(B) iv_tbl e$B$N1F6A$r$$k$?$a$Ke(B
26e$B8D$Ne(B
e$B%$%s%9%?%s%9JQ?t$,$"$k%*%V%8%'%/%H$r$?$/$5$s@8@.$7$F$
$^$9!#e(B

e$B%Q%C%AA0e(B:
% time ./ruby -e ’
class C
def initialize
@a = @b = @c = @d = @e = @f = @g = @h = @i = @j = @k = @l = @m =
@n = @o = @p = @q = @r = @s = @t = @u = @v = @w = @x = @y = @z = 0
end
end
a = []; 100000.times { a << C.new }
print File.read(“/proc/#{$$}/status”)[/^VmSize.*\n/]’
VmSize: 96896 kB
./ruby -e 2.68s user 0.12s system 98% cpu 2.833 total

e$B%Q%C%A8ee(B:
% time ./ruby -e ’
class C
def initialize
@a = @b = @c = @d = @e = @f = @g = @h = @i = @j = @k = @l = @m =
@n = @o = @p = @q = @r = @s = @t = @u = @v = @w = @x = @y = @z = 0
end
end
a = []; 100000.times { a << C.new }
print File.read(“/proc/#{$$}/status”)[/^VmSize.*\n/]’
VmSize: 74420 kB
./ruby -e 2.60s user 0.11s system 97% cpu 2.774 total

e$B$3$l$be(B 97Mbytes e$B$+$ie(B 74Mbytes e$B$H!"e(B23%
e$B$/$i$$8:$C$F$$$^$9!#e(B

numtable e$B$Ge(B linked list e$B$r;H$$$O$8$a$ke(B
(e$B=g=x$r$D$1$J$$$3$H$Ke(B
e$B$h$k8z2L$,=P;O$a$ke(B) e$B$N$O%$%s%9%?%s%9JQ?te(B
6e$B$D$J$N$G!"$=$3$rB,$Ce(B
e$B$F$_$k$H0J2<$N$h$&$K$J$j$^$9!#e(B

e$B%Q%C%AA0e(B:
% time ./ruby -e ’
class C
def initialize
@a = @b = @c = @d = @e = @f = 0
end
end
a = []; 100000.times { a << C.new }
print File.read(“/proc/#{$$}/status”)[/^VmSize.*\n/]’
VmSize: 34348 kB
./ruby -e 0.86s user 0.05s system 98% cpu 0.920 total

e$B%Q%C%A8ee(B:
% time ./ruby -e ’
class C
def initialize
@a = @b = @c = @d = @e = @f = 0
end
end
a = []; 100000.times { a << C.new }
print File.read(“/proc/#{$$}/status”)[/^VmSize.*\n/]’
VmSize: 27576 kB
./ruby -e 0.80s user 0.06s system 99% cpu 0.873 total

e$B$3$N>l9g$G$be(B 34Mbytes e$B$+$ie(B 28Mbytes e$B$He(B 20%
e$B$/$i$$8:$C$F$$$^e(B
e$B$9!#e(B

e$B$R$H$D$N%%V%8%'%/%H$KBP$7$F%$%s%9%?%s%9JQ?t$re(B 1000e$B8D$/$i$$e(B
e$B:n$l$P7`E
$J%a%b%j:o8:$r<B8=$9$kNc$r:n$l$k$+$b$7$l$^$;$s!#e(B

e$B$J$*!"e(Brdoc
e$B<+?H$bB,$j$^$7$?$,!“e(B(e$BEvA3$N$3$H$G$O$”$j$^$9$,e(B) e$B$b$O$de(B
e$B$<$s$<$s8z$-$^$;$s!#e(B