Ruby Forum Ruby-dev > faster Bignum#*

Posted by Yusuke ENDOH (Guest)
on 18.12.2007 16:19
(Received via mailing list)
$B1sF#$H?=$7$^$9!#(B

Bignum#* $B$K(B FFT $B$rMQ$$$?>h;;$r<BAu$7$F$_$^$7$?!#(B
10 $B?J$G(B 100000 
$B7e0J>e$"$k$h$&$J5pBg$J@0?t$N>h;;$,B.$/$J$j$^$9!#(B
$B$=$s$J<{MW$O$J$$$G$7$g$&$+!#(B

$ cat ../test_fft.rb
require "benchmark"

n = 10**10000
puts Benchmark.measure { 100.times { n * n } }

n = 10**100000
puts Benchmark.measure { 10.times { n * n } }

n = 10**1000000
puts Benchmark.measure { 1.times { n * n } }

# $B8=>u(B
$ ./ruby.org ../test_fft.rb
  2.590000   0.000000   2.590000 (  2.874859)
 26.040000   0.010000  26.050000 ( 28.927583)
266.540000   0.040000 266.580000 (276.491797)

# FFT $BHG(B
$ ./ruby.fft ../test_fft.rb
  2.630000   0.010000   2.640000 (  2.636339)
  2.790000   0.040000   2.830000 (  2.836020)
  6.270000   0.090000   6.360000 (  6.368478)

FFT $B$N<BAu$O652J=qDL$j$G$9!#4p?t$b(B 65536 (=2**(8*sizeof(short))) 
$B$+(B
256 (=2**(8*sizeof(char))) $B$N7h$aBG$A$G$9!#%U!<%j%(JQ49$H$+$h$/(B
$B$o$+$C$F$J$$$N$G!"%P%0$C$F$?$i$9$_$^$;$s!#>\$7$$J}$K8!>Z$d:F<BAu$r(B
$B$7$FD:$1$?$i:G9b$G$9!#(B

# $B$"$HC/$+!"%K%e!<%H%sK!$K$h$k=|;;$r<BAu$7$^$;$s!)(B


Index: bignum.c
===================================================================
--- bignum.c  (revision 14308)
+++ bignum.c  (working copy)
@@ -1442,10 +1442,145 @@
     }
 }

+static void
+big_fft(double *ar, double *ai, int al, double theta)
+{
+    int m, mh, i, j, k, irev;
+    double wr, wi, xr, xi;
+
+    i = 0;
+    for (j = 1; j < al - 1; j++) {
+  for (k = al >> 1; k > (i ^= k); k >>= 1);
+  if (j < i) {
+      xr = ar[j];
+      xi = ai[j];
+      ar[j] = ar[i];
+      ai[j] = ai[i];
+      ar[i] = xr;
+      ai[i] = xi;
+  }
+    }
+    for (mh = 1; (m = mh << 1) <= al; mh = m) {
+  irev = 0;
+  for (i = 0; i < al; i += m) {
+      wr = cos(theta * irev);
+      wi = sin(theta * irev);
+      for (k = al >> 2; k > (irev ^= k); k >>= 1);
+      for (j = i; j < mh + i; j++) {
+    k = j + mh;
+    xr = ar[j] - ar[k];
+    xi = ai[j] - ai[k];
+    ar[j] += ar[k];
+    ai[j] += ai[k];
+    ar[k] = wr * xr - wi * xi;
+    ai[k] = wr * xi + wi * xr;
+      }
+  }
+    }
+}
+
+static void
+big_digits_to_bases(int base, double *xr, long rlen, BDIGIT *xds, long 
dlen)
+{
+    long i, j, k, div = 1 << (base * CHAR_BIT);
+    BDIGIT v;
+
+    k = 0;
+    for (i = 0; i < dlen; i++) {
+  v = xds[i];
+  for (j = 0; j < sizeof(BDIGIT) / base; j++) {
+      xr[k++] = (double) (v % div);
+      v /= div;
+  }
+    }
+    for (; k < rlen; k++) xr[k] = 0;
+}
+
+static void
+big_bases_to_digits(int base, BDIGIT *zds, long dlen, double *xr)
+{
+    long i, j, k, div = 1 << (base * CHAR_BIT);
+    BDIGIT v, s;
+    double x, xm;
+
+    x = 0;
+    i = j = 0;
+    v = 0;
+    s = 1;
+    for (k = 0;; k++) {
+  x += floor(xr[k] + 0.5);
+  xm = fmod(x, div);
+  v |= ((BDIGIT) xm) * s;
+  x = (x - xm) / div;
+  s *= div;
+  if (++j >= sizeof(BDIGIT) / base) {
+      zds[i++] = v;
+      if (i == dlen) return;
+      j = 0;
+      v = 0;
+      s = 1;
+  }
+    }
+}
+
 static VALUE
+big_mul_fft(VALUE x, VALUE y)
+{
+    long i, len, base;
+    VALUE z;
+    double *xr, *xi, *yr, *yi, pi = atan(1.0) * 4.0;
+
+    /* calculate FFT length and decide the base of radix */
+    base = sizeof(short);
+    i = (RBIGNUM_LEN(x) + RBIGNUM_LEN(y) + 1) * sizeof(BDIGIT) / base;
+    for (len = 1; len < i; len *= 2);
+    if (len > 131072) {
+  len *= sizeof(short) / sizeof(char);
+  base = sizeof(char);
+    }
+
+    /* allocate two arraies of complexes */
+    xr = ALLOC_N(double, len);
+    xi = ALLOC_N(double, len);
+    yr = ALLOC_N(double, len);
+    yi = ALLOC_N(double, len);
+
+    /* initialize the arraies */
+    big_digits_to_bases(base, xr, len, BDIGITS(x), RBIGNUM_LEN(x));
+    big_digits_to_bases(base, yr, len, BDIGITS(y), RBIGNUM_LEN(y));
+    for (i = 0; i < len; i++) xi[i] = yi[i] = 0;
+
+    /* perform FFT */
+    big_fft(xr, xi, len, 2 * pi / len);
+    big_fft(yr, yi, len, 2 * pi / len);
+
+    /* multiply the arraies */
+    for (i = 0; i < len; i++) {
+  double t = xr[i] * yr[i] - xi[i] * yi[i];
+  xi[i]    = xr[i] * yi[i] + xi[i] * yr[i];
+  xr[i]    = t;
+    }
+    free(yr);
+    free(yi);
+
+    /* perform IFFT */
+    big_fft(xr, xi, len, -2 * pi / len);
+    free(xi);
+    for (i = 0; i < len; i++) xr[i] /= len;
+
+    /* make a result bignum */
+    len = RBIGNUM_LEN(x) + RBIGNUM_LEN(y) + 1;
+    z = bignew(len, RBIGNUM_SIGN(x)==RBIGNUM_SIGN(y));
+    big_bases_to_digits(base, BDIGITS(z), len, xr);
+    free(xr);
+
+    return z;
+}
+
+static VALUE
 rb_big_mul0(VALUE x, VALUE y)
 {
-    long i, j;
+    long i, j, nx, ny, len;
     BDIGIT_DBL n = 0;
     VALUE z;
     BDIGIT *zds;
@@ -1465,6 +1600,22 @@
   return rb_num_coerce_bin(x, y);
     }

+    /* estimate time */
+    for (i = nx = 0; i < RBIGNUM_LEN(x); i++) if (BDIGITS(x)[i]) nx++;
+    for (i = ny = 0; i < RBIGNUM_LEN(y); i++) if (BDIGITS(y)[i]) ny++;
+    for (len = 1; RBIGNUM_LEN(x) + RBIGNUM_LEN(y) > len; len *= 2);
+
+    /* swap x and y (when y is more sparse than x) */
+    if (ny < nx) {
+  z = x; x = y; y = z; nx = ny;
+    }
+
+    /* multiply by FFT (if FFT seems to be faster) */
+    if ((double) nx * RBIGNUM_LEN(y) > (double) len * log(len) * 30) {
+  return big_mul_fft(x, y);
+    }
+
+    /* multiply in a normal way */
     j = RBIGNUM_LEN(x) + RBIGNUM_LEN(y) + 1;
     z = bignew(j, RBIGNUM_SIGN(x)==RBIGNUM_SIGN(y));
     zds = BDIGITS(z);
Posted by Yukihiro Matsumoto (Guest)
on 18.12.2007 18:04
(Received via mailing list)
$B$^$D$b$H(B $B$f$-$R$m$G$9(B

In message "Re: [ruby-dev:32629] faster Bignum#*"
    on Wed, 19 Dec 2007 00:18:17 +0900, "Yusuke ENDOH" <mame@tsg.ne.jp> 
writes:

|Bignum#* $B$K(B FFT $B$rMQ$$$?>h;;$r<BAu$7$F$_$^$7$?!#(B
|10 $B?J$G(B 100000 $B7e0J>e$"$k$h$&$J5pBg$J@0?t$N>h;;$,B.$/$J$j$^$9!#(B
|$B$=$s$J<{MW$O$J$$$G$7$g$&$+!#(B

$B$&!<$s!"$"$k$N$+$J!#%/%j%9%^%9$,=*$o$C$F$+$i<h$j9~$`J}8~$G9M(B
$B$($^$9!#(B
Posted by Yusuke ENDOH (Guest)
on 19.12.2007 04:50
(Received via mailing list)
$B1sF#$G$9!#(B

07/12/19 $B$K(B Yukihiro Matsumoto<matz@ruby-lang.org> 
$B$5$s$O=q$-$^$7$?(B:
> $B$($^$9!#(B
$BB?G\D9@0?t1i;;$O1_<~N($N7W;;$H$+%i%$%U%2!<%`$H$+$K;H$($k$N$G!"2x$7$$(B
$B<{MW$O$?$^$K$"$j$^$9(B ($B>P(B)

$B$H$j$"$($:!"5pBg$J@0?t$N>h;;$d(B to_s $B$r$7$?$H$-(B Ctrl-C 
$B$,$-$+$J$/$J$k(B
$B$N$O$J$s$H$+$7$H$$$?$i4r$7$$$H;W$$$^$9!#(B
$B$=$3$GBg$-$$?t$N(B rb_big_mul0 $B$H(B divrem $B$r(B 
rb_thread_blocking_region $B$G(B
$B<B9T$9$k%Q%C%A$r=q$$$F$_$^$7$?!#$I$&$G$7$g$&$+!#(B


Index: bignum.c
===================================================================
--- bignum.c  (revision 14313)
+++ bignum.c  (working copy)
@@ -1442,34 +1442,32 @@
     }
 }

+static void
+rb_big_stop(void *ptr)
+{
+    VALUE *stop = (VALUE*)ptr;
+    *stop = Qtrue;
+}
+
+struct big_mul_struct {
+    VALUE x, y, stop;
+};
+
 static VALUE
-rb_big_mul0(VALUE x, VALUE y)
+bigmul1(void *ptr)
 {
+    struct big_mul_struct *bms = (struct big_mul_struct*)ptr;
     long i, j;
     BDIGIT_DBL n = 0;
-    VALUE z;
+    VALUE x = bms->x, y = bms->y, z;
     BDIGIT *zds;

-    switch (TYPE(y)) {
-      case T_FIXNUM:
-  y = rb_int2big(FIX2LONG(y));
-  break;
-
-      case T_BIGNUM:
-  break;
-
-      case T_FLOAT:
-  return DOUBLE2NUM(rb_big2dbl(x) * RFLOAT_VALUE(y));
-
-      default:
-  return rb_num_coerce_bin(x, y);
-    }
-
     j = RBIGNUM_LEN(x) + RBIGNUM_LEN(y) + 1;
     z = bignew(j, RBIGNUM_SIGN(x)==RBIGNUM_SIGN(y));
     zds = BDIGITS(z);
     while (j--) zds[j] = 0;
     for (i = 0; i < RBIGNUM_LEN(x); i++) {
+  if (bms->stop) return Qnil;
   BDIGIT_DBL dd = BDIGITS(x)[i];
   if (dd == 0) continue;
   n = 0;
@@ -1486,6 +1484,42 @@
     return z;
 }

+static VALUE
+rb_big_mul0(VALUE x, VALUE y)
+{
+    struct big_mul_struct bms;
+    VALUE z;
+
+    switch (TYPE(y)) {
+      case T_FIXNUM:
+  y = rb_int2big(FIX2LONG(y));
+  break;
+
+      case T_BIGNUM:
+  break;
+
+      case T_FLOAT:
+  return DOUBLE2NUM(rb_big2dbl(x) * RFLOAT_VALUE(y));
+
+      default:
+  return rb_num_coerce_bin(x, y);
+    }
+
+    bms.x = x;
+    bms.y = y;
+    bms.stop = Qfalse;
+
+    if (RBIGNUM_LEN(x) + RBIGNUM_LEN(y) > 10000) {
+  VALUE stop = Qfalse;
+  z = rb_thread_blocking_region(bigmul1, &bms, rb_big_stop, &bms.stop);
+    }
+    else {
+  z = bigmul1(&bms);
+    }
+
+    return z;
+}
+
 /*
  *  call-seq:
  *     big * other  => Numeric
@@ -1499,9 +1533,15 @@
     return bignorm(rb_big_mul0(x, y));
 }

-static void
-bigdivrem(VALUE x, VALUE y, VALUE *divp, VALUE *modp)
+struct big_div_struct {
+    VALUE x, y, *divp, *modp, stop;
+};
+
+static VALUE
+bigdivrem1(void *ptr)
 {
+    struct big_div_struct *bds = (struct big_div_struct*)ptr;
+    VALUE x = bds->x, y = bds->y, *divp = bds->divp, *modp = bds->modp;
     long nx = RBIGNUM_LEN(x), ny = RBIGNUM_LEN(y);
     long i, j;
     VALUE yy, z;
@@ -1575,6 +1615,7 @@

     j = nx==ny?nx+1:nx;
     do {
+  if (bds->stop) return Qnil;
   if (zds[j] ==  yds[ny-1]) q = BIGRAD-1;
   else q = (BDIGIT)((BIGUP(zds[j]) + zds[j-1])/yds[ny-1]);
   if (q) {
@@ -1627,6 +1668,27 @@
     }
 }

+static VALUE
+bigdivrem(VALUE x, VALUE y, VALUE *divp, VALUE *modp)
+{
+    struct big_div_struct bds;
+    VALUE z;
+
+    bds.x = x;
+    bds.y = y;
+    bds.divp = divp;
+    bds.modp = modp;
+    bds.stop = Qfalse;
+    if (RBIGNUM_LEN(x) > 10000 || RBIGNUM_LEN(y) > 10000) {
+  VALUE stop = Qfalse;
+  z = rb_thread_blocking_region(bigdivrem1, &bds, rb_big_stop, 
&bds.stop);
+    }
+    else {
+  z = bigdivrem1(&bds);
+    }
+    return z;
+}
+
 static void
 bigdivmod(VALUE x, VALUE y, VALUE *divp, VALUE *modp)
 {
Posted by Yukihiro Matsumoto (Guest)
on 19.12.2007 06:50
(Received via mailing list)
$B$^$D$b$H(B $B$f$-$R$m$G$9(B

In message "Re: [ruby-dev:32632] Re: faster Bignum#*"
    on Wed, 19 Dec 2007 12:49:02 +0900, "Yusuke ENDOH" <mame@tsg.ne.jp> 
writes:

|$BB?G\D9@0?t1i;;$O1_<~N($N7W;;$H$+%i%$%U%2!<%`$H$+$K;H$($k$N$G!"2x$7$$(B
|$B<{MW$O$?$^$K$"$j$^$9(B ($B>P(B)

$B$J$k$[$I!#(Bw

|$B$H$j$"$($:!"5pBg$J@0?t$N>h;;$d(B to_s $B$r$7$?$H$-(B Ctrl-C $B$,$-$+$J$/$J$k(B
|$B$N$O$J$s$H$+$7$H$$$?$i4r$7$$$H;W$$$^$9!#(B
|$B$=$3$GBg$-$$?t$N(B rb_big_mul0 $B$H(B divrem $B$r(B rb_thread_blocking_region $B$G(B
|$B<B9T$9$k%Q%C%A$r=q$$$F$_$^$7$?!#$I$&$G$7$g$&$+!#(B

$B;d$O<h$j9~$_$?$$$N$G$9$,!"$3$N%Q%C%A$N5;=QE*BEEv@-$,H=CG$G$-(B
$B$^$;$s!#$5$5$@$/$s$,(BOK$B$H8@$C$?$i<h$j9~$`$H$$$&$3$H$G!#(B
Posted by TOYOFUKU Chikanobu (Guest)
on 02.09.2008 10:57
(Received via mailing list)
$BK-J!$G$9!#(B

In [ruby-dev:33139]
> $B$=$3$GBg$-$$?t$N(B rb_big_mul0 $B$H(B divrem $B$r(B rb_thread_blocking_region $B$G(B
> $B<B9T$9$k%Q%C%A$r=q$$$F$_$^$7$?!#$I$&$G$7$g$&$+!#(B

> +rb_big_mul0(VALUE x, VALUE y)
  ...
> +    if (RBIGNUM_LEN(x) + RBIGNUM_LEN(y) > 10000) {

> +bigdivrem(VALUE x, VALUE y, VALUE *divp, VALUE *modp)
  ...
> +    if (RBIGNUM_LEN(x) > 10000 || RBIGNUM_LEN(y) > 10000) {

  $B!J8=:_;H$C$F$$$k>h;;!"=|;;%"%k%4%j%:%`$N!K7W;;NL$+$i$9$k$H(B
    if (nx*ny > 10000) {
$B$H(B
    if ((nx-ny+1)*ny > 10000) {
$B$NJ}$,$h$5$=$&$J5$$,!#(B

$B$H$3$m$G(B bigmul1 $B$r8+$F5$$E$$$?$N$G$9$,(B

    for (i = 0; i < RBIGNUM_LEN(x); i++) {
        ...
        dd = BDIGITS(x)[i];
        ...
        for (j = 0; j < RBIGNUM_LEN(y); j++) {
            BDIGIT_DBL ee = n + (BDIGIT_DBL)dd * BDIGITS(y)[j];

$B$H%k!<%WCf$G(B BDIGITS() $B$r;H$C$?$^$^$K$J$C$F$^$9$,Kh2s(B 
BDIGITS() $B$G(B
RBIGNUM_EMBED_FLAG $B$N%A%'%C%/$,F~$k$N$G%k!<%WA0$K(B
  xds = BDIGITS(x);
  yds = BDIGITS(y);
$B$7$H$$$?J}$,$$$$$s$8$c$J$$$G$7$g$&$+!#(B
---