$B1sF#$H?=$7$^$9!#(B
Bignum#* $B$K(B FFT $B$rMQ$$$?>h;;$r<BAu$7$F$_$^$7$?!#(B
10 $B?J$G(B 100000
$B7e0J>e$"$k$h$&$J5pBg$J@0?t$N>h;;$,B.$/$J$j$^$9!#(B
$B$=$s$J<{MW$O$J$$$G$7$g$&$+!#(B
$ cat ../test_fft.rb
require "benchmark"
n = 10**10000
puts Benchmark.measure { 100.times { n * n } }
n = 10**100000
puts Benchmark.measure { 10.times { n * n } }
n = 10**1000000
puts Benchmark.measure { 1.times { n * n } }
# $B8=>u(B
$ ./ruby.org ../test_fft.rb
2.590000 0.000000 2.590000 ( 2.874859)
26.040000 0.010000 26.050000 ( 28.927583)
266.540000 0.040000 266.580000 (276.491797)
# FFT $BHG(B
$ ./ruby.fft ../test_fft.rb
2.630000 0.010000 2.640000 ( 2.636339)
2.790000 0.040000 2.830000 ( 2.836020)
6.270000 0.090000 6.360000 ( 6.368478)
FFT $B$N<BAu$O652J=qDL$j$G$9!#4p?t$b(B 65536 (=2**(8*sizeof(short)))
$B$+(B
256 (=2**(8*sizeof(char))) $B$N7h$aBG$A$G$9!#%U!<%j%(JQ49$H$+$h$/(B
$B$o$+$C$F$J$$$N$G!"%P%0$C$F$?$i$9$_$^$;$s!#>\$7$$J}$K8!>Z$d:F<BAu$r(B
$B$7$FD:$1$?$i:G9b$G$9!#(B
# $B$"$HC/$+!"%K%e!<%H%sK!$K$h$k=|;;$r<BAu$7$^$;$s!)(B
Index: bignum.c
===================================================================
--- bignum.c (revision 14308)
+++ bignum.c (working copy)
@@ -1442,10 +1442,145 @@
}
}
+static void
+big_fft(double *ar, double *ai, int al, double theta)
+{
+ int m, mh, i, j, k, irev;
+ double wr, wi, xr, xi;
+
+ i = 0;
+ for (j = 1; j < al - 1; j++) {
+ for (k = al >> 1; k > (i ^= k); k >>= 1);
+ if (j < i) {
+ xr = ar[j];
+ xi = ai[j];
+ ar[j] = ar[i];
+ ai[j] = ai[i];
+ ar[i] = xr;
+ ai[i] = xi;
+ }
+ }
+ for (mh = 1; (m = mh << 1) <= al; mh = m) {
+ irev = 0;
+ for (i = 0; i < al; i += m) {
+ wr = cos(theta * irev);
+ wi = sin(theta * irev);
+ for (k = al >> 2; k > (irev ^= k); k >>= 1);
+ for (j = i; j < mh + i; j++) {
+ k = j + mh;
+ xr = ar[j] - ar[k];
+ xi = ai[j] - ai[k];
+ ar[j] += ar[k];
+ ai[j] += ai[k];
+ ar[k] = wr * xr - wi * xi;
+ ai[k] = wr * xi + wi * xr;
+ }
+ }
+ }
+}
+
+static void
+big_digits_to_bases(int base, double *xr, long rlen, BDIGIT *xds, long
dlen)
+{
+ long i, j, k, div = 1 << (base * CHAR_BIT);
+ BDIGIT v;
+
+ k = 0;
+ for (i = 0; i < dlen; i++) {
+ v = xds[i];
+ for (j = 0; j < sizeof(BDIGIT) / base; j++) {
+ xr[k++] = (double) (v % div);
+ v /= div;
+ }
+ }
+ for (; k < rlen; k++) xr[k] = 0;
+}
+
+static void
+big_bases_to_digits(int base, BDIGIT *zds, long dlen, double *xr)
+{
+ long i, j, k, div = 1 << (base * CHAR_BIT);
+ BDIGIT v, s;
+ double x, xm;
+
+ x = 0;
+ i = j = 0;
+ v = 0;
+ s = 1;
+ for (k = 0;; k++) {
+ x += floor(xr[k] + 0.5);
+ xm = fmod(x, div);
+ v |= ((BDIGIT) xm) * s;
+ x = (x - xm) / div;
+ s *= div;
+ if (++j >= sizeof(BDIGIT) / base) {
+ zds[i++] = v;
+ if (i == dlen) return;
+ j = 0;
+ v = 0;
+ s = 1;
+ }
+ }
+}
+
static VALUE
+big_mul_fft(VALUE x, VALUE y)
+{
+ long i, len, base;
+ VALUE z;
+ double *xr, *xi, *yr, *yi, pi = atan(1.0) * 4.0;
+
+ /* calculate FFT length and decide the base of radix */
+ base = sizeof(short);
+ i = (RBIGNUM_LEN(x) + RBIGNUM_LEN(y) + 1) * sizeof(BDIGIT) / base;
+ for (len = 1; len < i; len *= 2);
+ if (len > 131072) {
+ len *= sizeof(short) / sizeof(char);
+ base = sizeof(char);
+ }
+
+ /* allocate two arraies of complexes */
+ xr = ALLOC_N(double, len);
+ xi = ALLOC_N(double, len);
+ yr = ALLOC_N(double, len);
+ yi = ALLOC_N(double, len);
+
+ /* initialize the arraies */
+ big_digits_to_bases(base, xr, len, BDIGITS(x), RBIGNUM_LEN(x));
+ big_digits_to_bases(base, yr, len, BDIGITS(y), RBIGNUM_LEN(y));
+ for (i = 0; i < len; i++) xi[i] = yi[i] = 0;
+
+ /* perform FFT */
+ big_fft(xr, xi, len, 2 * pi / len);
+ big_fft(yr, yi, len, 2 * pi / len);
+
+ /* multiply the arraies */
+ for (i = 0; i < len; i++) {
+ double t = xr[i] * yr[i] - xi[i] * yi[i];
+ xi[i] = xr[i] * yi[i] + xi[i] * yr[i];
+ xr[i] = t;
+ }
+ free(yr);
+ free(yi);
+
+ /* perform IFFT */
+ big_fft(xr, xi, len, -2 * pi / len);
+ free(xi);
+ for (i = 0; i < len; i++) xr[i] /= len;
+
+ /* make a result bignum */
+ len = RBIGNUM_LEN(x) + RBIGNUM_LEN(y) + 1;
+ z = bignew(len, RBIGNUM_SIGN(x)==RBIGNUM_SIGN(y));
+ big_bases_to_digits(base, BDIGITS(z), len, xr);
+ free(xr);
+
+ return z;
+}
+
+static VALUE
rb_big_mul0(VALUE x, VALUE y)
{
- long i, j;
+ long i, j, nx, ny, len;
BDIGIT_DBL n = 0;
VALUE z;
BDIGIT *zds;
@@ -1465,6 +1600,22 @@
return rb_num_coerce_bin(x, y);
}
+ /* estimate time */
+ for (i = nx = 0; i < RBIGNUM_LEN(x); i++) if (BDIGITS(x)[i]) nx++;
+ for (i = ny = 0; i < RBIGNUM_LEN(y); i++) if (BDIGITS(y)[i]) ny++;
+ for (len = 1; RBIGNUM_LEN(x) + RBIGNUM_LEN(y) > len; len *= 2);
+
+ /* swap x and y (when y is more sparse than x) */
+ if (ny < nx) {
+ z = x; x = y; y = z; nx = ny;
+ }
+
+ /* multiply by FFT (if FFT seems to be faster) */
+ if ((double) nx * RBIGNUM_LEN(y) > (double) len * log(len) * 30) {
+ return big_mul_fft(x, y);
+ }
+
+ /* multiply in a normal way */
j = RBIGNUM_LEN(x) + RBIGNUM_LEN(y) + 1;
z = bignew(j, RBIGNUM_SIGN(x)==RBIGNUM_SIGN(y));
zds = BDIGITS(z);
on 18.12.2007 16:19
on 18.12.2007 18:04
$B$^$D$b$H(B $B$f$-$R$m$G$9(B
In message "Re: [ruby-dev:32629] faster Bignum#*"
on Wed, 19 Dec 2007 00:18:17 +0900, "Yusuke ENDOH" <mame@tsg.ne.jp>
writes:
|Bignum#* $B$K(B FFT $B$rMQ$$$?>h;;$r<BAu$7$F$_$^$7$?!#(B
|10 $B?J$G(B 100000 $B7e0J>e$"$k$h$&$J5pBg$J@0?t$N>h;;$,B.$/$J$j$^$9!#(B
|$B$=$s$J<{MW$O$J$$$G$7$g$&$+!#(B
$B$&!<$s!"$"$k$N$+$J!#%/%j%9%^%9$,=*$o$C$F$+$i<h$j9~$`J}8~$G9M(B
$B$($^$9!#(B
on 19.12.2007 04:50
$B1sF#$G$9!#(B
07/12/19 $B$K(B Yukihiro Matsumoto<matz@ruby-lang.org>
$B$5$s$O=q$-$^$7$?(B:
> $B$($^$9!#(B
$BB?G\D9@0?t1i;;$O1_<~N($N7W;;$H$+%i%$%U%2!<%`$H$+$K;H$($k$N$G!"2x$7$$(B
$B<{MW$O$?$^$K$"$j$^$9(B ($B>P(B)
$B$H$j$"$($:!"5pBg$J@0?t$N>h;;$d(B to_s $B$r$7$?$H$-(B Ctrl-C
$B$,$-$+$J$/$J$k(B
$B$N$O$J$s$H$+$7$H$$$?$i4r$7$$$H;W$$$^$9!#(B
$B$=$3$GBg$-$$?t$N(B rb_big_mul0 $B$H(B divrem $B$r(B
rb_thread_blocking_region $B$G(B
$B<B9T$9$k%Q%C%A$r=q$$$F$_$^$7$?!#$I$&$G$7$g$&$+!#(B
Index: bignum.c
===================================================================
--- bignum.c (revision 14313)
+++ bignum.c (working copy)
@@ -1442,34 +1442,32 @@
}
}
+static void
+rb_big_stop(void *ptr)
+{
+ VALUE *stop = (VALUE*)ptr;
+ *stop = Qtrue;
+}
+
+struct big_mul_struct {
+ VALUE x, y, stop;
+};
+
static VALUE
-rb_big_mul0(VALUE x, VALUE y)
+bigmul1(void *ptr)
{
+ struct big_mul_struct *bms = (struct big_mul_struct*)ptr;
long i, j;
BDIGIT_DBL n = 0;
- VALUE z;
+ VALUE x = bms->x, y = bms->y, z;
BDIGIT *zds;
- switch (TYPE(y)) {
- case T_FIXNUM:
- y = rb_int2big(FIX2LONG(y));
- break;
-
- case T_BIGNUM:
- break;
-
- case T_FLOAT:
- return DOUBLE2NUM(rb_big2dbl(x) * RFLOAT_VALUE(y));
-
- default:
- return rb_num_coerce_bin(x, y);
- }
-
j = RBIGNUM_LEN(x) + RBIGNUM_LEN(y) + 1;
z = bignew(j, RBIGNUM_SIGN(x)==RBIGNUM_SIGN(y));
zds = BDIGITS(z);
while (j--) zds[j] = 0;
for (i = 0; i < RBIGNUM_LEN(x); i++) {
+ if (bms->stop) return Qnil;
BDIGIT_DBL dd = BDIGITS(x)[i];
if (dd == 0) continue;
n = 0;
@@ -1486,6 +1484,42 @@
return z;
}
+static VALUE
+rb_big_mul0(VALUE x, VALUE y)
+{
+ struct big_mul_struct bms;
+ VALUE z;
+
+ switch (TYPE(y)) {
+ case T_FIXNUM:
+ y = rb_int2big(FIX2LONG(y));
+ break;
+
+ case T_BIGNUM:
+ break;
+
+ case T_FLOAT:
+ return DOUBLE2NUM(rb_big2dbl(x) * RFLOAT_VALUE(y));
+
+ default:
+ return rb_num_coerce_bin(x, y);
+ }
+
+ bms.x = x;
+ bms.y = y;
+ bms.stop = Qfalse;
+
+ if (RBIGNUM_LEN(x) + RBIGNUM_LEN(y) > 10000) {
+ VALUE stop = Qfalse;
+ z = rb_thread_blocking_region(bigmul1, &bms, rb_big_stop, &bms.stop);
+ }
+ else {
+ z = bigmul1(&bms);
+ }
+
+ return z;
+}
+
/*
* call-seq:
* big * other => Numeric
@@ -1499,9 +1533,15 @@
return bignorm(rb_big_mul0(x, y));
}
-static void
-bigdivrem(VALUE x, VALUE y, VALUE *divp, VALUE *modp)
+struct big_div_struct {
+ VALUE x, y, *divp, *modp, stop;
+};
+
+static VALUE
+bigdivrem1(void *ptr)
{
+ struct big_div_struct *bds = (struct big_div_struct*)ptr;
+ VALUE x = bds->x, y = bds->y, *divp = bds->divp, *modp = bds->modp;
long nx = RBIGNUM_LEN(x), ny = RBIGNUM_LEN(y);
long i, j;
VALUE yy, z;
@@ -1575,6 +1615,7 @@
j = nx==ny?nx+1:nx;
do {
+ if (bds->stop) return Qnil;
if (zds[j] == yds[ny-1]) q = BIGRAD-1;
else q = (BDIGIT)((BIGUP(zds[j]) + zds[j-1])/yds[ny-1]);
if (q) {
@@ -1627,6 +1668,27 @@
}
}
+static VALUE
+bigdivrem(VALUE x, VALUE y, VALUE *divp, VALUE *modp)
+{
+ struct big_div_struct bds;
+ VALUE z;
+
+ bds.x = x;
+ bds.y = y;
+ bds.divp = divp;
+ bds.modp = modp;
+ bds.stop = Qfalse;
+ if (RBIGNUM_LEN(x) > 10000 || RBIGNUM_LEN(y) > 10000) {
+ VALUE stop = Qfalse;
+ z = rb_thread_blocking_region(bigdivrem1, &bds, rb_big_stop,
&bds.stop);
+ }
+ else {
+ z = bigdivrem1(&bds);
+ }
+ return z;
+}
+
static void
bigdivmod(VALUE x, VALUE y, VALUE *divp, VALUE *modp)
{
on 19.12.2007 06:50
$B$^$D$b$H(B $B$f$-$R$m$G$9(B
In message "Re: [ruby-dev:32632] Re: faster Bignum#*"
on Wed, 19 Dec 2007 12:49:02 +0900, "Yusuke ENDOH" <mame@tsg.ne.jp>
writes:
|$BB?G\D9@0?t1i;;$O1_<~N($N7W;;$H$+%i%$%U%2!<%`$H$+$K;H$($k$N$G!"2x$7$$(B
|$B<{MW$O$?$^$K$"$j$^$9(B ($B>P(B)
$B$J$k$[$I!#(Bw
|$B$H$j$"$($:!"5pBg$J@0?t$N>h;;$d(B to_s $B$r$7$?$H$-(B Ctrl-C $B$,$-$+$J$/$J$k(B
|$B$N$O$J$s$H$+$7$H$$$?$i4r$7$$$H;W$$$^$9!#(B
|$B$=$3$GBg$-$$?t$N(B rb_big_mul0 $B$H(B divrem $B$r(B rb_thread_blocking_region $B$G(B
|$B<B9T$9$k%Q%C%A$r=q$$$F$_$^$7$?!#$I$&$G$7$g$&$+!#(B
$B;d$O<h$j9~$_$?$$$N$G$9$,!"$3$N%Q%C%A$N5;=QE*BEEv@-$,H=CG$G$-(B
$B$^$;$s!#$5$5$@$/$s$,(BOK$B$H8@$C$?$i<h$j9~$`$H$$$&$3$H$G!#(B
on 02.09.2008 10:57
$BK-J!$G$9!#(B In [ruby-dev:33139] > $B$=$3$GBg$-$$?t$N(B rb_big_mul0 $B$H(B divrem $B$r(B rb_thread_blocking_region $B$G(B > $B<B9T$9$k%Q%C%A$r=q$$$F$_$^$7$?!#$I$&$G$7$g$&$+!#(B > +rb_big_mul0(VALUE x, VALUE y) ... > + if (RBIGNUM_LEN(x) + RBIGNUM_LEN(y) > 10000) { > +bigdivrem(VALUE x, VALUE y, VALUE *divp, VALUE *modp) ... > + if (RBIGNUM_LEN(x) > 10000 || RBIGNUM_LEN(y) > 10000) { $B!J8=:_;H$C$F$$$k>h;;!"=|;;%"%k%4%j%:%`$N!K7W;;NL$+$i$9$k$H(B if (nx*ny > 10000) { $B$H(B if ((nx-ny+1)*ny > 10000) { $B$NJ}$,$h$5$=$&$J5$$,!#(B $B$H$3$m$G(B bigmul1 $B$r8+$F5$$E$$$?$N$G$9$,(B for (i = 0; i < RBIGNUM_LEN(x); i++) { ... dd = BDIGITS(x)[i]; ... for (j = 0; j < RBIGNUM_LEN(y); j++) { BDIGIT_DBL ee = n + (BDIGIT_DBL)dd * BDIGITS(y)[j]; $B$H%k!<%WCf$G(B BDIGITS() $B$r;H$C$?$^$^$K$J$C$F$^$9$,Kh2s(B BDIGITS() $B$G(B RBIGNUM_EMBED_FLAG $B$N%A%'%C%/$,F~$k$N$G%k!<%WA0$K(B xds = BDIGITS(x); yds = BDIGITS(y); $B$7$H$$$?J}$,$$$$$s$8$c$J$$$G$7$g$&$+!#(B ---