Skip to content

Commit 6b2506b

Browse files
committed
Implement Newton-Raphson division
Improve performance of huge divisions
1 parent 5a8d8cb commit 6b2506b

File tree

5 files changed

+334
-39
lines changed

5 files changed

+334
-39
lines changed

bigdecimal.gemspec

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ Gem::Specification.new do |s|
4343
ext/bigdecimal/bigdecimal.c
4444
ext/bigdecimal/bigdecimal.h
4545
ext/bigdecimal/bits.h
46+
ext/bigdecimal/div.h
4647
ext/bigdecimal/feature.h
4748
ext/bigdecimal/missing.c
4849
ext/bigdecimal/missing.h

ext/bigdecimal/bigdecimal.c

Lines changed: 35 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -29,12 +29,14 @@
2929
#endif
3030

3131
#include "bits.h"
32+
#include "div.h"
3233
#include "static_assert.h"
3334

3435
#if SIZEOF_DECDIG == 4
3536
#define USE_NTT_MULTIPLICATION 1
3637
#include "ntt.h"
3738
#define NTT_MULTIPLICATION_THRESHOLD 100
39+
#define NEWTON_RAPHSON_DIVISION_THRESHOLD 200
3840
#endif
3941

4042
#define BIGDECIMAL_VERSION "3.2.3"
@@ -81,11 +83,6 @@ static struct {
8183
uint8_t mode;
8284
} rbd_rounding_modes[RBD_NUM_ROUNDING_MODES];
8385

84-
typedef struct {
85-
VALUE bigdecimal;
86-
Real *real;
87-
} BDVALUE;
88-
8986
typedef struct {
9087
VALUE bigdecimal_or_nil;
9188
Real *real_or_null;
@@ -213,7 +210,6 @@ rbd_allocate_struct_zero(int sign, size_t const digits)
213210
static unsigned short VpGetException(void);
214211
static void VpSetException(unsigned short f);
215212
static void VpCheckException(Real *p, bool always);
216-
static int AddExponent(Real *a, SIGNED_VALUE n);
217213
static VALUE CheckGetValue(BDVALUE v);
218214
static void VpInternalRound(Real *c, size_t ixDigit, DECDIG vPrev, DECDIG v);
219215
static int VpLimitRound(Real *c, size_t ixDigit);
@@ -1105,9 +1101,6 @@ BigDecimal_check_num(Real *p)
11051101
VpCheckException(p, true);
11061102
}
11071103

1108-
static VALUE BigDecimal_fix(VALUE self);
1109-
static VALUE BigDecimal_split(VALUE self);
1110-
11111104
/* Returns the value as an Integer.
11121105
*
11131106
* If the BigDecimal is infinity or NaN, raises FloatDomainError.
@@ -3238,19 +3231,39 @@ BigDecimal_literal(const char *str)
32383231

32393232
#ifdef BIGDECIMAL_USE_VP_TEST_METHODS
32403233
VALUE
3241-
BigDecimal_vpdivd(VALUE self, VALUE r, VALUE cprec) {
3242-
BDVALUE a,b,c,d;
3234+
BigDecimal_vpdivd_generic(VALUE self, VALUE r, VALUE cprec, void (*vpdivd_func)(Real*, Real*, Real*, Real*)) {
3235+
BDVALUE a, b, c, d;
32433236
size_t cn = NUM2INT(cprec);
32443237
a = GetBDValueMust(self);
32453238
b = GetBDValueMust(r);
32463239
c = NewZeroWrap(1, cn * BASE_FIG);
32473240
d = NewZeroWrap(1, VPDIVD_REM_PREC(a.real, b.real, c.real) * BASE_FIG);
3248-
VpDivd(c.real, d.real, a.real, b.real);
3241+
vpdivd_func(c.real, d.real, a.real, b.real);
32493242
RB_GC_GUARD(a.bigdecimal);
32503243
RB_GC_GUARD(b.bigdecimal);
32513244
return rb_assoc_new(c.bigdecimal, d.bigdecimal);
32523245
}
32533246

3247+
void
3248+
VpDivdNormal(Real *c, Real *r, Real *a, Real *b) {
3249+
VpDivd(c, r, a, b);
3250+
}
3251+
3252+
VALUE
3253+
BigDecimal_vpdivd(VALUE self, VALUE r, VALUE cprec) {
3254+
return BigDecimal_vpdivd_generic(self, r, cprec, VpDivdNormal);
3255+
}
3256+
3257+
VALUE
3258+
BigDecimal_vpdivd_newton(VALUE self, VALUE r, VALUE cprec) {
3259+
return BigDecimal_vpdivd_generic(self, r, cprec, VpDivdNewton);
3260+
}
3261+
3262+
VALUE
3263+
BigDecimal_newton_raphson_inverse(VALUE self, VALUE prec) {
3264+
return newton_raphson_inverse(self, NUM2SIZET(prec));
3265+
}
3266+
32543267
VALUE
32553268
BigDecimal_vpmult(VALUE self, VALUE v) {
32563269
BDVALUE a,b,c;
@@ -3652,6 +3665,8 @@ Init_bigdecimal(void)
36523665

36533666
#ifdef BIGDECIMAL_USE_VP_TEST_METHODS
36543667
rb_define_method(rb_cBigDecimal, "vpdivd", BigDecimal_vpdivd, 2);
3668+
rb_define_method(rb_cBigDecimal, "vpdivd_newton", BigDecimal_vpdivd_newton, 2);
3669+
rb_define_method(rb_cBigDecimal, "newton_raphson_inverse", BigDecimal_newton_raphson_inverse, 1);
36553670
rb_define_method(rb_cBigDecimal, "vpmult", BigDecimal_vpmult, 1);
36563671
#ifdef USE_NTT_MULTIPLICATION
36573672
rb_define_method(rb_cBigDecimal, "nttmult", BigDecimal_nttmult, 1);
@@ -5042,6 +5057,14 @@ VpDivd(Real *c, Real *r, Real *a, Real *b)
50425057

50435058
if (word_a > word_r || word_b + word_c - 2 >= word_r) goto space_error;
50445059

5060+
#ifdef USE_NTT_MULTIPLICATION
5061+
// Newton-Raphson division requires multiplication to be faster than O(n^2)
5062+
if (word_c >= NEWTON_RAPHSON_DIVISION_THRESHOLD && word_b >= NEWTON_RAPHSON_DIVISION_THRESHOLD) {
5063+
VpDivdNewton(c, r, a, b);
5064+
goto Exit;
5065+
}
5066+
#endif
5067+
50455068
for (i = 0; i < word_a; ++i) r->frac[i] = a->frac[i];
50465069
for (i = word_a; i < word_r; ++i) r->frac[i] = 0;
50475070
for (i = 0; i < word_c; ++i) c->frac[i] = 0;

ext/bigdecimal/bigdecimal.h

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -188,6 +188,11 @@ typedef struct {
188188
DECDIG frac[FLEXIBLE_ARRAY_SIZE]; /* Array of fraction part. */
189189
} Real;
190190

191+
typedef struct {
192+
VALUE bigdecimal;
193+
Real *real;
194+
} BDVALUE;
195+
191196
/*
192197
* ------------------
193198
* EXPORTables.
@@ -232,10 +237,31 @@ VP_EXPORT int VpActiveRound(Real *y, Real *x, unsigned short f, ssize_t il);
232237
VP_EXPORT int VpMidRound(Real *y, unsigned short f, ssize_t nf);
233238
VP_EXPORT int VpLeftRound(Real *y, unsigned short f, ssize_t nf);
234239
VP_EXPORT void VpFrac(Real *y, Real *x);
240+
VP_EXPORT int AddExponent(Real *a, SIGNED_VALUE n);
235241

236242
/* VP constants */
237243
VP_EXPORT Real *VpOne(void);
238244

245+
/*
246+
* **** BigDecimal part ****
247+
*/
248+
VP_EXPORT VALUE BigDecimal_lt(VALUE self, VALUE r);
249+
VP_EXPORT VALUE BigDecimal_ge(VALUE self, VALUE r);
250+
VP_EXPORT VALUE BigDecimal_exponent(VALUE self);
251+
VP_EXPORT VALUE BigDecimal_fix(VALUE self);
252+
VP_EXPORT VALUE BigDecimal_frac(VALUE self);
253+
VP_EXPORT VALUE BigDecimal_add(VALUE self, VALUE b);
254+
VP_EXPORT VALUE BigDecimal_sub(VALUE self, VALUE b);
255+
VP_EXPORT VALUE BigDecimal_mult(VALUE self, VALUE b);
256+
VP_EXPORT VALUE BigDecimal_add2(VALUE self, VALUE b, VALUE n);
257+
VP_EXPORT VALUE BigDecimal_sub2(VALUE self, VALUE b, VALUE n);
258+
VP_EXPORT VALUE BigDecimal_mult2(VALUE self, VALUE b, VALUE n);
259+
VP_EXPORT VALUE BigDecimal_split(VALUE self);
260+
VP_EXPORT VALUE BigDecimal_decimal_shift(VALUE self, VALUE v);
261+
VP_EXPORT inline BDVALUE GetBDValueMust(VALUE v);
262+
VP_EXPORT inline BDVALUE rbd_allocate_struct_zero_wrap(int sign, size_t const digits);
263+
#define NewZeroWrap rbd_allocate_struct_zero_wrap
264+
239265
/*
240266
* ------------------
241267
* MACRO definitions.

ext/bigdecimal/div.h

Lines changed: 192 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,192 @@
1+
// Calculate the inverse of x using the Newton-Raphson method.
2+
static VALUE
3+
newton_raphson_inverse(VALUE x, size_t prec) {
4+
BDVALUE bdone = NewZeroWrap(1, 1);
5+
VpSetOne(bdone.real);
6+
VALUE one = bdone.bigdecimal;
7+
8+
// Initial approximation in 2 digits
9+
BDVALUE bdx = GetBDValueMust(x);
10+
BDVALUE inv0 = NewZeroWrap(1, 2 * BIGDECIMAL_COMPONENT_FIGURES);
11+
VpSetOne(inv0.real);
12+
DECDIG_DBL numerator = (DECDIG_DBL)BIGDECIMAL_BASE * 100;
13+
DECDIG_DBL denominator = (DECDIG_DBL)bdx.real->frac[0] * 100 + (DECDIG_DBL)(bdx.real->Prec >= 2 ? bdx.real->frac[1] : 0) * 100 / BIGDECIMAL_BASE;
14+
inv0.real->frac[0] = (DECDIG)(numerator / denominator);
15+
inv0.real->frac[1] = (DECDIG)((numerator % denominator) * (BIGDECIMAL_BASE / 100) / denominator * 100);
16+
inv0.real->Prec = 2;
17+
inv0.real->exponent = 1 - bdx.real->exponent;
18+
VpNmlz(inv0.real);
19+
RB_GC_GUARD(bdx.bigdecimal);
20+
VALUE inv = inv0.bigdecimal;
21+
22+
int bl = 1;
23+
while (((size_t)1 << bl) < prec) bl++;
24+
25+
for (int i = bl; i >= 0; i--) {
26+
size_t n = (prec >> i) + 2;
27+
if (n > prec) n = prec;
28+
// Newton-Raphson iteration: inv_next = inv + inv * (1 - x * inv)
29+
VALUE one_minus_x_inv = BigDecimal_sub2(
30+
one,
31+
BigDecimal_mult(BigDecimal_mult2(x, one, SIZET2NUM(n + 1)), inv),
32+
SIZET2NUM(SIZET2NUM(n / 2))
33+
);
34+
inv = BigDecimal_add2(
35+
inv,
36+
BigDecimal_mult(inv, one_minus_x_inv),
37+
SIZET2NUM(n)
38+
);
39+
}
40+
return inv;
41+
}
42+
43+
// Calculates divmod by multiplying approximate reciprocal of y
44+
static void
45+
divmod_by_inv_mul(VALUE x, VALUE y, VALUE inv, VALUE *res_div, VALUE *res_mod) {
46+
VALUE div = BigDecimal_fix(BigDecimal_mult(x, inv));
47+
VALUE mod = BigDecimal_sub(x, BigDecimal_mult(div, y));
48+
while (RTEST(BigDecimal_lt(mod, INT2FIX(0)))) {
49+
mod = BigDecimal_add(mod, y);
50+
div = BigDecimal_sub(div, INT2FIX(1));
51+
}
52+
while (RTEST(BigDecimal_ge(mod, y))) {
53+
mod = BigDecimal_sub(mod, y);
54+
div = BigDecimal_add(div, INT2FIX(1));
55+
}
56+
*res_div = div;
57+
*res_mod = mod;
58+
}
59+
60+
static void
61+
slice_copy(DECDIG *dest, Real *src, size_t rshift, size_t length) {
62+
ssize_t start = src->exponent - rshift - length;
63+
if (start >= (ssize_t)src->Prec) return;
64+
if (start < 0) {
65+
dest -= start;
66+
length += start;
67+
start = 0;
68+
}
69+
size_t max_length = src->Prec - start;
70+
memcpy(dest, src->frac + start, Min(length, max_length) * sizeof(DECDIG));
71+
}
72+
73+
/* Calculates divmod using Newton-Raphson method.
74+
* x and y must be a BigDecimal representing an integer value.
75+
*
76+
* To calculate with low cost, we need to split x into blocks and perform divmod for each block.
77+
* x_digits = remaining_digits(<= y_digits) + block_digits * num_blocks
78+
*
79+
* Example:
80+
* xxx_xxxxx_xxxxx_xxxxx(18 digits) / yyyyy(5 digits)
81+
* remaining_digits = 3, block_digits = 5, num_blocks = 3
82+
* repeating xxxxx_xxxxxx.divmod(yyyyy) calculation 3 times.
83+
*
84+
* In each divmod step, dividend is at most (y_digits + block_digits) digits and divisor is y_digits digits.
85+
* Reciprocal of y needs block_digits + 1 precision.
86+
*/
87+
static void
88+
divmod_newton(VALUE x, VALUE y, VALUE *div_out, VALUE *mod_out) {
89+
size_t x_digits = NUM2SIZET(BigDecimal_exponent(x));
90+
size_t y_digits = NUM2SIZET(BigDecimal_exponent(y));
91+
if (x_digits <= y_digits) x_digits = y_digits + 1;
92+
93+
size_t n = x_digits / y_digits;
94+
size_t block_figs = (x_digits - y_digits) / n / BIGDECIMAL_COMPONENT_FIGURES + 1;
95+
size_t block_digits = block_figs * BIGDECIMAL_COMPONENT_FIGURES;
96+
size_t num_blocks = (x_digits - y_digits + block_digits - 1) / block_digits;
97+
size_t y_figs = (y_digits - 1) / BIGDECIMAL_COMPONENT_FIGURES + 1;
98+
VALUE yinv = newton_raphson_inverse(y, block_digits + 1);
99+
100+
BDVALUE divident = NewZeroWrap(1, BIGDECIMAL_COMPONENT_FIGURES * (y_figs + block_figs));
101+
BDVALUE div_result = NewZeroWrap(1, BIGDECIMAL_COMPONENT_FIGURES * (num_blocks * block_figs + 1));
102+
BDVALUE bdx = GetBDValueMust(x);
103+
104+
VALUE mod = BigDecimal_fix(BigDecimal_decimal_shift(x, SSIZET2NUM(-num_blocks * block_digits)));
105+
for (ssize_t i = num_blocks - 1; i >= 0; i--) {
106+
memset(divident.real->frac, 0, (y_figs + block_figs) * sizeof(DECDIG));
107+
108+
BDVALUE bdmod = GetBDValueMust(mod);
109+
slice_copy(divident.real->frac, bdmod.real, 0, y_figs);
110+
slice_copy(divident.real->frac + y_figs, bdx.real, i * block_figs, block_figs);
111+
RB_GC_GUARD(bdmod.bigdecimal);
112+
113+
VpSetSign(divident.real, 1);
114+
divident.real->exponent = y_figs + block_figs;
115+
divident.real->Prec = y_figs + block_figs;
116+
VpNmlz(divident.real);
117+
118+
VALUE div;
119+
divmod_by_inv_mul(divident.bigdecimal, y, yinv, &div, &mod);
120+
BDVALUE bddiv = GetBDValueMust(div);
121+
slice_copy(div_result.real->frac + (num_blocks - i - 1) * block_figs, bddiv.real, 0, block_figs + 1);
122+
RB_GC_GUARD(bddiv.bigdecimal);
123+
}
124+
VpSetSign(div_result.real, 1);
125+
div_result.real->exponent = num_blocks * block_figs + 1;
126+
div_result.real->Prec = num_blocks * block_figs + 1;
127+
VpNmlz(div_result.real);
128+
RB_GC_GUARD(bdx.bigdecimal);
129+
RB_GC_GUARD(divident.bigdecimal);
130+
RB_GC_GUARD(div_result.bigdecimal);
131+
*div_out = div_result.bigdecimal;
132+
*mod_out = mod;
133+
}
134+
135+
static VALUE
136+
VpDivdNewtonInner(VALUE args_ptr)
137+
{
138+
Real **args = (Real**)args_ptr;
139+
Real *c = args[0], *r = args[1], *a = args[2], *b = args[3];
140+
BDVALUE a2, b2, c2, r2;
141+
VALUE div, mod, a2_frac = Qnil;
142+
size_t div_prec = c->MaxPrec - 1;
143+
size_t base_prec = b->Prec;
144+
145+
a2 = NewZeroWrap(1, a->Prec * BIGDECIMAL_COMPONENT_FIGURES);
146+
b2 = NewZeroWrap(1, b->Prec * BIGDECIMAL_COMPONENT_FIGURES);
147+
VpAsgn(a2.real, a, 1);
148+
VpAsgn(b2.real, b, 1);
149+
VpSetSign(a2.real, 1);
150+
VpSetSign(b2.real, 1);
151+
a2.real->exponent = base_prec + div_prec;
152+
b2.real->exponent = base_prec;
153+
154+
if ((ssize_t)a2.real->Prec > a2.real->exponent) {
155+
a2_frac = BigDecimal_frac(a2.bigdecimal);
156+
VpMidRound(a2.real, VP_ROUND_DOWN, 0);
157+
}
158+
divmod_newton(a2.bigdecimal, b2.bigdecimal, &div, &mod);
159+
if (a2_frac != Qnil) mod = BigDecimal_add(mod, a2_frac);
160+
161+
c2 = GetBDValueMust(div);
162+
r2 = GetBDValueMust(mod);
163+
VpAsgn(c, c2.real, VpGetSign(a) * VpGetSign(b));
164+
VpAsgn(r, r2.real, VpGetSign(a));
165+
AddExponent(c, a->exponent);
166+
AddExponent(c, -b->exponent);
167+
AddExponent(c, -div_prec);
168+
AddExponent(r, a->exponent);
169+
AddExponent(r, -base_prec - div_prec);
170+
RB_GC_GUARD(a2.bigdecimal);
171+
RB_GC_GUARD(a2.bigdecimal);
172+
RB_GC_GUARD(c2.bigdecimal);
173+
RB_GC_GUARD(r2.bigdecimal);
174+
return Qnil;
175+
}
176+
177+
static VALUE
178+
ensure_restore_prec_limit(VALUE limit)
179+
{
180+
VpSetPrecLimit(NUM2SIZET(limit));
181+
return Qnil;
182+
}
183+
184+
static void
185+
VpDivdNewton(Real *c, Real *r, Real *a, Real *b)
186+
{
187+
Real *args[4] = {c, r, a, b};
188+
size_t pl = VpGetPrecLimit();
189+
VpSetPrecLimit(0);
190+
// Ensure restoring prec limit because some methods used in VpDivdNewtonInner may raise an exception
191+
rb_ensure(VpDivdNewtonInner, (VALUE)args, ensure_restore_prec_limit, SIZET2NUM(pl));
192+
}

0 commit comments

Comments
 (0)