-
Notifications
You must be signed in to change notification settings - Fork 194
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Performance issue on BigUint::div_rem
#323
Comments
As a proof of concept, I ported the python code mentioned above to rust: use num_bigint::BigUint;
use num_integer::Integer;
fn div2n1n(a: &BigUint, b: &BigUint, n: usize) -> (BigUint, BigUint) {
if a.bits() as usize <= 4000 + n {
return a.div_rem(b);
}
let aa;
let bb;
let pad = n & 1;
let (a, b, n) = if pad != 0 {
aa = a << 1;
bb = b << 1;
(&aa, &bb, n + 1)
} else {
(a, b, n)
};
let half_n = n >> 1;
let mask = (BigUint::from(1u32) << half_n) - 1u32;
let b1 = b >> half_n;
let b2 = b & &mask;
let (q1, r) = div3n2n(&(a >> n), &((a >> half_n) & &mask), b, &b1, &b2, half_n);
let (q2, mut r) = div3n2n(&r, &(a & &mask), b, &b1, &b2, half_n);
if pad != 0 {
r >>= 1;
}
((q1 << half_n) | q2, r)
}
fn div3n2n(
a12: &BigUint,
a3: &BigUint,
b: &BigUint,
b1: &BigUint,
b2: &BigUint,
n: usize,
) -> (BigUint, BigUint) {
let (mut q, r) = if a12 >> n == *b1 {
((BigUint::from(1u32) << n) - 1u32, a12 - (b1 << n) + b1)
} else {
div2n1n(a12, b1, n)
};
let mut r1 = r << n | a3;
let r2 = &q * b2;
while r1 < r2 {
q -= 1u32;
r1 += b;
}
(q, r1 - r2)
}
fn int2digits(a: &BigUint, n: usize) -> Vec<BigUint> {
let length = (a.bits() as usize).div_ceil(n);
let mut a_digits = vec![BigUint::ZERO; length];
fn inner(x: &BigUint, left: usize, right: usize, n: usize, a_digits: &mut Vec<BigUint>) {
if left + 1 == right {
a_digits[left] = x.clone();
return;
}
let mid = (left + right) >> 1;
let shift = (mid - left) * n;
let upper = x >> shift;
let lower = x - (&upper << shift);
inner(&lower, left, mid, n, a_digits);
inner(&upper, mid, right, n, a_digits);
}
if a != &BigUint::ZERO {
inner(a, 0, a_digits.len(), n, &mut a_digits);
}
a_digits
}
fn digits2int(digits: &[BigUint], n: usize) -> BigUint {
fn inner(left: usize, right: usize, n: usize, digits: &[BigUint]) -> BigUint {
if left + 1 == right {
return digits[left].clone();
}
let mid = (left + right) >> 1;
let shift = (mid - left) * n;
(inner(mid, right, n, digits) << shift) + inner(left, mid, n, digits)
}
if digits.is_empty() {
BigUint::ZERO
} else {
inner(0, digits.len(), n, digits)
}
}
fn divmod_pos(a: &BigUint, b: &BigUint) -> (BigUint, BigUint) {
let n = b.bits() as usize;
let a_digits = int2digits(a, n);
let mut r = BigUint::ZERO;
let mut q_digits = Vec::new();
for a_digit in a_digits.iter().rev() {
let a = &(r << n) + a_digit;
let (q_digit, new_r) = div2n1n(&a, b, n);
q_digits.push(q_digit);
r = new_r;
}
q_digits.reverse();
let q = digits2int(&q_digits, n);
(q, r)
}
fn int_divmod(a: &BigUint, b: &BigUint) -> (BigUint, BigUint) {
if b == &BigUint::ZERO {
panic!("division by zero");
} else if a == &BigUint::ZERO {
(BigUint::ZERO, BigUint::ZERO)
} else {
divmod_pos(a, b)
}
}
fn main() {
let x = BigUint::from(1u32) << (1u32 << 23);
let w = 1262611; // Half the size of x in decimal digits
int_divmod(&x, &BigUint::from(10u32).pow(w));
}
#[test]
fn test() {
let x = BigUint::from(1u32) << (1u32 << 23);
let w = 1262611; // Half the size of x in decimal digits
let (q, r) = int_divmod(&x, &BigUint::from(10u32).pow(w));
let (a, b) = x.div_rem(&BigUint::from(10u32).pow(w));
assert_eq!(a, q);
assert_eq!(b, r);
} It now only takes 300 ms, which is 16 times faster than the previous implementation (and 3 times faster than python): % time target/release/bigint
0,26s user 0,01s system 99% cpu 0,271 total |
This should be fixed by #316 It implements exactly this algorithm for division. |
Oh right I just noticed your commit: 018dd8f I ran my tests using the |
As I wrote in a previous comment, I noticed that the performance of
BigUint::div_rem
for large numbers is worse than pythonint.__divmod__
.Consider the following computation, i.e the splitting of
2^2^23
into two digits of base10**1262611
(1262611
being half the size of2^2^23
in decimal digits). It takes 1 second to run using python 3.13:Now consider the same computation using
num-bigint
:It takes more than 4 seconds:
% time ./target/release/bigint ./target/release/bigint 4,44s user 0,00s system 99% cpu 4,443 total
For the record, the
int.__divmod__
implementation is written in pure python:https://github.com/python/cpython/blob/2904ec2273762df58645a8e245b2281884855b8c/Lib/_pylong.py#L528-L531
Apparently, the complexity of this algorithm is
O(n**1.58)
:The text was updated successfully, but these errors were encountered: