From b62e286346581bc498c46932ff9e11621664f294 Mon Sep 17 00:00:00 2001 From: mattsu Date: Thu, 13 Nov 2025 21:43:46 +0900 Subject: [PATCH 01/11] feat(factor): improve big integer factoring and add num-integer - Add num-integer dependency to support enhanced numeric operations. - Refactor factorization logic to avoid redundant parsing and optimize u64/u128 paths. - Improve handling of non-positive and invalid inputs to align with GNU factor behavior. - Enhance large BigUint factoring with additional algorithms and clearer limitations. --- Cargo.toml | 1 + src/uu/factor/src/factor.rs | 431 ++++++++++++++++++++++++++++++++---- 2 files changed, 392 insertions(+), 40 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index b388373a2aa..d350157ec32 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -347,6 +347,7 @@ nix = { version = "0.30", default-features = false } nom = "8.0.0" notify = { version = "=8.2.0", features = ["macos_kqueue"] } num-bigint = "0.4.4" +num-integer = "0.1" num-prime = "0.4.4" num-traits = "0.2.19" onig = { version = "~6.5.1", default-features = false } diff --git a/src/uu/factor/src/factor.rs b/src/uu/factor/src/factor.rs index 15af962d659..469f1ec4caa 100644 --- a/src/uu/factor/src/factor.rs +++ b/src/uu/factor/src/factor.rs @@ -4,6 +4,13 @@ // file that was distributed with this source code. // spell-checker:ignore funcs +// NOTE: +// For BigUint > u128, this implementation attempts factorization using Miller-Rabin, +// an improved Pollard-rho, and p-1. +// However, compared to GNU factor, there may still be differences in performance +// and success rate. +// To further approach GNU factor behavior, additional algorithms (e.g. ECM) +// and parameter tuning may be required. 
use std::collections::BTreeMap; use std::io::BufRead; @@ -11,9 +18,9 @@ use std::io::{self, Write, stdin, stdout}; use clap::{Arg, ArgAction, Command}; use num_bigint::BigUint; -use num_traits::FromPrimitive; +use num_traits::{FromPrimitive, One, Zero}; use uucore::display::Quotable; -use uucore::error::{FromIo, UResult, USimpleError, set_exit_code}; +use uucore::error::{FromIo, UResult, set_exit_code}; use uucore::translate; use uucore::{format_usage, show_error, show_warning}; @@ -28,52 +35,55 @@ fn print_factors_str( w: &mut io::BufWriter, print_exponents: bool, ) -> UResult<()> { - let rx = num_str.trim().parse::(); - let Ok(x) = rx else { - // return Ok(). it's non-fatal and we should try the next number. - show_warning!("{}: {}", num_str.maybe_quote(), rx.unwrap_err()); + let s = num_str.trim(); + + // First, interpret as BigUint. + let rx_big = s.parse::(); + let Ok(x_big) = rx_big else { + // Non-fatal error. Proceed to the next input number. + show_warning!("{}: {}", num_str.maybe_quote(), rx_big.unwrap_err()); set_exit_code(1); return Ok(()); }; - if x > BigUint::from_u32(1).unwrap() { - // use num_prime's factorize64 algorithm for u64 integers - if x <= BigUint::from_u64(u64::MAX).unwrap() { - let prime_factors = num_prime::nt_funcs::factorize64(x.clone().to_u64_digits()[0]); - write_result_u64(w, &x, prime_factors, print_exponents) - .map_err_context(|| translate!("factor-error-write-error"))?; - } - // use num_prime's factorize128 algorithm for u128 integers - else if x <= BigUint::from_u128(u128::MAX).unwrap() { - let rx = num_str.trim().parse::(); - let Ok(x) = rx else { - // return Ok(). it's non-fatal and we should try the next number. 
- show_warning!("{}: {}", num_str.maybe_quote(), rx.unwrap_err()); - set_exit_code(1); - return Ok(()); - }; - let prime_factors = num_prime::nt_funcs::factorize128(x); - write_result_u128(w, &x, prime_factors, print_exponents) - .map_err_context(|| translate!("factor-error-write-error"))?; - } - // use num_prime's fallible factorization for anything greater than u128::MAX - else { - let (prime_factors, remaining) = num_prime::nt_funcs::factors(x.clone(), None); - if let Some(_remaining) = remaining { - return Err(USimpleError::new( - 1, - translate!("factor-error-factorization-incomplete"), - )); - } - write_result_big_uint(w, &x, prime_factors, print_exponents) - .map_err_context(|| translate!("factor-error-write-error"))?; - } - } else { + if x_big <= BigUint::from_u32(1).unwrap() { + // For values <= 1: as in GNU factor, print the input with no prime factors. let empty_primes: BTreeMap = BTreeMap::new(); - write_result_big_uint(w, &x, empty_primes, print_exponents) + write_result_big_uint(w, &x_big, empty_primes, print_exponents) + .map_err_context(|| translate!("factor-error-write-error"))?; + return Ok(()); + } + + // Try parsing directly into u64 / u128 and delegate to num_prime if successful. + // This avoids unnecessary BigUint conversions and speeds up the common cases. + if let Ok(v) = s.parse::() { + let prime_factors = num_prime::nt_funcs::factorize64(v); + write_result_u64(w, &x_big, prime_factors, print_exponents) .map_err_context(|| translate!("factor-error-write-error"))?; + return Ok(()); + } + + if let Ok(v) = s.parse::() { + let prime_factors = num_prime::nt_funcs::factorize128(v); + write_result_u128(w, &v, prime_factors, print_exponents) + .map_err_context(|| translate!("factor-error-write-error"))?; + return Ok(()); + } + + // For BigUint > u128: use our own recursive factorization based on + // Miller-Rabin + Pollard-rho + p-1. 
+ let mut factors: Vec = Vec::new(); + let success = factor_biguint_recursive(&x_big, &mut factors); + + if !success { + // 完全分解できなかった場合のみ exit code=1 + set_exit_code(1); } + let factorization = collect_biguint_factors(&factors); + write_result_big_uint(w, &x_big, factorization, print_exponents) + .map_err_context(|| translate!("factor-error-write-error"))?; + Ok(()) } @@ -123,6 +133,347 @@ fn write_result_u128( w.flush() } +fn collect_biguint_factors(factors: &[BigUint]) -> BTreeMap { + let mut map = BTreeMap::::new(); + for f in factors { + *map.entry(f.clone()).or_insert(0) += 1; + } + map +} + +fn is_probable_prime(n: &BigUint) -> bool { + if *n < BigUint::from_u32(2).unwrap() { + return false; + } + if *n == BigUint::from_u32(2).unwrap() || *n == BigUint::from_u32(3).unwrap() { + return true; + } + // even check: n % 2 == 0 + if (&*n & BigUint::from_u32(1).unwrap()).is_zero() { + return false; + } + + let one = BigUint::one(); + let two = BigUint::from_u32(2).unwrap(); + + // n - 1 = d * 2^s + let mut d = n - &one; + let mut s = 0u32; + // while d is even + while (&d & BigUint::from_u32(1).unwrap()).is_zero() { + d >>= 1; + s += 1; + } + + let bases_32: [u64; 3] = [2, 7, 61]; + let bases_64: [u64; 12] = [ + 2, 325, 9375, 28178, 450775, 9780504, 1795265022, 3, 5, 7, 11, 13, + ]; + + let bases: Vec = if n.bits() <= 32 { + bases_32.to_vec() + } else if n.bits() <= 64 { + bases_64.to_vec() + } else { + vec![2, 3, 5, 7, 11, 13, 17, 19, 23] + }; + + 'outer: for &a_u64 in &bases { + if BigUint::from(a_u64) >= *n { + continue; + } + let a = BigUint::from(a_u64); + let mut x = a.modpow(&d, n); + if x == one || x == n - &one { + continue 'outer; + } + + for _ in 1..s { + x = x.modpow(&two, n); + if x == n - &one { + continue 'outer; + } + if x == one { + return false; + } + } + return false; + } + + true +} + +fn small_trial_division(n: &BigUint) -> Option { + // Quickly strip very small prime factors before applying expensive algorithms. 
+ // This is intentionally lightweight while still covering a reasonably wide range. + // GNU factor maintains a large trial table; here we mimic only a small portion of it. + // + // NOTE: By removing many small prime factors here, we significantly reduce the + // search space and failure count of subsequent Pollard-rho / p-1 steps. + const SMALL_PRIMES: [u16; 54] = [ + 2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, 61, 67, 71, 73, 79, 83, 89, + 97, 101, 103, 107, 109, 113, 127, 131, 137, 139, 149, 151, 157, 163, 167, 173, 179, 181, + 191, 193, 197, 199, 211, 223, 227, 229, 233, 239, 241, 251, + ]; + + for &p in &SMALL_PRIMES { + let p_big = BigUint::from_u32(p as u32).unwrap(); + if n == &p_big { + return None; + } + if (n % &p_big).is_zero() { + return Some(p_big); + } + } + None +} + +//// Simplified Pollard p-1 method (Stage 1 only). +//// Effective when p-1 (for a prime divisor p of n) is smooth with small prime factors. +fn pollard_p_minus_1(n: &BigUint) -> Option { + // Stage 1 only (simplified). + // Best-effort: we do not spend too long here; give up quickly if it does not help. + let one = BigUint::one(); + let two = BigUint::from_u32(2).unwrap(); + + if n.is_zero() || n.is_one() { + return None; + } + + if (n & &one).is_zero() { + return Some(two); + } + + let bits = n.bits(); + + // Keep B1 relatively small to avoid excessive cost. + // (GNU factor adjusts based on input and retries; we use a fixed approximation.) + let b1: u64 = if bits <= 256 { + 10_000 + } else if bits <= 512 { + 20_000 + } else { + 50_000 + }; + + // Only try a few small prime bases. 
+ const BASES: [u64; 3] = [2, 3, 5]; + + for &base in &BASES { + let mut a = BigUint::from(base); + if &a >= n { + continue; + } + + let mut g = gcd_biguint(&a, n); + if g > one && &g < n { + return Some(g); + } + + // a^(M) を段階的に構成(指数を 2^k で伸ばす近似) + let mut e = 2u64; + while e <= b1 { + a = a.modpow(&BigUint::from_u64(e).unwrap(), n); + if a.is_one() { + break; + } + let am1 = if a > one { &a - &one } else { continue }; + g = gcd_biguint(&am1, n); + if g > one && &g < n { + return Some(g); + } + e <<= 1; + } + } + + None +} + +//// Improved Pollard-rho (Brent variant with batched gcd). +//// Not equivalent to GNU factor, but aims for better convergence and performance +//// than a naive implementation. +fn pollard_rho(n: &BigUint) -> Option { + // NOTE: + // - This implementation is inspired by the approach in GNU factor but simplified. + // - For large inputs we avoid running too long; we cap the iterations so that + // we do not spend many seconds on hopeless cases. + // - If factorization fails, we return "Factorization incomplete"-style results. + let one = BigUint::one(); + + // For small n we expect earlier code paths to have handled the input. + if *n <= BigUint::from_u32(3).unwrap() { + return None; + } + + // If n is even, return 2 immediately. + if (n & &one).is_zero() { + return Some(BigUint::from_u32(2).unwrap()); + } + + // Use a deterministic LCG to generate parameter sequences. + fn lcg_next(x: &mut u128) { + *x = x + .wrapping_mul(6364136223846793005) + .wrapping_add(1442695040888963407); + } + + let bits = n.bits() as u64; + + // Search parameters: choose bounds based on bit length. + // Avoid overly large limits; when exhausted, treat as failure to find a factor. 
+ let max_tries: u64 = 16; + let max_iter: u64 = (bits * bits).min(200_000).max(10_000); + + let mut seed: u128 = 0x9e3779b97f4a7c15; + + for _try in 0..max_tries { + lcg_next(&mut seed); + let mut x = BigUint::from(seed % (u128::MAX / 2 + 1)); + lcg_next(&mut seed); + let mut c = BigUint::from(seed % (u128::MAX / 2 + 1)); + if c.is_zero() { + c = BigUint::from(1u32); + } + x %= n; + c %= n; + + let mut y = x.clone(); + let mut g = one.clone(); + let mut q = one.clone(); + + let mut iter: u64 = 0; + let m: u64 = 128; + + while g == one && iter < max_iter { + // Brent variant: use batched gcd. + let mut k = 0; + let x_saved = x.clone(); + while k < m && iter < max_iter { + // f(z) = z^2 + c mod n (implemented via multiplication + mod, not modpow). + y = (&y * &y + &c) % n; + let diff = if &x > &y { &x - &y } else { &y - &x }; + if !diff.is_zero() { + q = (q * diff) % n; + } + k += 1; + iter += 1; + } + g = gcd_biguint(&q, n); + x = x_saved; + + if g == one { + // Update x to advance the sequence. + x = y.clone(); + } + } + + if g == one { + continue; + } + if &g == n { + // Fallback: step-by-step gcd. + let mut z = x.clone(); + loop { + z = (&z * &z + &c) % n; + let diff = if &z > &y { &z - &y } else { &y - &z }; + g = gcd_biguint(&diff, n); + if g > one || z == y { + break; + } + } + } + + if g > one && &g < n { + return Some(g); + } + } + + None +} + +//// Recursively factor n and append factors (primes or unfactored composites) to `factors`. +//// Returns true if full factorization succeeded, false otherwise. +fn gcd_biguint(a: &BigUint, b: &BigUint) -> BigUint { + // Standard Euclidean algorithm using owned BigUint values to avoid lifetime issues. 
+ let mut x = a.clone(); + let mut y = b.clone(); + while !y.is_zero() { + let r = &x % &y; + x = y; + y = r; + } + x +} + +fn factor_biguint_recursive(n: &BigUint, factors: &mut Vec) -> bool { + let one = BigUint::one(); + if *n <= one { + return true; + } + + // First remove small prime factors, then apply more expensive methods. + if let Some(p) = small_trial_division(n) { + let mut q = n.clone(); + let mut ok = true; + while (&q % &p).is_zero() { + q /= &p; + factors.push(p.clone()); + } + if !q.is_one() { + ok &= factor_biguint_recursive(&q, factors); + } + return ok; + } + + // If n is small enough, use num_prime's factorize128 for speed. + if n.bits() <= 128 { + if let Ok(x128) = n.to_string().parse::() { + let pf = num_prime::nt_funcs::factorize128(x128); + if !pf.is_empty() { + for (p, e) in pf { + for _ in 0..e { + factors.push(BigUint::from(p)); + } + } + return true; + } + } + } + + if is_probable_prime(n) { + factors.push(n.clone()); + return true; + } + + // Try Pollard p-1 first (simplified Stage 1). + if let Some(f) = pollard_p_minus_1(n) { + if f.is_one() || &f == n { + // Treat as failure. + } else { + let q = n / &f; + let left_ok = factor_biguint_recursive(&f, factors); + let right_ok = factor_biguint_recursive(&q, factors); + return left_ok && right_ok; + } + } + + // Then try improved Pollard-rho (Brent variant). + if let Some(f) = pollard_rho(n) { + if f.is_one() || &f == n { + factors.push(n.clone()); + return false; + } + let q = n / &f; + let left_ok = factor_biguint_recursive(&f, factors); + let right_ok = factor_biguint_recursive(&q, factors); + return left_ok && right_ok; + } + + // If no factor was found, include n itself as part of the (incomplete) factorization. 
+ factors.push(n.clone()); + false +} + /// Writing out the prime factors for BigUint integers fn write_result_big_uint( w: &mut io::BufWriter, From 2bde8f770e96225cc5a513e873cbdbb55f4be26c Mon Sep 17 00:00:00 2001 From: mattsu Date: Thu, 25 Dec 2025 08:02:12 +0900 Subject: [PATCH 02/11] refactor(factor): readability and small perf tweaks --- src/uu/factor/src/factor.rs | 185 +++++++++++++++++++----------------- 1 file changed, 100 insertions(+), 85 deletions(-) diff --git a/src/uu/factor/src/factor.rs b/src/uu/factor/src/factor.rs index 469f1ec4caa..7d3bd884be4 100644 --- a/src/uu/factor/src/factor.rs +++ b/src/uu/factor/src/factor.rs @@ -18,7 +18,7 @@ use std::io::{self, Write, stdin, stdout}; use clap::{Arg, ArgAction, Command}; use num_bigint::BigUint; -use num_traits::{FromPrimitive, One, Zero}; +use num_traits::{FromPrimitive, One, ToPrimitive, Zero}; use uucore::display::Quotable; use uucore::error::{FromIo, UResult, set_exit_code}; use uucore::translate; @@ -141,28 +141,32 @@ fn collect_biguint_factors(factors: &[BigUint]) -> BTreeMap { map } -fn is_probable_prime(n: &BigUint) -> bool { - if *n < BigUint::from_u32(2).unwrap() { +fn is_even(value: &BigUint) -> bool { + (value & BigUint::one()).is_zero() +} + +fn is_probable_prime(candidate: &BigUint) -> bool { + if *candidate < BigUint::from_u32(2).unwrap() { return false; } - if *n == BigUint::from_u32(2).unwrap() || *n == BigUint::from_u32(3).unwrap() { + if *candidate == BigUint::from_u32(2).unwrap() || *candidate == BigUint::from_u32(3).unwrap() { return true; } - // even check: n % 2 == 0 - if (&*n & BigUint::from_u32(1).unwrap()).is_zero() { + // even check: candidate % 2 == 0 + if is_even(candidate) { return false; } let one = BigUint::one(); let two = BigUint::from_u32(2).unwrap(); - // n - 1 = d * 2^s - let mut d = n - &one; - let mut s = 0u32; - // while d is even - while (&d & BigUint::from_u32(1).unwrap()).is_zero() { - d >>= 1; - s += 1; + // candidate - 1 = odd_component * 2^power_of_two + let 
mut odd_component = candidate - &one; + let mut power_of_two = 0u32; + // while odd_component is even + while is_even(&odd_component) { + odd_component >>= 1; + power_of_two += 1; } let bases_32: [u64; 3] = [2, 7, 61]; @@ -170,30 +174,30 @@ fn is_probable_prime(n: &BigUint) -> bool { 2, 325, 9375, 28178, 450775, 9780504, 1795265022, 3, 5, 7, 11, 13, ]; - let bases: Vec = if n.bits() <= 32 { + let bases: Vec = if candidate.bits() <= 32 { bases_32.to_vec() - } else if n.bits() <= 64 { + } else if candidate.bits() <= 64 { bases_64.to_vec() } else { vec![2, 3, 5, 7, 11, 13, 17, 19, 23] }; - 'outer: for &a_u64 in &bases { - if BigUint::from(a_u64) >= *n { + 'outer: for &base_value in &bases { + if BigUint::from(base_value) >= *candidate { continue; } - let a = BigUint::from(a_u64); - let mut x = a.modpow(&d, n); - if x == one || x == n - &one { + let base = BigUint::from(base_value); + let mut witness = base.modpow(&odd_component, candidate); + if witness == one || witness == candidate - &one { continue 'outer; } - for _ in 1..s { - x = x.modpow(&two, n); - if x == n - &one { + for _ in 1..power_of_two { + witness = witness.modpow(&two, candidate); + if witness == candidate - &one { continue 'outer; } - if x == one { + if witness == one { return false; } } @@ -228,8 +232,8 @@ fn small_trial_division(n: &BigUint) -> Option { None } -//// Simplified Pollard p-1 method (Stage 1 only). -//// Effective when p-1 (for a prime divisor p of n) is smooth with small prime factors. +/// Simplified Pollard p-1 method (Stage 1 only). +/// Effective when p-1 (for a prime divisor p of n) is smooth with small prime factors. fn pollard_p_minus_1(n: &BigUint) -> Option { // Stage 1 only (simplified). // Best-effort: we do not spend too long here; give up quickly if it does not help. 
@@ -240,7 +244,7 @@ fn pollard_p_minus_1(n: &BigUint) -> Option { return None; } - if (n & &one).is_zero() { + if is_even(n) { return Some(two); } @@ -289,120 +293,131 @@ fn pollard_p_minus_1(n: &BigUint) -> Option { None } -//// Improved Pollard-rho (Brent variant with batched gcd). -//// Not equivalent to GNU factor, but aims for better convergence and performance -//// than a naive implementation. -fn pollard_rho(n: &BigUint) -> Option { +/// Improved Pollard-rho (Brent variant with batched gcd). +/// Not equivalent to GNU factor, but aims for better convergence and performance +/// than a naive implementation. +fn pollard_rho(composite: &BigUint) -> Option { // NOTE: // - This implementation is inspired by the approach in GNU factor but simplified. // - For large inputs we avoid running too long; we cap the iterations so that // we do not spend many seconds on hopeless cases. // - If factorization fails, we return "Factorization incomplete"-style results. let one = BigUint::one(); + let two = BigUint::from_u32(2).unwrap(); // For small n we expect earlier code paths to have handled the input. - if *n <= BigUint::from_u32(3).unwrap() { + if *composite <= BigUint::from_u32(3).unwrap() { return None; } - // If n is even, return 2 immediately. - if (n & &one).is_zero() { - return Some(BigUint::from_u32(2).unwrap()); + // If composite is even, return 2 immediately. + if is_even(composite) { + return Some(two); } // Use a deterministic LCG to generate parameter sequences. + const LCG_MULTIPLIER: u128 = 6364136223846793005; + const LCG_INCREMENT: u128 = 1442695040888963407; + fn lcg_next(x: &mut u128) { - *x = x - .wrapping_mul(6364136223846793005) - .wrapping_add(1442695040888963407); + *x = x.wrapping_mul(LCG_MULTIPLIER).wrapping_add(LCG_INCREMENT); } - let bits = n.bits() as u64; + let bits = composite.bits(); // Search parameters: choose bounds based on bit length. // Avoid overly large limits; when exhausted, treat as failure to find a factor. 
let max_tries: u64 = 16; - let max_iter: u64 = (bits * bits).min(200_000).max(10_000); + let max_iter: u64 = (bits * bits).clamp(10_000, 200_000); - let mut seed: u128 = 0x9e3779b97f4a7c15; + const LCG_DEFAULT_SEED: u128 = 0x9e3779b97f4a7c15; + let mut seed: u128 = LCG_DEFAULT_SEED; for _try in 0..max_tries { lcg_next(&mut seed); - let mut x = BigUint::from(seed % (u128::MAX / 2 + 1)); + let mut x_state = BigUint::from(seed % (u128::MAX / 2 + 1)); lcg_next(&mut seed); - let mut c = BigUint::from(seed % (u128::MAX / 2 + 1)); - if c.is_zero() { - c = BigUint::from(1u32); + let mut constant = BigUint::from(seed % (u128::MAX / 2 + 1)); + if constant.is_zero() { + constant = BigUint::from(1u32); } - x %= n; - c %= n; + x_state %= composite; + constant %= composite; - let mut y = x.clone(); - let mut g = one.clone(); - let mut q = one.clone(); + let mut y_state = x_state.clone(); + let mut current_gcd = one.clone(); + let mut product = one.clone(); let mut iter: u64 = 0; - let m: u64 = 128; + let batch_size: u64 = 128; - while g == one && iter < max_iter { + while current_gcd == one && iter < max_iter { // Brent variant: use batched gcd. - let mut k = 0; - let x_saved = x.clone(); - while k < m && iter < max_iter { - // f(z) = z^2 + c mod n (implemented via multiplication + mod, not modpow). - y = (&y * &y + &c) % n; - let diff = if &x > &y { &x - &y } else { &y - &x }; + let mut batch_iter = 0; + let x_saved = x_state.clone(); + while batch_iter < batch_size && iter < max_iter { + // f(z) = z^2 + c mod composite. + y_state = (&y_state * &y_state + &constant) % composite; + let diff = if x_state > y_state { + &x_state - &y_state + } else { + &y_state - &x_state + }; if !diff.is_zero() { - q = (q * diff) % n; + product = (product * diff) % composite; } - k += 1; + batch_iter += 1; iter += 1; } - g = gcd_biguint(&q, n); - x = x_saved; + current_gcd = gcd_biguint(&product, composite); + x_state = x_saved; - if g == one { - // Update x to advance the sequence. 
- x = y.clone(); + if current_gcd == one { + // Update x_state to advance the sequence. + x_state.clone_from(&y_state); } } - if g == one { + if current_gcd == one { continue; } - if &g == n { + if ¤t_gcd == composite { // Fallback: step-by-step gcd. - let mut z = x.clone(); + let mut z_state = x_state.clone(); loop { - z = (&z * &z + &c) % n; - let diff = if &z > &y { &z - &y } else { &y - &z }; - g = gcd_biguint(&diff, n); - if g > one || z == y { + z_state = (&z_state * &z_state + &constant) % composite; + let diff = if z_state > y_state { + &z_state - &y_state + } else { + &y_state - &z_state + }; + current_gcd = gcd_biguint(&diff, composite); + if current_gcd > one || z_state == y_state { break; } } } - if g > one && &g < n { - return Some(g); + if current_gcd > one && ¤t_gcd < composite { + return Some(current_gcd); } } None } -//// Recursively factor n and append factors (primes or unfactored composites) to `factors`. -//// Returns true if full factorization succeeded, false otherwise. -fn gcd_biguint(a: &BigUint, b: &BigUint) -> BigUint { +/// Recursively factor n and append factors (primes or unfactored composites) to `factors`. +/// Returns true if full factorization succeeded, false otherwise. +fn gcd_biguint(lhs: &BigUint, rhs: &BigUint) -> BigUint { // Standard Euclidean algorithm using owned BigUint values to avoid lifetime issues. - let mut x = a.clone(); - let mut y = b.clone(); - while !y.is_zero() { - let r = &x % &y; - x = y; - y = r; + let mut dividend = lhs.clone(); + let mut divisor = rhs.clone(); + while !divisor.is_zero() { + let remainder = ÷nd % &divisor; + dividend = divisor; + divisor = remainder; } - x + dividend } fn factor_biguint_recursive(n: &BigUint, factors: &mut Vec) -> bool { @@ -427,7 +442,7 @@ fn factor_biguint_recursive(n: &BigUint, factors: &mut Vec) -> bool { // If n is small enough, use num_prime's factorize128 for speed. 
if n.bits() <= 128 { - if let Ok(x128) = n.to_string().parse::() { + if let Some(x128) = n.to_u128() { let pf = num_prime::nt_funcs::factorize128(x128); if !pf.is_empty() { for (p, e) in pf { From aea625a9a508fd6686a0537fb7b3a86e72625b93 Mon Sep 17 00:00:00 2001 From: mattsu Date: Thu, 25 Dec 2025 08:02:27 +0900 Subject: [PATCH 03/11] docs(factor): translate comments and note spellchecker --- src/uu/factor/src/factor.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/uu/factor/src/factor.rs b/src/uu/factor/src/factor.rs index 7d3bd884be4..2fff44b99bb 100644 --- a/src/uu/factor/src/factor.rs +++ b/src/uu/factor/src/factor.rs @@ -3,7 +3,7 @@ // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. -// spell-checker:ignore funcs +// spell-checker:ignore funcs biguint modpow unfactored // NOTE: // For BigUint > u128, this implementation attempts factorization using Miller-Rabin, // an improved Pollard-rho, and p-1. 
@@ -76,7 +76,7 @@ fn print_factors_str( let success = factor_biguint_recursive(&x_big, &mut factors); if !success { - // 完全分解できなかった場合のみ exit code=1 + // Only set exit code=1 when complete factorization could not be achieved set_exit_code(1); } @@ -274,7 +274,7 @@ fn pollard_p_minus_1(n: &BigUint) -> Option { return Some(g); } - // a^(M) を段階的に構成(指数を 2^k で伸ばす近似) + // Construct a^(M) step by step (approximating by extending the exponent with 2^k) let mut e = 2u64; while e <= b1 { a = a.modpow(&BigUint::from_u64(e).unwrap(), n); From b1b762a94c440e16600c056b81f0e5ba7dacbe48 Mon Sep 17 00:00:00 2001 From: mattsu Date: Thu, 25 Dec 2025 08:19:20 +0900 Subject: [PATCH 04/11] feat(benchmarks): add jemalloc integration for factor benchmarks - Integrate jemalloc allocator in factor benchmark suite for better memory profiling - Add jemalloc-ctl and jemallocator dependencies with OS-specific dev-dependencies - Implement logging of allocated and resident memory stats before benchmark runs - Update CI workflow to show output for uu_factor benchmarks without suppressing it - Enables precise memory usage tracking on Linux, macOS, and FreeBSD during benchmarking --- .github/workflows/benchmarks.yml | 6 ++++- Cargo.lock | 39 +++++++++++++++++++++++++++ Cargo.toml | 2 ++ src/uu/factor/Cargo.toml | 4 +++ src/uu/factor/benches/factor_bench.rs | 26 ++++++++++++++++++ 5 files changed, 76 insertions(+), 1 deletion(-) diff --git a/.github/workflows/benchmarks.yml b/.github/workflows/benchmarks.yml index 205f6c1a24e..5dbc1ff91f3 100644 --- a/.github/workflows/benchmarks.yml +++ b/.github/workflows/benchmarks.yml @@ -75,5 +75,9 @@ jobs: mode: instrumentation run: | echo "Running benchmarks for ${{ matrix.benchmark-target.package }}" - cargo codspeed run -p ${{ matrix.benchmark-target.package }} > /dev/null + if [[ "${{ matrix.benchmark-target.package }}" == "uu_factor" ]]; then + cargo codspeed run -p ${{ matrix.benchmark-target.package }} + else + cargo codspeed run -p ${{ 
matrix.benchmark-target.package }} > /dev/null + fi token: ${{ secrets.CODSPEED_TOKEN }} diff --git a/Cargo.lock b/Cargo.lock index 5781d4e32fd..5a81bcf38da 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1563,6 +1563,37 @@ version = "1.0.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c" +[[package]] +name = "jemalloc-ctl" +version = "0.5.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7cffc705424a344c054e135d12ee591402f4539245e8bbd64e6c9eaa9458b63c" +dependencies = [ + "jemalloc-sys", + "libc", + "paste", +] + +[[package]] +name = "jemalloc-sys" +version = "0.5.4+5.3.0-patched" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac6c1946e1cea1788cbfde01c993b52a10e2da07f4bac608228d1bed20bfebf2" +dependencies = [ + "cc", + "libc", +] + +[[package]] +name = "jemallocator" +version = "0.5.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a0de374a9f8e63150e6f5e8a60cc14c668226d7a347d8aee1a45766e3c4dd3bc" +dependencies = [ + "jemalloc-sys", + "libc", +] + [[package]] name = "jiff" version = "0.2.16" @@ -2040,6 +2071,12 @@ dependencies = [ "winnow", ] +[[package]] +name = "paste" +version = "1.0.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a" + [[package]] name = "phf" version = "0.13.1" @@ -3317,6 +3354,8 @@ dependencies = [ "clap", "codspeed-divan-compat", "fluent", + "jemalloc-ctl", + "jemallocator", "num-bigint", "num-prime", "num-traits", diff --git a/Cargo.toml b/Cargo.toml index d350157ec32..7a50cb21a8d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -331,6 +331,8 @@ icu_locale = "2.0.0" icu_provider = "2.0.0" indicatif = "0.18.0" itertools = "0.14.0" +jemalloc-ctl = "0.5" +jemallocator = "0.5" jiff = { version = "0.2.10", default-features = false, features = [ "std", "alloc", diff 
--git a/src/uu/factor/Cargo.toml b/src/uu/factor/Cargo.toml index ef672bf9308..09b659e59cd 100644 --- a/src/uu/factor/Cargo.toml +++ b/src/uu/factor/Cargo.toml @@ -34,6 +34,10 @@ divan = { workspace = true } rand = { workspace = true } uucore = { workspace = true, features = ["benchmark"] } +[target.'cfg(any(target_os = "linux", target_os = "macos", target_os = "freebsd"))'.dev-dependencies] +jemalloc-ctl = { workspace = true } +jemallocator = { workspace = true } + [lib] path = "src/factor.rs" diff --git a/src/uu/factor/benches/factor_bench.rs b/src/uu/factor/benches/factor_bench.rs index 952ea09a616..93149eab694 100644 --- a/src/uu/factor/benches/factor_bench.rs +++ b/src/uu/factor/benches/factor_bench.rs @@ -9,6 +9,30 @@ use divan::{Bencher, black_box}; use uu_factor::uumain; use uucore::benchmark::run_util_function; +#[cfg(any(target_os = "linux", target_os = "macos", target_os = "freebsd"))] +use jemallocator::Jemalloc; + +#[cfg(any(target_os = "linux", target_os = "macos", target_os = "freebsd"))] +#[global_allocator] +static ALLOC: Jemalloc = Jemalloc; + +#[cfg(any(target_os = "linux", target_os = "macos", target_os = "freebsd"))] +fn log_jemalloc_stats(label: &str) { + use jemalloc_ctl::{epoch, stats}; + + epoch::advance().unwrap(); + let allocated = stats::allocated::read().unwrap(); + let resident = stats::resident::read().unwrap(); + + println!( + "jemalloc {label}: allocated={} bytes, resident={} bytes", + allocated, resident + ); +} + +#[cfg(not(any(target_os = "linux", target_os = "macos", target_os = "freebsd")))] +fn log_jemalloc_stats(_label: &str) {} + /// Benchmark multiple u64 digits #[divan::bench(args = [(2)])] fn factor_multiple_u64s(bencher: Bencher, start_num: u64) { @@ -16,9 +40,11 @@ fn factor_multiple_u64s(bencher: Bencher, start_num: u64) { // this is a range of 5000 different u128 integers .with_inputs(|| (start_num, start_num + 2500)) .bench_values(|(start_u64, end_u64)| { + log_jemalloc_stats("before factor_multiple_u64s"); for 
u64_digit in start_u64..=end_u64 { black_box(run_util_function(uumain, &[&u64_digit.to_string()])); } + log_jemalloc_stats("after factor_multiple_u64s"); }); } From c0333f34db06028eb62ce7c280091ed81f4589c7 Mon Sep 17 00:00:00 2001 From: mattsu Date: Thu, 25 Dec 2025 08:22:42 +0900 Subject: [PATCH 05/11] chore(cspell): add jemalloc and jemallocator to jargon wordlist Add technical terms for memory allocation libraries to the cspell dictionary to prevent false positives in spellchecking. --- .vscode/cspell.dictionaries/jargon.wordlist.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.vscode/cspell.dictionaries/jargon.wordlist.txt b/.vscode/cspell.dictionaries/jargon.wordlist.txt index bd29bd2461d..2c08907167e 100644 --- a/.vscode/cspell.dictionaries/jargon.wordlist.txt +++ b/.vscode/cspell.dictionaries/jargon.wordlist.txt @@ -164,6 +164,8 @@ wordlists xattrs xpass +jemalloc +jemallocator # * abbreviations AMPM ampm From 2fc930b71b807e57f7330d483c2518a5a56c164f Mon Sep 17 00:00:00 2001 From: mattsu Date: Sat, 27 Dec 2025 15:17:57 +0900 Subject: [PATCH 06/11] refactor(factor): use string interpolation in jemalloc stats logging Replace positional placeholders with named parameters in println! macro for improved readability and consistency with modern Rust formatting. 
--- src/uu/factor/benches/factor_bench.rs | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/uu/factor/benches/factor_bench.rs b/src/uu/factor/benches/factor_bench.rs index 93149eab694..d212fb642c3 100644 --- a/src/uu/factor/benches/factor_bench.rs +++ b/src/uu/factor/benches/factor_bench.rs @@ -24,10 +24,7 @@ fn log_jemalloc_stats(label: &str) { let allocated = stats::allocated::read().unwrap(); let resident = stats::resident::read().unwrap(); - println!( - "jemalloc {label}: allocated={} bytes, resident={} bytes", - allocated, resident - ); + println!("jemalloc {label}: allocated={allocated} bytes, resident={resident} bytes"); } #[cfg(not(any(target_os = "linux", target_os = "macos", target_os = "freebsd")))] From f4b0dc5aeef0b6ec3d5ee8ae864be8e3596b3b35 Mon Sep 17 00:00:00 2001 From: mattsu Date: Sat, 27 Dec 2025 15:24:49 +0900 Subject: [PATCH 07/11] ci: add gmake to FreeBSD CI dependencies Install GNU make (gmake) in the FreeBSD workflow prepare step to support building and testing, as the process requires GNU make utilities alongside existing tools like jq and GNU coreutils. 
--- .github/workflows/freebsd.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/freebsd.yml b/.github/workflows/freebsd.yml index 84f6b55b295..a89af7d4861 100644 --- a/.github/workflows/freebsd.yml +++ b/.github/workflows/freebsd.yml @@ -49,7 +49,7 @@ jobs: sync: rsync copyback: false # We need jq and GNU coreutils to run show-utils.sh and bash to use inline shell string replacement - prepare: pkg install -y curl sudo jq coreutils bash + prepare: pkg install -y curl gmake sudo jq coreutils bash run: | ## Prepare, build, and test # implementation modelled after ref: From ea2e49a326a9757a2b11e8794ae6e66b39da40e1 Mon Sep 17 00:00:00 2001 From: mattsu Date: Sat, 27 Dec 2025 15:36:07 +0900 Subject: [PATCH 08/11] chore: ignore RUSTSEC-2024-0436 for unmaintained paste crate Ignore security advisory RUSTSEC-2024-0436 for the unmaintained "paste" crate, which is used via jemalloc-ctl in the uu_factor benchmark. Suppressing the warning does not affect functionality: the advisory only flags the crate as unmaintained, and it is pulled in through a benchmark dependency. --- deny.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/deny.toml b/deny.toml index 662474b65cd..50e8c238736 100644 --- a/deny.toml +++ b/deny.toml @@ -10,6 +10,7 @@ version = 2 yanked = "warn" ignore = [ #"RUSTSEC-0000-0000", + "RUSTSEC-2024-0436", # paste: unmaintained (via jemalloc-ctl in uu_factor bench) ] # This section is considered when running `cargo deny check licenses` From bc8bc51782e40167cdff51e55cca8e0c5b2ab868 Mon Sep 17 00:00:00 2001 From: mattsu Date: Mon, 26 Jan 2026 08:14:32 +0900 Subject: [PATCH 09/11] Remove jemalloc dependencies from factor utility The jemalloc allocator and related dependencies (jemalloc-ctl, jemalloc-sys, paste) were removed from the factor utility's benchmark code. This change simplifies the dependency tree and removes platform-specific allocator logic that was only used for memory profiling in benchmarks.
The benchmark functionality remains intact, just without jemalloc-specific memory statistics collection. Additionally, a new regression test was added to verify correct factorization of a very large number (15111234931751377131713914373267893176342349831) to ensure the utility handles large inputs properly. --- Cargo.lock | 39 --------------------------- src/uu/factor/Cargo.toml | 4 --- src/uu/factor/benches/factor_bench.rs | 23 ---------------- tests/by-util/test_factor.rs | 12 +++++++++ 4 files changed, 12 insertions(+), 66 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index fcdd760193f..300513a18a4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1600,37 +1600,6 @@ version = "1.0.17" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "92ecc6618181def0457392ccd0ee51198e065e016d1d527a7ac1b6dc7c1f09d2" -[[package]] -name = "jemalloc-ctl" -version = "0.5.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7cffc705424a344c054e135d12ee591402f4539245e8bbd64e6c9eaa9458b63c" -dependencies = [ - "jemalloc-sys", - "libc", - "paste", -] - -[[package]] -name = "jemalloc-sys" -version = "0.5.4+5.3.0-patched" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac6c1946e1cea1788cbfde01c993b52a10e2da07f4bac608228d1bed20bfebf2" -dependencies = [ - "cc", - "libc", -] - -[[package]] -name = "jemallocator" -version = "0.5.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a0de374a9f8e63150e6f5e8a60cc14c668226d7a347d8aee1a45766e3c4dd3bc" -dependencies = [ - "jemalloc-sys", - "libc", -] - [[package]] name = "jiff" version = "0.2.18" @@ -2108,12 +2077,6 @@ dependencies = [ "winnow", ] -[[package]] -name = "paste" -version = "1.0.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a" - [[package]] name = "phf" version = "0.13.1" @@ -3418,8 +3381,6 @@ dependencies = [ "clap", 
"codspeed-divan-compat", "fluent", - "jemalloc-ctl", - "jemallocator", "num-bigint", "num-prime", "num-traits", diff --git a/src/uu/factor/Cargo.toml b/src/uu/factor/Cargo.toml index 3f86c8cb22f..15d09f7a01b 100644 --- a/src/uu/factor/Cargo.toml +++ b/src/uu/factor/Cargo.toml @@ -33,10 +33,6 @@ path = "src/main.rs" divan = { workspace = true } uucore = { workspace = true, features = ["benchmark"] } -[target.'cfg(any(target_os = "linux", target_os = "macos", target_os = "freebsd"))'.dev-dependencies] -jemalloc-ctl = { workspace = true } -jemallocator = { workspace = true } - [lib] path = "src/factor.rs" diff --git a/src/uu/factor/benches/factor_bench.rs b/src/uu/factor/benches/factor_bench.rs index d212fb642c3..952ea09a616 100644 --- a/src/uu/factor/benches/factor_bench.rs +++ b/src/uu/factor/benches/factor_bench.rs @@ -9,27 +9,6 @@ use divan::{Bencher, black_box}; use uu_factor::uumain; use uucore::benchmark::run_util_function; -#[cfg(any(target_os = "linux", target_os = "macos", target_os = "freebsd"))] -use jemallocator::Jemalloc; - -#[cfg(any(target_os = "linux", target_os = "macos", target_os = "freebsd"))] -#[global_allocator] -static ALLOC: Jemalloc = Jemalloc; - -#[cfg(any(target_os = "linux", target_os = "macos", target_os = "freebsd"))] -fn log_jemalloc_stats(label: &str) { - use jemalloc_ctl::{epoch, stats}; - - epoch::advance().unwrap(); - let allocated = stats::allocated::read().unwrap(); - let resident = stats::resident::read().unwrap(); - - println!("jemalloc {label}: allocated={allocated} bytes, resident={resident} bytes"); -} - -#[cfg(not(any(target_os = "linux", target_os = "macos", target_os = "freebsd")))] -fn log_jemalloc_stats(_label: &str) {} - /// Benchmark multiple u64 digits #[divan::bench(args = [(2)])] fn factor_multiple_u64s(bencher: Bencher, start_num: u64) { @@ -37,11 +16,9 @@ fn factor_multiple_u64s(bencher: Bencher, start_num: u64) { // this is a range of 5000 different u128 integers .with_inputs(|| (start_num, start_num + 2500)) 
.bench_values(|(start_u64, end_u64)| { - log_jemalloc_stats("before factor_multiple_u64s"); for u64_digit in start_u64..=end_u64 { black_box(run_util_function(uumain, &[&u64_digit.to_string()])); } - log_jemalloc_stats("after factor_multiple_u64s"); }); } diff --git a/tests/by-util/test_factor.rs b/tests/by-util/test_factor.rs index 0a9e6c3e5ea..d64a9e03ad2 100644 --- a/tests/by-util/test_factor.rs +++ b/tests/by-util/test_factor.rs @@ -169,6 +169,18 @@ fn test_cli_args() { .stdout_contains("9: 3 3"); } +#[test] +fn test_large_regression_number() { + let n = "15111234931751377131713914373267893176342349831"; + new_ucmd!() + .arg(n) + .timeout(Duration::from_secs(240)) + .succeeds() + .stdout_is(format!( + "{n}: 29 127 115319 1971799 82465494029827 218807630881735711\n" + )); +} + #[test] fn test_random() { let log_num_primes = f64::from(u32::try_from(NUM_PRIMES).unwrap()).log2().ceil(); From f53a563b48b124b3df37e0a132c6c46e319e285e Mon Sep 17 00:00:00 2001 From: mattsu Date: Fri, 13 Feb 2026 12:13:05 +0900 Subject: [PATCH 10/11] refactor: add numeric separators to improve readability of large integer literals This commit adds numeric separators (`_`) to large integer literals in the factor.rs file to improve code readability. The changes include: - Adding separators to base arrays for Miller-Rabin primality testing - Adding separators to LCG constants used in Pollard's rho algorithm - Adding separators to LCG default seed value These changes make the large numeric values easier to read and understand without changing any functionality. 
--- src/uu/factor/src/factor.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/uu/factor/src/factor.rs b/src/uu/factor/src/factor.rs index 6e408f5b928..49625de37a3 100644 --- a/src/uu/factor/src/factor.rs +++ b/src/uu/factor/src/factor.rs @@ -171,7 +171,7 @@ fn is_probable_prime(candidate: &BigUint) -> bool { let bases_32: [u64; 3] = [2, 7, 61]; let bases_64: [u64; 12] = [ - 2, 325, 9375, 28178, 450775, 9780504, 1795265022, 3, 5, 7, 11, 13, + 2, 325, 9375, 28178, 450_775, 9_780_504, 1_795_265_022, 3, 5, 7, 11, 13, ]; let bases: Vec = if candidate.bits() <= 32 { @@ -316,8 +316,8 @@ fn pollard_rho(composite: &BigUint) -> Option { } // Use a deterministic LCG to generate parameter sequences. - const LCG_MULTIPLIER: u128 = 6364136223846793005; - const LCG_INCREMENT: u128 = 1442695040888963407; + const LCG_MULTIPLIER: u128 = 6_364_136_223_846_793_005; + const LCG_INCREMENT: u128 = 1_442_695_040_888_963_407; fn lcg_next(x: &mut u128) { *x = x.wrapping_mul(LCG_MULTIPLIER).wrapping_add(LCG_INCREMENT); @@ -330,7 +330,7 @@ fn pollard_rho(composite: &BigUint) -> Option { let max_tries: u64 = 16; let max_iter: u64 = (bits * bits).clamp(10_000, 200_000); - const LCG_DEFAULT_SEED: u128 = 0x9e3779b97f4a7c15; + const LCG_DEFAULT_SEED: u128 = 0x9e37_79b9_7f4a_7c15; let mut seed: u128 = LCG_DEFAULT_SEED; for _try in 0..max_tries { From f7f0581ae268183e2bace2385822d4e9a8196e04 Mon Sep 17 00:00:00 2001 From: mattsu Date: Fri, 13 Feb 2026 12:15:04 +0900 Subject: [PATCH 11/11] refactor: improve readability of bases_64 array initialization Reformatted the bases_64 array initialization to use proper indentation and line breaks for better code readability. The array elements are now aligned vertically, making the code easier to read and maintain. 
--- src/uu/factor/src/factor.rs | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/src/uu/factor/src/factor.rs b/src/uu/factor/src/factor.rs index 49625de37a3..7df6375a9ca 100644 --- a/src/uu/factor/src/factor.rs +++ b/src/uu/factor/src/factor.rs @@ -171,7 +171,18 @@ fn is_probable_prime(candidate: &BigUint) -> bool { let bases_32: [u64; 3] = [2, 7, 61]; let bases_64: [u64; 12] = [ - 2, 325, 9375, 28178, 450_775, 9_780_504, 1_795_265_022, 3, 5, 7, 11, 13, + 2, + 325, + 9375, + 28178, + 450_775, + 9_780_504, + 1_795_265_022, + 3, + 5, + 7, + 11, + 13, ]; let bases: Vec = if candidate.bits() <= 32 {