| // SPDX-License-Identifier: GPL-2.0 |
| /* |
| * Copyright (C) 2003 Bernardo Innocenti <bernie@develer.com> |
| * |
| * Based on former do_div() implementation from asm-parisc/div64.h: |
| * Copyright (C) 1999 Hewlett-Packard Co |
| * Copyright (C) 1999 David Mosberger-Tang <davidm@hpl.hp.com> |
| * |
| * |
| * Generic C version of 64bit/32bit division and modulo, with |
| * 64bit result and 32bit remainder. |
| * |
| * The fast case for (n>>32 == 0) is handled inline by do_div(). |
| * |
| * Code generated for this function might be very inefficient |
| * for some CPUs. __div64_32() can be overridden by linking arch-specific |
| * assembly versions such as arch/ppc/lib/div64.S and arch/sh/lib/div64.S |
| * or by defining a preprocessor macro in arch/<arch>/include/asm/div64.h. |
| */ |
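| |
| /* |
| * A minimal usage sketch of the do_div() wrapper (illustrative; |
| * values verified by hand): |
| * |
| * u64 n = 1000000007; |
| * u32 rem = do_div(n, 10); (afterwards: n == 100000000, rem == 7) |
| */ |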
| |
| #include <linux/bitops.h> |
| #include <linux/export.h> |
| #include <linux/math.h> |
| #include <linux/math64.h> |
| #include <linux/minmax.h> |
| #include <linux/log2.h> |
| |
| /* Not needed on 64bit architectures */ |
| #if BITS_PER_LONG == 32 |
| |
| #ifndef __div64_32 |
| uint32_t __attribute__((weak)) __div64_32(uint64_t *n, uint32_t base) |
| { |
| uint64_t rem = *n; |
| uint64_t b = base; |
| uint64_t res, d = 1; |
| uint32_t high = rem >> 32; |
| |
| /* Reduce the thing a bit first */ |
| res = 0; |
| if (high >= base) { |
| high /= base; |
| res = (uint64_t) high << 32; |
| rem -= (uint64_t) (high*base) << 32; |
| } |
| |
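| /* |
| * Scale the divisor up, doubling d in step, while b stays below rem |
| * and its top bit is clear. |
| */ |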
| while ((int64_t)b > 0 && b < rem) { |
| b = b+b; |
| d = d+d; |
| } |
| |
| do { |
| if (rem >= b) { |
| rem -= b; |
| res += d; |
| } |
| b >>= 1; |
| d >>= 1; |
| } while (d); |
| |
| *n = res; |
| return rem; |
| } |
| EXPORT_SYMBOL(__div64_32); |
| #endif |
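| |
| /* |
| * Worked example for the C fallback above (illustrative): dividing |
| * 2^32 by 3: |
| * |
| * uint64_t n = 0x100000000ULL; |
| * uint32_t r = __div64_32(&n, 3); (n == 0x55555555, r == 1) |
| * |
| * Callers normally reach this through do_div() rather than calling |
| * __div64_32() directly. |
| */ |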
| |
| #ifndef div_s64_rem |
| s64 div_s64_rem(s64 dividend, s32 divisor, s32 *remainder) |
| { |
| u64 quotient; |
| |
| if (dividend < 0) { |
| quotient = div_u64_rem(-dividend, abs(divisor), (u32 *)remainder); |
| *remainder = -*remainder; |
| if (divisor > 0) |
| quotient = -quotient; |
| } else { |
| quotient = div_u64_rem(dividend, abs(divisor), (u32 *)remainder); |
| if (divisor < 0) |
| quotient = -quotient; |
| } |
| return quotient; |
| } |
| EXPORT_SYMBOL(div_s64_rem); |
| #endif |
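| |
| /* |
| * div_s64_rem() follows C truncated division: the remainder takes the |
| * sign of the dividend. Illustrative values (verified by hand): |
| * |
| * s32 rem; |
| * div_s64_rem(-7, 2, &rem); (returns -3, rem == -1) |
| * div_s64_rem(7, -2, &rem); (returns -3, rem == 1) |
| */ |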
| |
| /* |
| * div64_u64_rem - unsigned 64bit divide with 64bit divisor and remainder |
| * @dividend: 64bit dividend |
| * @divisor: 64bit divisor |
| * @remainder: 64bit remainder |
| * |
| * This implementation is comparable to the algorithm used by |
| * div64_u64, but the extra math for computing the remainder is kept |
| * separate to avoid slowing down the div64_u64 operation on 32bit |
| * systems. |
| */ |
| #ifndef div64_u64_rem |
| u64 div64_u64_rem(u64 dividend, u64 divisor, u64 *remainder) |
| { |
| u32 high = divisor >> 32; |
| u64 quot; |
| |
| if (high == 0) { |
| u32 rem32; |
| quot = div_u64_rem(dividend, divisor, &rem32); |
| *remainder = rem32; |
| } else { |
| int n = fls(high); |
| quot = div_u64(dividend >> n, divisor >> n); |
| |
| if (quot != 0) |
| quot--; |
| |
| *remainder = dividend - quot * divisor; |
| if (*remainder >= divisor) { |
| quot++; |
| *remainder -= divisor; |
| } |
| } |
| |
| return quot; |
| } |
| EXPORT_SYMBOL(div64_u64_rem); |
| #endif |
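| |
| /* |
| * Usage sketch for the slow path above (illustrative; values verified |
| * by hand): |
| * |
| * u64 rem; |
| * u64 q = div64_u64_rem(1ULL << 63, 0x100000001ULL, &rem); |
| * (q == 0x7fffffff, rem == 0x80000001) |
| * |
| * Here high == 1, so the estimate divides both operands shifted right |
| * by one; the unconditional decrement already yields the exact |
| * quotient and no final correction is needed for these values. |
| */ |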
| |
| /* |
| * div64_u64 - unsigned 64bit divide with 64bit divisor |
| * @dividend: 64bit dividend |
| * @divisor: 64bit divisor |
| * |
| * This implementation is a modified version of the algorithm proposed |
| * by the book 'Hacker's Delight'. The original source and full proof |
| * can be found at the URL below and are available for use without |
| * restriction. |
| * |
| * 'http://www.hackersdelight.org/hdcodetxt/divDouble.c.txt' |
| */ |
| #ifndef div64_u64 |
| u64 div64_u64(u64 dividend, u64 divisor) |
| { |
| u32 high = divisor >> 32; |
| u64 quot; |
| |
| if (high == 0) { |
| quot = div_u64(dividend, divisor); |
| } else { |
| int n = fls(high); |
| quot = div_u64(dividend >> n, divisor >> n); |
| |
| if (quot != 0) |
| quot--; |
| if ((dividend - quot * divisor) >= divisor) |
| quot++; |
| } |
| |
| return quot; |
| } |
| EXPORT_SYMBOL(div64_u64); |
| #endif |
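| |
| /* |
| * Usage sketch exercising the off-by-one correction above |
| * (illustrative; verified by hand): |
| * |
| * u64 q = div64_u64(~0ULL, 3ULL << 32); (q == 0x55555555) |
| * |
| * The shifted estimate gives 0x55555555, the decrement lowers it to |
| * 0x55555554, and the final check adds one back. |
| */ |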
| |
| #ifndef div64_s64 |
| s64 div64_s64(s64 dividend, s64 divisor) |
| { |
| s64 quot, t; |
| |
| quot = div64_u64(abs(dividend), abs(divisor)); |
| t = (dividend ^ divisor) >> 63; |
| |
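| /* |
| * t is 0 when dividend and divisor have the same sign and -1 (all |
| * ones) otherwise; (quot ^ t) - t then yields quot or -quot |
| * respectively (two's complement negation). |
| */ |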
| return (quot ^ t) - t; |
| } |
| EXPORT_SYMBOL(div64_s64); |
| #endif |
| |
| #endif /* BITS_PER_LONG == 32 */ |
| |
| /* |
| * Iterative div/mod for use when dividend is not expected to be much |
| * bigger than divisor. |
| */ |
| #ifndef iter_div_u64_rem |
| u32 iter_div_u64_rem(u64 dividend, u32 divisor, u64 *remainder) |
| { |
| return __iter_div_u64_rem(dividend, divisor, remainder); |
| } |
| EXPORT_SYMBOL(iter_div_u64_rem); |
| #endif |
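| |
| /* |
| * __iter_div_u64_rem() subtracts the divisor in a loop, so it is only |
| * sensible when the quotient is known to be small. Illustrative use |
| * (values verified by hand): |
| * |
| * u64 rem; |
| * u32 q = iter_div_u64_rem(7000000123ULL, 1000000000, &rem); |
| * (q == 7, rem == 123) |
| */ |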
| |
| #if !defined(mul_u64_add_u64_div_u64) || defined(test_mul_u64_add_u64_div_u64) |
| |
| #define mul_add(a, b, c) add_u64_u32(mul_u32_u32(a, b), c) |
| |
| #if defined(__SIZEOF_INT128__) && !defined(test_mul_u64_add_u64_div_u64) |
| static inline u64 mul_u64_u64_add_u64(u64 *p_lo, u64 a, u64 b, u64 c) |
| { |
| /* native 64x64=128 bits multiplication */ |
| u128 prod = (u128)a * b + c; |
| |
| *p_lo = prod; |
| return prod >> 64; |
| } |
| #else |
| static inline u64 mul_u64_u64_add_u64(u64 *p_lo, u64 a, u64 b, u64 c) |
| { |
| /* perform a 64x64=128 bits multiplication in 32bit chunks */ |
| u64 x, y, z; |
| |
| /* |
| * Since (x-1)*(x-1) + 2*(x-1) == x*x - 1, two u32 values can be added |
| * to a u32*u32 product without overflowing a u64. |
| */ |
| x = mul_add(a, b, c); |
| y = mul_add(a, b >> 32, c >> 32); |
| y = add_u64_u32(y, x >> 32); |
| z = mul_add(a >> 32, b >> 32, y >> 32); |
| y = mul_add(a >> 32, b, y); |
| *p_lo = (y << 32) + (u32)x; |
| return add_u64_u32(z, y >> 32); |
| } |
| #endif |
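| |
| /* |
| * Sanity example for either variant above (verified by hand): |
| * mul_u64_u64_add_u64(&lo, ~0ULL, ~0ULL, 1) returns |
| * 0xfffffffffffffffe with lo == 2, i.e. (2^64 - 1)^2 + 1. |
| */ |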
| |
| #ifndef BITS_PER_ITER |
| #define BITS_PER_ITER (__LONG_WIDTH__ >= 64 ? 32 : 16) |
| #endif |
| |
| #if BITS_PER_ITER == 32 |
| #define mul_u64_long_add_u64(p_lo, a, b, c) mul_u64_u64_add_u64(p_lo, a, b, c) |
| #define add_u64_long(a, b) ((a) + (b)) |
| #else |
| #undef BITS_PER_ITER |
| #define BITS_PER_ITER 16 |
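| |
| /* |
| * 16bit digits keep the q_digit estimate in the division loop below |
| * within a 32bit unsigned long: its numerator is 2 * BITS_PER_ITER |
| * bits wide. |
| */ |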
| static inline u32 mul_u64_long_add_u64(u64 *p_lo, u64 a, u32 b, u64 c) |
| { |
| u64 n_lo = mul_add(a, b, c); |
| u64 n_med = mul_add(a >> 32, b, c >> 32); |
| |
| n_med = add_u64_u32(n_med, n_lo >> 32); |
| *p_lo = n_med << 32 | (u32)n_lo; |
| return n_med >> 32; |
| } |
| |
| #define add_u64_long(a, b) add_u64_u32(a, b) |
| #endif |
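| |
| /* |
| * mul_u64_add_u64_div_u64 - compute (a * b + c) / d, keeping the |
| * intermediate product in 128 bits. |
| * |
| * Returns ~0ULL when the result does not fit in 64 bits; a zero |
| * divisor is deliberately left to trap at runtime. Illustrative use |
| * (example values are ours; arithmetic verified by hand): |
| * |
| * mul_u64_add_u64_div_u64(1ULL << 63, 10, 5, 1ULL << 62) == 20 |
| */ |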
| |
| u64 mul_u64_add_u64_div_u64(u64 a, u64 b, u64 c, u64 d) |
| { |
| unsigned long d_msig, q_digit; |
| unsigned int reps, d_z_hi; |
| u64 quotient, n_lo, n_hi; |
| u32 overflow; |
| |
| n_hi = mul_u64_u64_add_u64(&n_lo, a, b, c); |
| |
| if (!n_hi) |
| return div64_u64(n_lo, d); |
| |
| if (unlikely(n_hi >= d)) { |
| /* trigger runtime exception if divisor is zero */ |
| if (d == 0) { |
| unsigned long zero = 0; |
| |
| OPTIMIZER_HIDE_VAR(zero); |
| return ~0UL/zero; |
| } |
| /* overflow: result is unrepresentable in a u64 */ |
| return ~0ULL; |
| } |
| |
| /* Left align the divisor, shifting the dividend to match */ |
| d_z_hi = __builtin_clzll(d); |
| if (d_z_hi) { |
| d <<= d_z_hi; |
| n_hi = n_hi << d_z_hi | n_lo >> (64 - d_z_hi); |
| n_lo <<= d_z_hi; |
| } |
| |
| reps = 64 / BITS_PER_ITER; |
| /* Optimise loop count for small dividends */ |
| if (!(u32)(n_hi >> 32)) { |
| reps -= 32 / BITS_PER_ITER; |
| n_hi = n_hi << 32 | n_lo >> 32; |
| n_lo <<= 32; |
| } |
| #if BITS_PER_ITER == 16 |
| if (!(u32)(n_hi >> 48)) { |
| reps--; |
| n_hi = add_u64_u32(n_hi << 16, n_lo >> 48); |
| n_lo <<= 16; |
| } |
| #endif |
| |
| /* Invert the dividend so we can use add instead of subtract. */ |
| n_lo = ~n_lo; |
| n_hi = ~n_hi; |
| |
| /* |
| * Get the most significant BITS_PER_ITER bits of the divisor. |
| * This is used to get a low 'guestimate' of the quotient digit. |
| */ |
| d_msig = (d >> (64 - BITS_PER_ITER)) + 1; |
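| |
| /* |
| * Example (assumed values): with BITS_PER_ITER == 32 and a normalised |
| * d == 0xc000000000000000, d_msig == 0xc0000001; dividing by this |
| * incremented value is what keeps each digit guess from being high. |
| */ |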
| |
| /* |
| * Now do a 'long division' with BITS_PER_ITER bit 'digits'. |
| * The 'guess' quotient digit can be low, and can need BITS_PER_ITER+1 |
| * bits. The worst case is dividing ~0 by 0x8000, which requires two |
| * subtracts. |
| */ |
| quotient = 0; |
| while (reps--) { |
| q_digit = (unsigned long)(~n_hi >> (64 - 2 * BITS_PER_ITER)) / d_msig; |
| /* Shift 'n' left to align with the product q_digit * d */ |
| overflow = n_hi >> (64 - BITS_PER_ITER); |
| n_hi = add_u64_u32(n_hi << BITS_PER_ITER, n_lo >> (64 - BITS_PER_ITER)); |
| n_lo <<= BITS_PER_ITER; |
| /* Add the product to the negated dividend, i.e. subtract q_digit * d */ |
| overflow += mul_u64_long_add_u64(&n_hi, d, q_digit, n_hi); |
| /* Adjust for the q_digit 'guestimate' being low */ |
| while (overflow < 0xffffffff >> (32 - BITS_PER_ITER)) { |
| q_digit++; |
| n_hi += d; |
| overflow += n_hi < d; |
| } |
| quotient = add_u64_long(quotient << BITS_PER_ITER, q_digit); |
| } |
| |
| /* |
| * The loop above only ensures the remainder doesn't overflow; it may |
| * still be possible to add (i.e. subtract) one more copy of the |
| * divisor. |
| */ |
| if ((n_hi + d) > n_hi) |
| quotient++; |
| return quotient; |
| } |
| #if !defined(test_mul_u64_add_u64_div_u64) |
| EXPORT_SYMBOL(mul_u64_add_u64_div_u64); |
| #endif |
| #endif |