linux/arch/mips/math-emu/dp_maddf.c

/*
 * IEEE754 floating point arithmetic
 * double precision: MADDF.f (Fused Multiply Add)
 * MADDF.fmt: FPR[fd] = FPR[fd] + (FPR[fs] x FPR[ft])
 *
 * MIPS floating point support
 * Copyright (C) 2015 Imagination Technologies, Ltd.
 * Author: Markos Chandras <markos.chandras@imgtec.com>
 *
 *  This program is free software; you can distribute it and/or modify it
 *  under the terms of the GNU General Public License as published by the
 *  Free Software Foundation; version 2 of the License.
 */

#include "ieee754dp.h"

/*
 * Option bits understood by _dp_maddf().  Each enumerator occupies its own
 * bit so that options can be tested with a bitwise AND.
 */
enum maddf_flags {
	/* Flip the sign of the x * y product before it is combined with z. */
	maddf_negate_product	= 0x1,
};

static union ieee754dp _dp_maddf(union ieee754dp z, union ieee754dp x,
				 union ieee754dp y, enum maddf_flags flags)
{
	int re;
	int rs;
	u64 rm;
	unsigned lxm;
	unsigned hxm;
	unsigned lym;
	unsigned hym;
	u64 lrm;
	u64 hrm;
	u64 t;
	u64 at;
	int s;

	COMPXDP;
	COMPYDP;
	COMPZDP;

	EXPLODEXDP;
	EXPLODEYDP;
	EXPLODEZDP;

	FLUSHXDP;
	FLUSHYDP;
	FLUSHZDP;

	ieee754_clearcx();

	switch (zc) {
	case IEEE754_CLASS_SNAN:
		ieee754_setcx(IEEE754_INVALID_OPERATION);
		return ieee754dp_nanxcpt(z);
	case IEEE754_CLASS_DNORM:
		DPDNORMZ;
	/* QNAN is handled separately below */
	}

	switch (CLPAIR(xc, yc)) {
	case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_SNAN):
	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_SNAN):
	case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_SNAN):
	case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_SNAN):
	case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_SNAN):
		return ieee754dp_nanxcpt(y);

	case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_SNAN):
	case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_QNAN):
	case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_ZERO):
	case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_NORM):
	case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_DNORM):
	case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_INF):
		return ieee754dp_nanxcpt(x);

	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_QNAN):
	case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_QNAN):
	case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_QNAN):
	case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_QNAN):
		return y;

	case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_QNAN):
	case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_ZERO):
	case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_NORM):
	case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_DNORM):
	case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_INF):
		return x;


	/*
	 * Infinity handling
	 */
	case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_ZERO):
	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_INF):
		if (zc == IEEE754_CLASS_QNAN)
			return z;
		ieee754_setcx(IEEE754_INVALID_OPERATION);
		return ieee754dp_indef();

	case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_INF):
	case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_INF):
	case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_NORM):
	case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_DNORM):
	case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF):
		if (zc == IEEE754_CLASS_QNAN)
			return z;
		return ieee754dp_inf(xs ^ ys);

	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO):
	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_NORM):
	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_DNORM):
	case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_ZERO):
	case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_ZERO):
		if (zc == IEEE754_CLASS_INF)
			return ieee754dp_inf(zs);
		/* Multiplication is 0 so just return z */
		return z;

	case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM):
		DPDNORMX;

	case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM):
		if (zc == IEEE754_CLASS_QNAN)
			return z;
		else if (zc == IEEE754_CLASS_INF)
			return ieee754dp_inf(zs);
		DPDNORMY;
		break;

	case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_NORM):
		if (zc == IEEE754_CLASS_QNAN)
			return z;
		else if (zc == IEEE754_CLASS_INF)
			return ieee754dp_inf(zs);
		DPDNORMX;
		break;

	case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_NORM):
		if (zc == IEEE754_CLASS_QNAN)
			return z;
		else if (zc == IEEE754_CLASS_INF)
			return ieee754dp_inf(zs);
		/* fall through to real computations */
	}

	/* Finally get to do some computation */

	/*
	 * Do the multiplication bit first
	 *
	 * rm = xm * ym, re = xe + ye basically
	 *
	 * At this point xm and ym should have been normalized.
	 */
	assert(xm & DP_HIDDEN_BIT);
	assert(ym & DP_HIDDEN_BIT);

	re = xe + ye;
	rs = xs ^ ys;
	if (flags & maddf_negate_product)
		rs ^= 1;

	/* shunt to top of word */
	xm <<= 64 - (DP_FBITS + 1);
	ym <<= 64 - (DP_FBITS + 1);

	/*
	 * Multiply 64 bits xm, ym to give high 64 bits rm with stickness.
	 */

	/* 32 * 32 => 64 */
#define DPXMULT(x, y)	((u64)(x) * (u64)y)

	lxm = xm;
	hxm = xm >> 32;
	lym = ym;
	hym = ym >> 32;

	lrm = DPXMULT(lxm, lym);
	hrm = DPXMULT(hxm, hym);

	t = DPXMULT(lxm, hym);

	at = lrm + (t << 32);
	hrm += at < lrm;
	lrm = at;

	hrm = hrm + (t >> 32);

	t = DPXMULT(hxm, lym);

	at = lrm + (t << 32);
	hrm += at < lrm;
	lrm = at;

	hrm = hrm + (t >> 32);

	rm = hrm | (lrm != 0);

	/*
	 * Sticky shift down to normal rounding precision.
	 */
	if ((s64) rm < 0) {
		rm = (rm >> (64 - (DP_FBITS + 1 + 3))) |
		     ((rm << (DP_FBITS + 1 + 3)) != 0);
		re++;
	} else {
		rm = (rm >> (64 - (DP_FBITS + 1 + 3 + 1))) |
		     ((rm << (DP_FBITS + 1 + 3 + 1)) != 0);
	}
	assert(rm & (DP_HIDDEN_BIT << 3));

	/* And now the addition */
	assert(zm & DP_HIDDEN_BIT);

	/*
	 * Provide guard,round and stick bit space.
	 */
	zm <<= 3;

	if (ze > re) {
		/*
		 * Have to shift y fraction right to align.
		 */
		s = ze - re;
		rm = XDPSRS(rm, s);
		re += s;
	} else if (re > ze) {
		/*
		 * Have to shift x fraction right to align.
		 */
		s = re - ze;
		zm = XDPSRS(zm, s);
		ze += s;
	}
	assert(ze == re);
	assert(ze <= DP_EMAX);

	if (zs == rs) {
		/*
		 * Generate 28 bit result of adding two 27 bit numbers
		 * leaving result in xm, xs and xe.
		 */
		zm = zm + rm;

		if (zm >> (DP_FBITS + 1 + 3)) { /* carry out */
			zm = XDPSRS1(zm);
			ze++;
		}
	} else {
		if (zm >= rm) {
			zm = zm - rm;
		} else {
			zm = rm - zm;
			zs = rs;
		}
		if (zm == 0)
			return ieee754dp_zero(ieee754_csr.rm == FPU_CSR_RD);

		/*
		 * Normalize to rounding precision.
		 */
		while ((zm >> (DP_FBITS + 3)) == 0) {
			zm <<= 1;
			ze--;
		}
	}

	return ieee754dp_format(zs, ze, zm);
}

union ieee754dp ieee754dp_maddf(union ieee754dp z, union ieee754dp x,
				union ieee754dp y)
{
	return _dp_maddf(z, x, y, 0);
}

union ieee754dp ieee754dp_msubf(union ieee754dp z, union ieee754dp x,
				union ieee754dp y)
{
	return _dp_maddf(z, x, y, maddf_negate_product);
}
MIPS: math-emu: Add support for the MIPS R6 MADDF FPU instruction MIPS R6 introduced the following instruction: Floating Point Fused Multiply Add: MADDF.fmt To perform a fused multiply-add of FP values. MADDF.fmt: FPR[fd] = FPR[fd] + (FPR[fs] x FPR[ft]) Signed-off-by: Markos Chandras <markos.chandras@imgtec.com> Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/10956/ Signed-off-by: Ralf Baechle <ralf@linux-mips.org> 2015-08-13 09:56:31 +02:00			`/*`
			`* IEEE754 floating point arithmetic`
			`* double precision: MADDF.f (Fused Multiply Add)`
			`* MADDF.fmt: FPR[fd] = FPR[fd] + (FPR[fs] x FPR[ft])`
			`*`
			`* MIPS floating point support`
			`* Copyright (C) 2015 Imagination Technologies, Ltd.`
			`* Author: Markos Chandras <markos.chandras@imgtec.com>`
			`*`
			`* This program is free software; you can distribute it and/or modify it`
			`* under the terms of the GNU General Public License as published by the`
			`* Free Software Foundation; version 2 of the License.`
			`*/`

			`#include "ieee754dp.h"`

MIPS: math-emu: Unify ieee754dp_m{add,sub}f The code for emulating MIPSr6 madd.d & msub.d instructions has previously been implemented as 2 different functions, namely ieee754dp_maddf & ieee754dp_msubf. The difference in behaviour of these 2 instructions is merely the sign of the product, so we can easily share the code implementing them. Do this for the double precision variant, removing the original ieee754dp_msubf in favor of reusing the code from ieee754dp_maddf. Signed-off-by: Paul Burton <paul.burton@imgtec.com> Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/13155/ Signed-off-by: Ralf Baechle <ralf@linux-mips.org> 2016-04-21 14:04:50 +01:00			`enum maddf_flags {`
			`maddf_negate_product = 1 << 0,`
			`};`

			`static union ieee754dp _dp_maddf(union ieee754dp z, union ieee754dp x,`
			`union ieee754dp y, enum maddf_flags flags)`
MIPS: math-emu: Add support for the MIPS R6 MADDF FPU instruction MIPS R6 introduced the following instruction: Floating Point Fused Multiply Add: MADDF.fmt To perform a fused multiply-add of FP values. MADDF.fmt: FPR[fd] = FPR[fd] + (FPR[fs] x FPR[ft]) Signed-off-by: Markos Chandras <markos.chandras@imgtec.com> Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/10956/ Signed-off-by: Ralf Baechle <ralf@linux-mips.org> 2015-08-13 09:56:31 +02:00			`{`
			`int re;`
			`int rs;`
			`u64 rm;`
			`unsigned lxm;`
			`unsigned hxm;`
			`unsigned lym;`
			`unsigned hym;`
			`u64 lrm;`
			`u64 hrm;`
			`u64 t;`
			`u64 at;`
			`int s;`

			`COMPXDP;`
			`COMPYDP;`
MIPS: math-emu: Add z argument macros Introduce macros for handling the "z" argument to maddf & msubf, making its handling consistent with that of the "x" & "y" arguments rather than open-coding equivalents. Signed-off-by: Paul Burton <paul.burton@imgtec.com> Cc: Maciej W. Rozycki <macro@imgtec.com> Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/13156/ Signed-off-by: Ralf Baechle <ralf@linux-mips.org> 2016-04-21 14:04:51 +01:00			`COMPZDP;`
MIPS: math-emu: Add support for the MIPS R6 MADDF FPU instruction MIPS R6 introduced the following instruction: Floating Point Fused Multiply Add: MADDF.fmt To perform a fused multiply-add of FP values. MADDF.fmt: FPR[fd] = FPR[fd] + (FPR[fs] x FPR[ft]) Signed-off-by: Markos Chandras <markos.chandras@imgtec.com> Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/10956/ Signed-off-by: Ralf Baechle <ralf@linux-mips.org> 2015-08-13 09:56:31 +02:00
			`EXPLODEXDP;`
			`EXPLODEYDP;`
MIPS: math-emu: Add z argument macros Introduce macros for handling the "z" argument to maddf & msubf, making its handling consistent with that of the "x" & "y" arguments rather than open-coding equivalents. Signed-off-by: Paul Burton <paul.burton@imgtec.com> Cc: Maciej W. Rozycki <macro@imgtec.com> Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/13156/ Signed-off-by: Ralf Baechle <ralf@linux-mips.org> 2016-04-21 14:04:51 +01:00			`EXPLODEZDP;`
MIPS: math-emu: Add support for the MIPS R6 MADDF FPU instruction MIPS R6 introduced the following instruction: Floating Point Fused Multiply Add: MADDF.fmt To perform a fused multiply-add of FP values. MADDF.fmt: FPR[fd] = FPR[fd] + (FPR[fs] x FPR[ft]) Signed-off-by: Markos Chandras <markos.chandras@imgtec.com> Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/10956/ Signed-off-by: Ralf Baechle <ralf@linux-mips.org> 2015-08-13 09:56:31 +02:00
			`FLUSHXDP;`
			`FLUSHYDP;`
MIPS: math-emu: Add z argument macros Introduce macros for handling the "z" argument to maddf & msubf, making its handling consistent with that of the "x" & "y" arguments rather than open-coding equivalents. Signed-off-by: Paul Burton <paul.burton@imgtec.com> Cc: Maciej W. Rozycki <macro@imgtec.com> Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/13156/ Signed-off-by: Ralf Baechle <ralf@linux-mips.org> 2016-04-21 14:04:51 +01:00			`FLUSHZDP;`
MIPS: math-emu: Add support for the MIPS R6 MADDF FPU instruction MIPS R6 introduced the following instruction: Floating Point Fused Multiply Add: MADDF.fmt To perform a fused multiply-add of FP values. MADDF.fmt: FPR[fd] = FPR[fd] + (FPR[fs] x FPR[ft]) Signed-off-by: Markos Chandras <markos.chandras@imgtec.com> Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/10956/ Signed-off-by: Ralf Baechle <ralf@linux-mips.org> 2015-08-13 09:56:31 +02:00
			`ieee754_clearcx();`

			`switch (zc) {`
			`case IEEE754_CLASS_SNAN:`
			`ieee754_setcx(IEEE754_INVALID_OPERATION);`
			`return ieee754dp_nanxcpt(z);`
			`case IEEE754_CLASS_DNORM:`
MIPS: math-emu: Add z argument macros Introduce macros for handling the "z" argument to maddf & msubf, making its handling consistent with that of the "x" & "y" arguments rather than open-coding equivalents. Signed-off-by: Paul Burton <paul.burton@imgtec.com> Cc: Maciej W. Rozycki <macro@imgtec.com> Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/13156/ Signed-off-by: Ralf Baechle <ralf@linux-mips.org> 2016-04-21 14:04:51 +01:00			`DPDNORMZ;`
MIPS: math-emu: Add support for the MIPS R6 MADDF FPU instruction MIPS R6 introduced the following instruction: Floating Point Fused Multiply Add: MADDF.fmt To perform a fused multiply-add of FP values. MADDF.fmt: FPR[fd] = FPR[fd] + (FPR[fs] x FPR[ft]) Signed-off-by: Markos Chandras <markos.chandras@imgtec.com> Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/10956/ Signed-off-by: Ralf Baechle <ralf@linux-mips.org> 2015-08-13 09:56:31 +02:00			`/* QNAN is handled separately below */`
			`}`

			`switch (CLPAIR(xc, yc)) {`
			`case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_SNAN):`
			`case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_SNAN):`
			`case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_SNAN):`
			`case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_SNAN):`
			`case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_SNAN):`
			`return ieee754dp_nanxcpt(y);`

			`case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_SNAN):`
			`case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_QNAN):`
			`case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_ZERO):`
			`case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_NORM):`
			`case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_DNORM):`
			`case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_INF):`
			`return ieee754dp_nanxcpt(x);`

			`case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_QNAN):`
			`case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_QNAN):`
			`case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_QNAN):`
			`case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_QNAN):`
			`return y;`

			`case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_QNAN):`
			`case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_ZERO):`
			`case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_NORM):`
			`case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_DNORM):`
			`case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_INF):`
			`return x;`


			`/*`
			`* Infinity handling`
			`*/`
			`case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_ZERO):`
			`case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_INF):`
			`if (zc == IEEE754_CLASS_QNAN)`
			`return z;`
			`ieee754_setcx(IEEE754_INVALID_OPERATION);`
			`return ieee754dp_indef();`

			`case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_INF):`
			`case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_INF):`
			`case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_NORM):`
			`case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_DNORM):`
			`case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF):`
			`if (zc == IEEE754_CLASS_QNAN)`
			`return z;`
			`return ieee754dp_inf(xs ^ ys);`

			`case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO):`
			`case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_NORM):`
			`case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_DNORM):`
			`case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_ZERO):`
			`case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_ZERO):`
			`if (zc == IEEE754_CLASS_INF)`
			`return ieee754dp_inf(zs);`
			`/* Multiplication is 0 so just return z */`
			`return z;`

			`case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM):`
			`DPDNORMX;`

			`case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM):`
			`if (zc == IEEE754_CLASS_QNAN)`
			`return z;`
			`else if (zc == IEEE754_CLASS_INF)`
			`return ieee754dp_inf(zs);`
			`DPDNORMY;`
			`break;`

			`case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_NORM):`
			`if (zc == IEEE754_CLASS_QNAN)`
			`return z;`
			`else if (zc == IEEE754_CLASS_INF)`
			`return ieee754dp_inf(zs);`
			`DPDNORMX;`
			`break;`

			`case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_NORM):`
			`if (zc == IEEE754_CLASS_QNAN)`
			`return z;`
			`else if (zc == IEEE754_CLASS_INF)`
			`return ieee754dp_inf(zs);`
			`/* fall through to real computations */`
			`}`

			`/* Finally get to do some computation */`

			`/*`
			`* Do the multiplication bit first`
			`*`
			`* rm = xm * ym, re = xe + ye basically`
			`*`
			`* At this point xm and ym should have been normalized.`
			`*/`
			`assert(xm & DP_HIDDEN_BIT);`
			`assert(ym & DP_HIDDEN_BIT);`

			`re = xe + ye;`
			`rs = xs ^ ys;`
MIPS: math-emu: Unify ieee754dp_m{add,sub}f The code for emulating MIPSr6 madd.d & msub.d instructions has previously been implemented as 2 different functions, namely ieee754dp_maddf & ieee754dp_msubf. The difference in behaviour of these 2 instructions is merely the sign of the product, so we can easily share the code implementing them. Do this for the double precision variant, removing the original ieee754dp_msubf in favor of reusing the code from ieee754dp_maddf. Signed-off-by: Paul Burton <paul.burton@imgtec.com> Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/13155/ Signed-off-by: Ralf Baechle <ralf@linux-mips.org> 2016-04-21 14:04:50 +01:00			`if (flags & maddf_negate_product)`
			`rs ^= 1;`
MIPS: math-emu: Add support for the MIPS R6 MADDF FPU instruction MIPS R6 introduced the following instruction: Floating Point Fused Multiply Add: MADDF.fmt To perform a fused multiply-add of FP values. MADDF.fmt: FPR[fd] = FPR[fd] + (FPR[fs] x FPR[ft]) Signed-off-by: Markos Chandras <markos.chandras@imgtec.com> Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/10956/ Signed-off-by: Ralf Baechle <ralf@linux-mips.org> 2015-08-13 09:56:31 +02:00
			`/* shunt to top of word */`
			`xm <<= 64 - (DP_FBITS + 1);`
			`ym <<= 64 - (DP_FBITS + 1);`

			`/*`
MIPS: math-emu: Fix bit-width in ieee754dp_{mul, maddf, msubf} comments A comment in ieee754dp_mul indicates that the code is about to perform a 32b x 32b multiplication & keep the high 32b of the result. It appears this was copied from the single-precision multiplication code, since the code actually goes on to perform a 64b x 64b multiplication & keep the high 64b of the result. Fix the comment to indicate 64b. It appears also that this comment was copied verbatim along with the rest of the multiplication code into ieee754dp_maddf, which has since been renamed _dp_maddf. Fix the same issue there. Signed-off-by: Paul Burton <paul.burton@imgtec.com> Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/13157/ Signed-off-by: Ralf Baechle <ralf@linux-mips.org> 2016-04-21 14:04:52 +01:00			`* Multiply 64 bits xm, ym to give high 64 bits rm with stickness.`
MIPS: math-emu: Add support for the MIPS R6 MADDF FPU instruction MIPS R6 introduced the following instruction: Floating Point Fused Multiply Add: MADDF.fmt To perform a fused multiply-add of FP values. MADDF.fmt: FPR[fd] = FPR[fd] + (FPR[fs] x FPR[ft]) Signed-off-by: Markos Chandras <markos.chandras@imgtec.com> Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/10956/ Signed-off-by: Ralf Baechle <ralf@linux-mips.org> 2015-08-13 09:56:31 +02:00			`*/`

			`/* 32 * 32 => 64 */`
			`#define DPXMULT(x, y) ((u64)(x) * (u64)y)`

			`lxm = xm;`
			`hxm = xm >> 32;`
			`lym = ym;`
			`hym = ym >> 32;`

			`lrm = DPXMULT(lxm, lym);`
			`hrm = DPXMULT(hxm, hym);`

			`t = DPXMULT(lxm, hym);`

			`at = lrm + (t << 32);`
			`hrm += at < lrm;`
			`lrm = at;`

			`hrm = hrm + (t >> 32);`

			`t = DPXMULT(hxm, lym);`

			`at = lrm + (t << 32);`
			`hrm += at < lrm;`
			`lrm = at;`

			`hrm = hrm + (t >> 32);`

			`rm = hrm \| (lrm != 0);`

			`/*`
			`* Sticky shift down to normal rounding precision.`
			`*/`
			`if ((s64) rm < 0) {`
			`rm = (rm >> (64 - (DP_FBITS + 1 + 3))) \|`
			`((rm << (DP_FBITS + 1 + 3)) != 0);`
MIPS: math-emu: Fix code indentation A line incrementing the re variable was indented a level too deep in ieee754dp_mul, making the code unclear to read. Fix the indentation. This appears to have been copied verbatim along with the rest of the multiplication code to ieee754dp_maddf, now _dp_maddf, too so fix the indentation there too. Signed-off-by: Paul Burton <paul.burton@imgtec.com> Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/13158/ Signed-off-by: Ralf Baechle <ralf@linux-mips.org> 2016-04-21 14:04:53 +01:00			`re++;`
MIPS: math-emu: Add support for the MIPS R6 MADDF FPU instruction MIPS R6 introduced the following instruction: Floating Point Fused Multiply Add: MADDF.fmt To perform a fused multiply-add of FP values. MADDF.fmt: FPR[fd] = FPR[fd] + (FPR[fs] x FPR[ft]) Signed-off-by: Markos Chandras <markos.chandras@imgtec.com> Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/10956/ Signed-off-by: Ralf Baechle <ralf@linux-mips.org> 2015-08-13 09:56:31 +02:00			`} else {`
			`rm = (rm >> (64 - (DP_FBITS + 1 + 3 + 1))) \|`
			`((rm << (DP_FBITS + 1 + 3 + 1)) != 0);`
			`}`
			`assert(rm & (DP_HIDDEN_BIT << 3));`

			`/* And now the addition */`
			`assert(zm & DP_HIDDEN_BIT);`

			`/*`
			`* Provide guard,round and stick bit space.`
			`*/`
			`zm <<= 3;`

			`if (ze > re) {`
			`/*`
			`* Have to shift y fraction right to align.`
			`*/`
			`s = ze - re;`
			`rm = XDPSRS(rm, s);`
			`re += s;`
			`} else if (re > ze) {`
			`/*`
			`* Have to shift x fraction right to align.`
			`*/`
			`s = re - ze;`
			`zm = XDPSRS(zm, s);`
			`ze += s;`
			`}`
			`assert(ze == re);`
			`assert(ze <= DP_EMAX);`

			`if (zs == rs) {`
			`/*`
			`* Generate 28 bit result of adding two 27 bit numbers`
			`* leaving result in xm, xs and xe.`
			`*/`
			`zm = zm + rm;`

			`if (zm >> (DP_FBITS + 1 + 3)) { /* carry out */`
			`zm = XDPSRS1(zm);`
			`ze++;`
			`}`
			`} else {`
			`if (zm >= rm) {`
			`zm = zm - rm;`
			`} else {`
			`zm = rm - zm;`
			`zs = rs;`
			`}`
			`if (zm == 0)`
			`return ieee754dp_zero(ieee754_csr.rm == FPU_CSR_RD);`

			`/*`
			`* Normalize to rounding precision.`
			`*/`
			`while ((zm >> (DP_FBITS + 3)) == 0) {`
			`zm <<= 1;`
			`ze--;`
			`}`
			`}`

			`return ieee754dp_format(zs, ze, zm);`
			`}`
MIPS: math-emu: Unify ieee754dp_m{add,sub}f The code for emulating MIPSr6 madd.d & msub.d instructions has previously been implemented as 2 different functions, namely ieee754dp_maddf & ieee754dp_msubf. The difference in behaviour of these 2 instructions is merely the sign of the product, so we can easily share the code implementing them. Do this for the double precision variant, removing the original ieee754dp_msubf in favor of reusing the code from ieee754dp_maddf. Signed-off-by: Paul Burton <paul.burton@imgtec.com> Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/13155/ Signed-off-by: Ralf Baechle <ralf@linux-mips.org> 2016-04-21 14:04:50 +01:00
			`union ieee754dp ieee754dp_maddf(union ieee754dp z, union ieee754dp x,`
			`union ieee754dp y)`
			`{`
			`return _dp_maddf(z, x, y, 0);`
			`}`

			`union ieee754dp ieee754dp_msubf(union ieee754dp z, union ieee754dp x,`
			`union ieee754dp y)`
			`{`
			`return _dp_maddf(z, x, y, maddf_negate_product);`
			`}`