2db4bc3418
With CPU_DADDI_WORKAROUNDS enabled __delay assembles with a macro in a
branch delay slot:

{standard input}: Assembler messages:
{standard input}:18: Warning: Macro instruction expanded into multiple instructions in a branch delay slot

and broken code results:

0000000000000000 <__delay>:
   0: 1480ffff  bnez   a0,0 <__delay>
   4: 24010001  li     at,1
   8: 0081202f  dsubu  a0,a0,at
   c: 03e00008  jr     ra
  10: 00000000  nop
  14: 00000000  nop

Consequently the function loops indefinitely, showing up prominently as
a hang in the delay loop calibration at bootstrap.

This change corrects the problem by forcing the immediate 1 into a
register while keeping code produced identical where
CPU_DADDI_WORKAROUNDS is disabled.

Signed-off-by: Maciej W. Rozycki <macro@linux-mips.org>
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/6669/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
67 lines
1.6 KiB
C
67 lines
1.6 KiB
C
/*
 * This file is subject to the terms and conditions of the GNU General Public
 * License.  See the file "COPYING" in the main directory of this archive
 * for more details.
 *
 * Copyright (C) 1994 by Waldorf Electronics
 * Copyright (C) 1995 - 2000, 01, 03 by Ralf Baechle
 * Copyright (C) 1999, 2000 Silicon Graphics, Inc.
 * Copyright (C) 2007, 2014 Maciej W. Rozycki
 */
|
|
#include <linux/module.h>
|
|
#include <linux/param.h>
|
|
#include <linux/smp.h>
|
|
|
|
#include <asm/compiler.h>
|
|
#include <asm/war.h>
|
|
|
|
/*
 * GCC_DADDI_IMM_ASM() expands to the inline-asm constraint string used for
 * the constant operand of the decrement in __delay().
 *
 * Normally the constant may be passed as an immediate ("I").  When
 * CONFIG_CPU_DADDI_WORKAROUNDS is set, an immediate subtract is expanded by
 * the assembler into several instructions, which breaks the loop because the
 * subtract sits in a branch delay slot; forcing the constant into a
 * register ("r") keeps it a single instruction.
 */
#ifndef CONFIG_CPU_DADDI_WORKAROUNDS
#define GCC_DADDI_IMM_ASM() "I"
#else
#define GCC_DADDI_IMM_ASM() "r"
#endif
|
|
|
|
void __delay(unsigned long loops)
|
|
{
|
|
__asm__ __volatile__ (
|
|
" .set noreorder \n"
|
|
" .align 3 \n"
|
|
"1: bnez %0, 1b \n"
|
|
#if BITS_PER_LONG == 32
|
|
" subu %0, %1 \n"
|
|
#else
|
|
" dsubu %0, %1 \n"
|
|
#endif
|
|
" .set reorder \n"
|
|
: "=r" (loops)
|
|
: GCC_DADDI_IMM_ASM() (1), "0" (loops));
|
|
}
|
|
EXPORT_SYMBOL(__delay);
|
|
|
|
/*
|
|
* Division by multiplication: you don't have to worry about
|
|
* loss of precision.
|
|
*
|
|
* Use only for very small delays ( < 1 msec). Should probably use a
|
|
* lookup table, really, as the multiplications take much too long with
|
|
* short delays. This is a "reasonable" implementation, though (and the
|
|
* first constant multiplications gets optimized away if the delay is
|
|
* a constant)
|
|
*/
|
|
|
|
void __udelay(unsigned long us)
|
|
{
|
|
unsigned int lpj = raw_current_cpu_data.udelay_val;
|
|
|
|
__delay((us * 0x000010c7ull * HZ * lpj) >> 32);
|
|
}
|
|
EXPORT_SYMBOL(__udelay);
|
|
|
|
void __ndelay(unsigned long ns)
|
|
{
|
|
unsigned int lpj = raw_current_cpu_data.udelay_val;
|
|
|
|
__delay((ns * 0x00000005ull * HZ * lpj) >> 32);
|
|
}
|
|
EXPORT_SYMBOL(__ndelay);
|