powerpc: Improve resolution of VDSO clock_gettime

Currently the clock_gettime implementation in the VDSO produces a
result with microsecond resolution for the cases that are handled
without a system call, i.e. CLOCK_REALTIME and CLOCK_MONOTONIC.  The
nanoseconds field of the result is obtained by computing a
microseconds value and multiplying by 1000.

This changes the code in the VDSO to do the computation for
clock_gettime with nanosecond resolution.  That means that the
resolution of the result will ultimately depend on the timebase
frequency.

Because the timestamp in the VDSO datapage (stamp_xsec, the real time
corresponding to the timebase count in tb_orig_stamp) is in units of
2^-20 seconds (roughly 954 nanoseconds), it is too coarse for computing
a result with nanosecond resolution.  Therefore this adds a copy of
xtime to the VDSO datapage and updates it in update_gtod() along with
the other time-related fields.

Signed-off-by: Paul Mackerras <paulus@samba.org>
This commit is contained in:
Paul Mackerras 2008-10-27 23:56:03 +00:00
parent c73049f6aa
commit 597bc5c00b
5 changed files with 206 additions and 148 deletions

View File

@ -39,6 +39,7 @@
#ifndef __ASSEMBLY__
#include <linux/unistd.h>
#include <linux/time.h>
#define SYSCALL_MAP_SIZE ((__NR_syscalls + 31) / 32)
@ -83,6 +84,7 @@ struct vdso_data {
__u32 icache_log_block_size; /* L1 i-cache log block size */
__s32 wtom_clock_sec; /* Wall to monotonic clock */
__s32 wtom_clock_nsec;
struct timespec stamp_xtime; /* xtime as at tb_orig_stamp */
__u32 syscall_map_64[SYSCALL_MAP_SIZE]; /* map of syscalls */
__u32 syscall_map_32[SYSCALL_MAP_SIZE]; /* map of syscalls */
};
@ -102,6 +104,7 @@ struct vdso_data {
__u32 tz_dsttime; /* Type of dst correction 0x5C */
__s32 wtom_clock_sec; /* Wall to monotonic clock */
__s32 wtom_clock_nsec;
struct timespec stamp_xtime; /* xtime as at tb_orig_stamp */
__u32 syscall_map_32[SYSCALL_MAP_SIZE]; /* map of syscalls */
__u32 dcache_block_size; /* L1 d-cache block size */
__u32 icache_block_size; /* L1 i-cache block size */

View File

@ -306,6 +306,7 @@ int main(void)
DEFINE(CFG_SYSCALL_MAP32, offsetof(struct vdso_data, syscall_map_32));
DEFINE(WTOM_CLOCK_SEC, offsetof(struct vdso_data, wtom_clock_sec));
DEFINE(WTOM_CLOCK_NSEC, offsetof(struct vdso_data, wtom_clock_nsec));
DEFINE(STAMP_XTIME, offsetof(struct vdso_data, stamp_xtime));
DEFINE(CFG_ICACHE_BLOCKSZ, offsetof(struct vdso_data, icache_block_size));
DEFINE(CFG_DCACHE_BLOCKSZ, offsetof(struct vdso_data, dcache_block_size));
DEFINE(CFG_ICACHE_LOGBLOCKSZ, offsetof(struct vdso_data, icache_log_block_size));

View File

@ -456,6 +456,7 @@ static inline void update_gtod(u64 new_tb_stamp, u64 new_stamp_xsec,
vdso_data->tb_to_xs = new_tb_to_xs;
vdso_data->wtom_clock_sec = wall_to_monotonic.tv_sec;
vdso_data->wtom_clock_nsec = wall_to_monotonic.tv_nsec;
vdso_data->stamp_xtime = xtime;
smp_wmb();
++(vdso_data->tb_update_count);
}

View File

@ -16,6 +16,13 @@
#include <asm/asm-offsets.h>
#include <asm/unistd.h>
/*
 * Offset for the low 32-bit part of a field of long type.
 *
 * Added to a field offset when a 32-bit load (lwz) must fetch only the
 * low-order word.  With CONFIG_PPC64 the low word is taken from 4 bytes
 * into the field; otherwise the field is read from its start, so the
 * extra offset is 0.
 */
#ifdef CONFIG_PPC64
#define LOPART 4
#else
#define LOPART 0
#endif
.text
/*
* Exact prototype of gettimeofday
@ -90,101 +97,53 @@ V_FUNCTION_BEGIN(__kernel_clock_gettime)
mflr r12 /* r12 saves lr */
.cfi_register lr,r12
mr r10,r3 /* r10 saves id */
mr r11,r4 /* r11 saves tp */
bl __get_datapage@local /* get data page */
mr r9,r3 /* datapage ptr in r9 */
beq cr1,50f /* if monotonic -> jump there */
/*
* CLOCK_REALTIME
*/
bl __do_get_xsec@local /* get xsec from tb & kernel */
bne- 98f /* out of line -> do syscall */
/* seconds are xsec >> 20 */
rlwinm r5,r4,12,20,31
rlwimi r5,r3,12,0,19
stw r5,TSPC32_TV_SEC(r11)
/* get remaining xsec and convert to nsec. we scale
* up remaining xsec by 12 bits and get the top 32 bits
* of the multiplication, then we multiply by 1000
*/
rlwinm r5,r4,12,0,19
lis r6,1000000@h
ori r6,r6,1000000@l
mulhwu r5,r5,r6
mulli r5,r5,1000
stw r5,TSPC32_TV_NSEC(r11)
mtlr r12
crclr cr0*4+so
li r3,0
blr
50: bl __do_get_tspec@local /* get sec/nsec from tb & kernel */
bne cr1,80f /* not monotonic -> all done */
/*
* CLOCK_MONOTONIC
*/
50: bl __do_get_xsec@local /* get xsec from tb & kernel */
bne- 98f /* out of line -> do syscall */
/* seconds are xsec >> 20 */
rlwinm r6,r4,12,20,31
rlwimi r6,r3,12,0,19
/* get remaining xsec and convert to nsec. we scale
* up remaining xsec by 12 bits and get the top 32 bits
* of the multiplication, then we multiply by 1000
*/
rlwinm r7,r4,12,0,19
lis r5,1000000@h
ori r5,r5,1000000@l
mulhwu r7,r7,r5
mulli r7,r7,1000
/* now we must fixup using wall to monotonic. We need to snapshot
* that value and do the counter trick again. Fortunately, we still
* have the counter value in r8 that was returned by __do_get_xsec.
* At this point, r6,r7 contain our sec/nsec values, r3,r4 and r5
* can be used
* At this point, r3,r4 contain our sec/nsec values, r5 and r6
* can be used, r7 contains NSEC_PER_SEC.
*/
lwz r3,WTOM_CLOCK_SEC(r9)
lwz r4,WTOM_CLOCK_NSEC(r9)
lwz r5,WTOM_CLOCK_SEC(r9)
lwz r6,WTOM_CLOCK_NSEC(r9)
/* We now have our result in r3,r4. We create a fake dependency
* on that result and re-check the counter
/* We now have our offset in r5,r6. We create a fake dependency
* on that value and re-check the counter
*/
or r5,r4,r3
xor r0,r5,r5
or r0,r6,r5
xor r0,r0,r0
add r9,r9,r0
#ifdef CONFIG_PPC64
lwz r0,(CFG_TB_UPDATE_COUNT+4)(r9)
#else
lwz r0,(CFG_TB_UPDATE_COUNT)(r9)
#endif
lwz r0,(CFG_TB_UPDATE_COUNT+LOPART)(r9)
cmpl cr0,r8,r0 /* check if updated */
bne- 50b
/* Calculate and store result. Note that this mimmics the C code,
/* Calculate and store result. Note that this mimics the C code,
* which may cause funny results if nsec goes negative... is that
* possible at all ?
*/
add r3,r3,r6
add r4,r4,r7
lis r5,NSEC_PER_SEC@h
ori r5,r5,NSEC_PER_SEC@l
cmpl cr0,r4,r5
cmpli cr1,r4,0
add r3,r3,r5
add r4,r4,r6
cmpw cr0,r4,r7
cmpwi cr1,r4,0
blt 1f
subf r4,r5,r4
subf r4,r7,r4
addi r3,r3,1
1: bge cr1,1f
1: bge cr1,80f
addi r3,r3,-1
add r4,r4,r5
1: stw r3,TSPC32_TV_SEC(r11)
add r4,r4,r7
80: stw r3,TSPC32_TV_SEC(r11)
stw r4,TSPC32_TV_NSEC(r11)
mtlr r12
@ -195,10 +154,6 @@ V_FUNCTION_BEGIN(__kernel_clock_gettime)
/*
* syscall fallback
*/
98:
mtlr r12
mr r3,r10
mr r4,r11
99:
li r0,__NR_clock_gettime
sc
@ -254,11 +209,7 @@ __do_get_xsec:
/* Check for update count & load values. We use the low
* order 32 bits of the update count
*/
#ifdef CONFIG_PPC64
1: lwz r8,(CFG_TB_UPDATE_COUNT+4)(r9)
#else
1: lwz r8,(CFG_TB_UPDATE_COUNT)(r9)
#endif
1: lwz r8,(CFG_TB_UPDATE_COUNT+LOPART)(r9)
andi. r0,r8,1 /* pending update ? loop */
bne- 1b
xor r0,r8,r8 /* create dependency */
@ -305,11 +256,7 @@ __do_get_xsec:
or r6,r4,r3
xor r0,r6,r6
add r9,r9,r0
#ifdef CONFIG_PPC64
lwz r0,(CFG_TB_UPDATE_COUNT+4)(r9)
#else
lwz r0,(CFG_TB_UPDATE_COUNT)(r9)
#endif
lwz r0,(CFG_TB_UPDATE_COUNT+LOPART)(r9)
cmpl cr0,r8,r0 /* check if updated */
bne- 1b
@ -322,3 +269,98 @@ __do_get_xsec:
*/
3: blr
.cfi_endproc
/*
 * This is the core of clock_gettime(), it returns the current
 * time in seconds and nanoseconds in r3 and r4.
 * It expects the datapage ptr in r9 and doesn't clobber it (r9 only
 * ever has zero added to it, to create load dependencies).
 * It clobbers r0, r5, r6, r10 and returns NSEC_PER_SEC in r7.
 * On return, r8 contains the counter value that can be reused.
 * This clobbers cr0 but not any other cr field.  The carry bit is
 * also modified by the carrying add/subtract instructions used here.
 *
 * The whole read is retried (branch back to 1:) if the update count
 * changed between the first and the last load from the datapage.
 */
__do_get_tspec:
.cfi_startproc
/* Check for update count & load values. We use the low
 * order 32 bits of the update count.  An odd count means an
 * update is pending, so spin until it is even.
 */
1: lwz r8,(CFG_TB_UPDATE_COUNT+LOPART)(r9)
andi. r0,r8,1 /* pending update ? loop */
bne- 1b
/* r0 = r8 ^ r8 = 0; adding it to r9 leaves r9 unchanged but makes
 * the following datapage loads depend on the count load above.
 */
xor r0,r8,r8 /* create dependency */
add r9,r9,r0
/* Load orig stamp (offset to TB): r5 = high word, r6 = low word */
lwz r5,CFG_TB_ORIG_STAMP(r9)
lwz r6,(CFG_TB_ORIG_STAMP+4)(r9)
/* Get a stable TB value: read upper, lower, upper again and retry
 * if the upper half changed in between.
 */
2: mftbu r3
mftbl r4
mftbu r0
cmpl cr0,r3,r0
bne- 2b
/* Subtract tb orig stamp and shift left 12 bits.
 * The 64-bit difference ends up in r0 (high) : r7 (low).  The
 * record form rlwimi. sets cr0 from the high word, which the
 * beq+ below uses to skip the high-part multiplies when it is 0.
 */
subfc r7,r6,r4
subfe r0,r5,r3
slwi r0,r0,12
rlwimi. r0,r7,12,20,31
slwi r7,r7,12
/* Load scale factor & do multiplication.
 * r5 = high word, r6 = low word of tb_to_xs.  The partial products
 * of (r0:r7) * (r5:r6) are accumulated into r3 (seconds) and r4
 * (fraction of a second); r10 only collects low-order bits so that
 * their carry can be propagated into r4.
 */
lwz r5,CFG_TB_TO_XS(r9) /* load values */
lwz r6,(CFG_TB_TO_XS+4)(r9)
mulhwu r3,r7,r6
mullw r10,r7,r5
mulhwu r4,r7,r5
addc r10,r3,r10
li r3,0
beq+ 4f /* skip high part computation if 0 */
mulhwu r3,r0,r5
mullw r7,r0,r5
mulhwu r5,r0,r6
mullw r6,r0,r6
adde r4,r4,r7
addze r3,r3
addc r4,r4,r5
addze r3,r3
addc r10,r10,r6
4: addze r4,r4 /* add in carry */
/* r7 = NSEC_PER_SEC; taking the high 32 bits of r4 * r7 turns the
 * fractional part in r4 into nanoseconds.  mulhwu leaves the carry
 * from the addze above untouched.
 */
lis r7,NSEC_PER_SEC@h
ori r7,r7,NSEC_PER_SEC@l
mulhwu r4,r4,r7 /* convert to nanoseconds */
/* At this point, we have seconds & nanoseconds since the xtime
 * stamp in r3+CA and r4. Load & add the xtime stamp.
 * Only the low 32 bits of each stamp_xtime field are loaded.
 */
#ifdef CONFIG_PPC64
lwz r5,STAMP_XTIME+TSPC64_TV_SEC+LOPART(r9)
lwz r6,STAMP_XTIME+TSPC64_TV_NSEC+LOPART(r9)
#else
lwz r5,STAMP_XTIME+TSPC32_TV_SEC(r9)
lwz r6,STAMP_XTIME+TSPC32_TV_NSEC(r9)
#endif
/* The plain add does not touch the carry, so the adde below folds
 * the pending carry (the "+CA" above) into the seconds.
 */
add r4,r4,r6
adde r3,r3,r5
/* We now have our result in r3,r4. We create a fake dependency
 * on that result and re-check the counter; if it no longer matches
 * the value read into r8, start over.
 */
or r6,r4,r3
xor r0,r6,r6
add r9,r9,r0
lwz r0,(CFG_TB_UPDATE_COUNT+LOPART)(r9)
cmpl cr0,r8,r0 /* check if updated */
bne- 1b
/* check for nanosecond overflow and adjust if necessary:
 * if nsec >= NSEC_PER_SEC, subtract NSEC_PER_SEC and bump seconds.
 */
cmpw r4,r7
bltlr /* all done if no overflow */
subf r4,r7,r4 /* adjust if overflow */
addi r3,r3,1
blr
.cfi_endproc

View File

@ -75,90 +75,49 @@ V_FUNCTION_BEGIN(__kernel_clock_gettime)
mflr r12 /* r12 saves lr */
.cfi_register lr,r12
mr r10,r3 /* r10 saves id */
mr r11,r4 /* r11 saves tp */
bl V_LOCAL_FUNC(__get_datapage) /* get data page */
beq cr1,50f /* if monotonic -> jump there */
/*
* CLOCK_REALTIME
*/
bl V_LOCAL_FUNC(__do_get_xsec) /* get xsec from tb & kernel */
lis r7,15 /* r7 = 1000000 = USEC_PER_SEC */
ori r7,r7,16960
rldicl r5,r4,44,20 /* r5 = sec = xsec / XSEC_PER_SEC */
rldicr r6,r5,20,43 /* r6 = sec * XSEC_PER_SEC */
std r5,TSPC64_TV_SEC(r11) /* store sec in tv */
subf r0,r6,r4 /* r0 = xsec = (xsec - r6) */
mulld r0,r0,r7 /* usec = (xsec * USEC_PER_SEC) /
* XSEC_PER_SEC
*/
rldicl r0,r0,44,20
mulli r0,r0,1000 /* nsec = usec * 1000 */
std r0,TSPC64_TV_NSEC(r11) /* store nsec in tp */
mtlr r12
crclr cr0*4+so
li r3,0
blr
50: bl V_LOCAL_FUNC(__do_get_tspec) /* get time from tb & kernel */
bne cr1,80f /* if not monotonic, all done */
/*
* CLOCK_MONOTONIC
*/
50: bl V_LOCAL_FUNC(__do_get_xsec) /* get xsec from tb & kernel */
lis r7,15 /* r7 = 1000000 = USEC_PER_SEC */
ori r7,r7,16960
rldicl r5,r4,44,20 /* r5 = sec = xsec / XSEC_PER_SEC */
rldicr r6,r5,20,43 /* r6 = sec * XSEC_PER_SEC */
subf r0,r6,r4 /* r0 = xsec = (xsec - r6) */
mulld r0,r0,r7 /* usec = (xsec * USEC_PER_SEC) /
* XSEC_PER_SEC
*/
rldicl r6,r0,44,20
mulli r6,r6,1000 /* nsec = usec * 1000 */
/* now we must fixup using wall to monotonic. We need to snapshot
* that value and do the counter trick again. Fortunately, we still
* have the counter value in r8 that was returned by __do_get_xsec.
* At this point, r5,r6 contain our sec/nsec values.
* can be used
* have the counter value in r8 that was returned by __do_get_tspec.
* At this point, r4,r5 contain our sec/nsec values.
*/
lwa r4,WTOM_CLOCK_SEC(r3)
lwa r7,WTOM_CLOCK_NSEC(r3)
lwa r6,WTOM_CLOCK_SEC(r3)
lwa r9,WTOM_CLOCK_NSEC(r3)
/* We now have our result in r4,r7. We create a fake dependency
/* We now have our result in r6,r9. We create a fake dependency
* on that result and re-check the counter
*/
or r9,r4,r7
xor r0,r9,r9
or r0,r6,r9
xor r0,r0,r0
add r3,r3,r0
ld r0,CFG_TB_UPDATE_COUNT(r3)
cmpld cr0,r0,r8 /* check if updated */
bne- 50b
/* Calculate and store result. Note that this mimmics the C code,
* which may cause funny results if nsec goes negative... is that
* possible at all ?
/* Add wall->monotonic offset and check for overflow or underflow.
*/
add r4,r4,r5
add r7,r7,r6
lis r9,NSEC_PER_SEC@h
ori r9,r9,NSEC_PER_SEC@l
cmpl cr0,r7,r9
cmpli cr1,r7,0
add r4,r4,r6
add r5,r5,r9
cmpd cr0,r5,r7
cmpdi cr1,r5,0
blt 1f
subf r7,r9,r7
subf r5,r7,r5
addi r4,r4,1
1: bge cr1,1f
1: bge cr1,80f
addi r4,r4,-1
add r7,r7,r9
1: std r4,TSPC64_TV_SEC(r11)
std r7,TSPC64_TV_NSEC(r11)
add r5,r5,r7
80: std r4,TSPC64_TV_SEC(r11)
std r5,TSPC64_TV_NSEC(r11)
mtlr r12
crclr cr0*4+so
@ -168,10 +127,6 @@ V_FUNCTION_BEGIN(__kernel_clock_gettime)
/*
* syscall fallback
*/
98:
mtlr r12
mr r3,r10
mr r4,r11
99:
li r0,__NR_clock_gettime
sc
@ -253,3 +208,59 @@ V_FUNCTION_BEGIN(__do_get_xsec)
blr
.cfi_endproc
V_FUNCTION_END(__do_get_xsec)
/*
 * This is the core of clock_gettime(), it returns the current
 * time in seconds and nanoseconds in r4 and r5.
 * It expects the datapage ptr in r3 and doesn't clobber it (r3 only
 * ever has zero added to it, to create load dependencies).
 * It clobbers r0, r6 and r9 (r9 is used to hold tb_orig_stamp) and
 * returns NSEC_PER_SEC in r7.
 * On return, r8 contains the counter value that can be reused.
 * This clobbers cr0 but not any other cr field.
 *
 * The datapage reads are retried (branch back to 1:) if the update
 * count changed between the first and the last load.
 */
V_FUNCTION_BEGIN(__do_get_tspec)
.cfi_startproc
/* check for update count & load values; an odd count means an
 * update is pending, so spin until it is even
 */
1: ld r8,CFG_TB_UPDATE_COUNT(r3)
andi. r0,r8,1 /* pending update ? loop */
bne- 1b
/* r0 = r8 ^ r8 = 0; adding it to r3 leaves r3 unchanged but makes
 * the following datapage loads depend on the count load above.
 */
xor r0,r8,r8 /* create dependency */
add r3,r3,r0
/* Get TB & offset it. We use the MFTB macro which will generate
 * workaround code for Cell.
 * After the subtract, r7 = timebase ticks since tb_orig_stamp.
 */
MFTB(r7)
ld r9,CFG_TB_ORIG_STAMP(r3)
subf r7,r9,r7
/* Scale result: r6 = high 64 bits of (ticks << 12) * tb_to_xs */
ld r5,CFG_TB_TO_XS(r3)
sldi r7,r7,12 /* compute time since stamp_xtime */
mulhdu r6,r7,r5 /* in units of 2^-32 seconds */
/* Add stamp since epoch: r4 = stamp seconds, r5 = stamp nanoseconds.
 * OR-ing the loaded/computed values together and XOR-ing the result
 * with itself yields 0 while making the count re-read below depend
 * on all of them.
 */
ld r4,STAMP_XTIME+TSPC64_TV_SEC(r3)
ld r5,STAMP_XTIME+TSPC64_TV_NSEC(r3)
or r0,r4,r5
or r0,r0,r6
xor r0,r0,r0
add r3,r3,r0
ld r0,CFG_TB_UPDATE_COUNT(r3)
cmpld r0,r8 /* check if updated */
bne- 1b /* reload if so */
/* convert to seconds & nanoseconds and add to stamp.
 * The low 32 bits of r6 are the fraction of a second (2^-32 units);
 * the high 32 bits of fraction * NSEC_PER_SEC give nanoseconds.
 * The upper 32 bits of r6 are the whole seconds.
 */
lis r7,NSEC_PER_SEC@h
ori r7,r7,NSEC_PER_SEC@l
mulhwu r0,r6,r7 /* compute nanoseconds and */
srdi r6,r6,32 /* seconds since stamp_xtime */
clrldi r0,r0,32 /* keep only the low 32 bits of the nsec value */
add r5,r5,r0 /* add nanoseconds together */
cmpd r5,r7 /* overflow? */
add r4,r4,r6 /* add seconds together (cr0 is unaffected) */
bltlr /* all done if no overflow */
subf r5,r7,r5 /* if overflow, adjust */
addi r4,r4,1
blr
.cfi_endproc
V_FUNCTION_END(__do_get_tspec)