b3f271e86e
Implement a POWER7 optimised memcpy using VMX and enhanced prefetch instructions. This is a copy of the POWER7 optimised copy_to_user/copy_from_user loop. Detailed implementation and performance details can be found in commit a66086b8197d (powerpc: POWER7 optimised copy_to_user/copy_from_user using VMX). I noticed memcpy issues when profiling a RAID6 workload: .memcpy .async_memcpy .async_copy_data .__raid_run_ops .handle_stripe .raid5d .md_thread I created a simplified testcase by building a RAID6 array with 4 1GB ramdisks (booting with brd.rd_size=1048576): # mdadm -CR -e 1.2 /dev/md0 --level=6 -n4 /dev/ram[0-3] I then timed how long it took to write to the entire array: # dd if=/dev/zero of=/dev/md0 bs=1M Before: 892 MB/s After: 999 MB/s A 12% improvement. Signed-off-by: Anton Blanchard <anton@samba.org> Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
36 lines
987 B
Makefile
36 lines
987 B
Makefile
#
# Makefile for ppc-specific library files.
#

# Add -Werror to the compiler flags for this directory and its
# subdirectories when CONFIG_PPC_WERROR is enabled.
subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror

# Compile everything in this directory with -mno-minimal-toc when
# CONFIG_PPC64 is enabled.
ccflags-$(CONFIG_PPC64) := -mno-minimal-toc

# Strip -pg from the flags used to build these two objects.
# NOTE(review): presumably so the code-patching and feature-fixup code is
# not itself instrumented by the function tracer -- confirm.
CFLAGS_REMOVE_code-patching.o = -pg
CFLAGS_REMOVE_feature-fixups.o = -pg

# Objects that are always built. The checksum object is selected by
# CONFIG_WORD_SIZE (checksum_$(CONFIG_WORD_SIZE).o).
obj-y := string.o alloc.o \
         checksum_$(CONFIG_WORD_SIZE).o crtsavres.o
# Objects added only when CONFIG_PPC32 is enabled.
obj-$(CONFIG_PPC32) += div64.o copy_32.o
# devres.o is added only when CONFIG_HAS_IOMEM is enabled.
obj-$(CONFIG_HAS_IOMEM) += devres.o

# Objects added only when CONFIG_PPC64 is enabled, including the POWER7
# variants of the copy routines. string.o is already listed unconditionally
# in obj-y, so it is not repeated here.
obj-$(CONFIG_PPC64) += copypage_64.o copyuser_64.o \
                       memcpy_64.o usercopy_64.o mem_64.o \
                       checksum_wrappers_64.o hweight_64.o \
                       copyuser_power7.o string_64.o copypage_power7.o \
                       memcpy_power7.o
# sstep.o and ldstfp.o are needed when any one of XMON, KPROBES or
# HAVE_HW_BREAKPOINT is enabled. With more than one enabled the objects
# appear in obj-y several times; this relies on Kbuild ignoring every
# instance after the first.
obj-$(CONFIG_XMON) += sstep.o ldstfp.o
obj-$(CONFIG_KPROBES) += sstep.o ldstfp.o
obj-$(CONFIG_HAVE_HW_BREAKPOINT) += sstep.o ldstfp.o

# These objects require CONFIG_PPC64 in addition to their own option:
# locks.o needs CONFIG_SMP and vmx-helper.o needs CONFIG_ALTIVEC.
ifeq ($(CONFIG_PPC64),y)
obj-$(CONFIG_SMP) += locks.o
obj-$(CONFIG_ALTIVEC) += vmx-helper.o
endif

# rheap.o is added only when CONFIG_PPC_LIB_RHEAP is enabled.
obj-$(CONFIG_PPC_LIB_RHEAP) += rheap.o

# Code-patching and feature-fixup support is always built; the
# feature-fixups self-test object is added only when
# CONFIG_FTR_FIXUP_SELFTEST is enabled.
obj-y += code-patching.o
obj-y += feature-fixups.o
obj-$(CONFIG_FTR_FIXUP_SELFTEST) += feature-fixups-test.o