From 7c67928648188d193c9f73679f7e6aa67d088237 Mon Sep 17 00:00:00 2001
From: Heiko Carstens <hca@linux.ibm.com>
Date: Sun, 9 Jun 2024 21:02:19 +0200
Subject: [PATCH] s390: Provide optimized __arch_hweight*() implementations

Make use of the popcnt instruction to provide optimized __arch_hweight*()
implementations. The generated code is shorter and avoids rather expensive
function calls.

Acked-by: Alexander Gordeev <agordeev@linux.ibm.com>
Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
Signed-off-by: Alexander Gordeev <agordeev@linux.ibm.com>
---
 arch/s390/Kconfig                    |  2 +-
 arch/s390/include/asm/arch_hweight.h | 76 ++++++++++++++++++++++++++++
 arch/s390/include/asm/bitops.h       |  3 +-
 3 files changed, 79 insertions(+), 2 deletions(-)
 create mode 100644 arch/s390/include/asm/arch_hweight.h

diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index c59d2b54df49..9788ae5e930b 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -21,7 +21,7 @@ config ARCH_PROC_KCORE_TEXT
 	def_bool y
 
 config GENERIC_HWEIGHT
-	def_bool y
+	def_bool !HAVE_MARCH_Z196_FEATURES
 
 config GENERIC_BUG
 	def_bool y if BUG
diff --git a/arch/s390/include/asm/arch_hweight.h b/arch/s390/include/asm/arch_hweight.h
new file mode 100644
index 000000000000..50e23ce854e5
--- /dev/null
+++ b/arch/s390/include/asm/arch_hweight.h
@@ -0,0 +1,76 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _ASM_S390_ARCH_HWEIGHT_H
+#define _ASM_S390_ARCH_HWEIGHT_H
+
+#include <linux/types.h>
+
+static __always_inline unsigned long popcnt_z196(unsigned long w)
+{
+	unsigned long cnt;
+
+	asm volatile(".insn	rrf,0xb9e10000,%[cnt],%[w],0,0"	/* popcnt as raw .insn encoding; third operand 0 */
+		     : [cnt] "=d" (cnt)	/* output operand */
+		     : [w] "d" (w)	/* input operand */
+		     : "cc");	/* condition code is declared clobbered */
+	return cnt;	/* callers add up the bytes of this value (one count per byte) */
+}
+
+static __always_inline unsigned long popcnt_z15(unsigned long w)
+{
+	unsigned long cnt;
+
+	asm volatile(".insn	rrf,0xb9e10000,%[cnt],%[w],8,0"	/* same encoding as popcnt_z196(), third operand 8 */
+		     : [cnt] "=d" (cnt)	/* output operand */
+		     : [w] "d" (w)	/* input operand */
+		     : "cc");	/* condition code is declared clobbered */
+	return cnt;	/* callers return this value without further folding */
+}
+
+static __always_inline unsigned long __arch_hweight64(__u64 w)	/* bit count of a 64-bit value */
+{
+	if (IS_ENABLED(CONFIG_HAVE_MARCH_Z15_FEATURES))	/* config-time selection */
+		return popcnt_z15(w);	/* result used as-is */
+	if (IS_ENABLED(CONFIG_HAVE_MARCH_Z196_FEATURES)) {
+		w = popcnt_z196(w);	/* handled below as eight per-byte counts */
+		w += w >> 32;	/* add upper four bytes onto lower four */
+		w += w >> 16;	/* add bytes 3..2 onto bytes 1..0 */
+		w += w >> 8;	/* low byte now holds the sum of all eight */
+		return w & 0xff;	/* drop partial sums left in higher bytes */
+	}
+	return __sw_hweight64(w);	/* neither option enabled */
+}
+
+static __always_inline unsigned int __arch_hweight32(unsigned int w)	/* bit count of a 32-bit value */
+{
+	if (IS_ENABLED(CONFIG_HAVE_MARCH_Z15_FEATURES))	/* config-time selection */
+		return popcnt_z15(w);	/* result used as-is */
+	if (IS_ENABLED(CONFIG_HAVE_MARCH_Z196_FEATURES)) {
+		w = popcnt_z196(w);	/* handled below as four per-byte counts */
+		w += w >> 16;	/* add bytes 3..2 onto bytes 1..0 */
+		w += w >> 8;	/* low byte now holds the sum of all four */
+		return w & 0xff;	/* drop partial sums left in higher bytes */
+	}
+	return __sw_hweight32(w);	/* neither option enabled */
+}
+
+static __always_inline unsigned int __arch_hweight16(unsigned int w)	/* bit count of the low 16 bits */
+{
+	if (IS_ENABLED(CONFIG_HAVE_MARCH_Z15_FEATURES))	/* config-time selection */
+		return popcnt_z15((unsigned short)w);	/* cast drops bits above bit 15 first */
+	if (IS_ENABLED(CONFIG_HAVE_MARCH_Z196_FEATURES)) {
+		w = popcnt_z196(w);	/* no cast here: only bytes 0 and 1 are summed below */
+		w += w >> 8;	/* low byte = count of byte 0 + count of byte 1 */
+		return w & 0xff;	/* drop everything above the low byte */
+	}
+	return __sw_hweight16(w);	/* neither option enabled */
+}
+
+static __always_inline unsigned int __arch_hweight8(unsigned int w)	/* bit count of the low 8 bits */
+{
+	if (IS_ENABLED(CONFIG_HAVE_MARCH_Z196_FEATURES))	/* config-time selection; no separate z15 branch */
+		return popcnt_z196((unsigned char)w);	/* input cut to 8 bits; result returned unfolded */
+	return __sw_hweight8(w);	/* option not enabled */
+}
+
+#endif /* _ASM_S390_ARCH_HWEIGHT_H */
diff --git a/arch/s390/include/asm/bitops.h b/arch/s390/include/asm/bitops.h
index c467dffa8c12..54a079cd39ed 100644
--- a/arch/s390/include/asm/bitops.h
+++ b/arch/s390/include/asm/bitops.h
@@ -379,8 +379,9 @@ static inline int fls(unsigned int word)
 	return fls64(word);
 }
 
+#include <asm/arch_hweight.h>
+#include <asm-generic/bitops/const_hweight.h>
 #include <asm-generic/bitops/ffz.h>
-#include <asm-generic/bitops/hweight.h>
 #include <asm-generic/bitops/sched.h>
 #include <asm-generic/bitops/le.h>
 #include <asm-generic/bitops/ext2-atomic-setbit.h>