Merge remote-tracking branch 'torvalds/master' into perf/core

To resolve a trivial merge conflict with c302378bc157f6a7 ("libbpf: Hashmap interface update to allow both long and void* keys/values"), where a function present upstream was removed in the perf tools development tree. Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2022-12-16 09:53:53 -03:00 · 2022-12-16 09:53:53 -03:00 · 1a931707ad
commit 1a931707ad
parent 818448e9cf 84e57d2922
10549 changed files with 518868 additions and 182903 deletions
--- a/.clang-format
+++ b/.clang-format
@ -222,6 +222,7 @@ ForEachMacros:
  - 'for_each_component_dais'
  - 'for_each_component_dais_safe'
  - 'for_each_console'
+  - 'for_each_console_srcu'
  - 'for_each_cpu'
  - 'for_each_cpu_and'
  - 'for_each_cpu_not'
@ -440,8 +441,11 @@ ForEachMacros:
  - 'inet_lhash2_for_each_icsk'
  - 'inet_lhash2_for_each_icsk_continue'
  - 'inet_lhash2_for_each_icsk_rcu'
+  - 'interval_tree_for_each_double_span'
+  - 'interval_tree_for_each_span'
  - 'intlist__for_each_entry'
  - 'intlist__for_each_entry_safe'
+  - 'iopt_for_each_contig_area'
  - 'kcore_copy__for_each_phdr'
  - 'key_for_each'
  - 'key_for_each_safe'
@ -535,6 +539,7 @@ ForEachMacros:
  - 'perf_hpp_list__for_each_sort_list_safe'
  - 'perf_pmu__for_each_hybrid_pmu'
  - 'ping_portaddr_for_each_entry'
+  - 'ping_portaddr_for_each_entry_rcu'
  - 'plist_for_each'
  - 'plist_for_each_continue'
  - 'plist_for_each_entry'
--- a/.gitignore
+++ b/.gitignore
@ -20,6 +20,7 @@
 *.dtb
 *.dtbo
 *.dtb.S
+*.dtbo.S
 *.dwo
 *.elf
 *.gcno
--- a/.mailmap
+++ b/.mailmap
@ -29,6 +29,7 @@ Alexandre Belloni <alexandre.belloni@bootlin.com> <alexandre.belloni@free-electr
 Alexei Starovoitov <ast@kernel.org> <alexei.starovoitov@gmail.com>
 Alexei Starovoitov <ast@kernel.org> <ast@fb.com>
 Alexei Starovoitov <ast@kernel.org> <ast@plumgrid.com>
+Alex Hung <alexhung@gmail.com> <alex.hung@canonical.com>
 Alex Shi <alexs@kernel.org> <alex.shi@intel.com>
 Alex Shi <alexs@kernel.org> <alex.shi@linaro.org>
 Alex Shi <alexs@kernel.org> <alex.shi@linux.alibaba.com>
@ -227,6 +228,7 @@ Juha Yrjola <at solidboot.com>
 Juha Yrjola <juha.yrjola@nokia.com>
 Juha Yrjola <juha.yrjola@solidboot.com>
 Julien Thierry <julien.thierry.kdev@gmail.com> <julien.thierry@arm.com>
+Iskren Chernev <me@iskren.info> <iskren.chernev@gmail.com>
 Kalle Valo <kvalo@kernel.org> <kvalo@codeaurora.org>
 Kalyan Thota <quic_kalyant@quicinc.com> <kalyan_t@codeaurora.org>
 Kay Sievers <kay.sievers@vrfy.org>
@ -286,6 +288,7 @@ Matthew Wilcox <willy@infradead.org> <willy@linux.intel.com>
 Matthew Wilcox <willy@infradead.org> <willy@parisc-linux.org>
 Matthias Fuchs <socketcan@esd.eu> <matthias.fuchs@esd.eu>
 Matthieu CASTET <castet.matthieu@free.fr>
+Matti Vaittinen <mazziesaccount@gmail.com> <matti.vaittinen@fi.rohmeurope.com>
 Matt Ranostay <matt.ranostay@konsulko.com> <matt@ranostay.consulting>
 Matt Ranostay <mranostay@gmail.com> Matthew Ranostay <mranostay@embeddedalley.com>
 Matt Ranostay <mranostay@gmail.com> <matt.ranostay@intel.com>
@ -371,6 +374,8 @@ Ricardo Ribalda <ribalda@kernel.org> <ricardo.ribalda@gmail.com>
 Roman Gushchin <roman.gushchin@linux.dev> <guro@fb.com>
 Roman Gushchin <roman.gushchin@linux.dev> <guroan@gmail.com>
 Roman Gushchin <roman.gushchin@linux.dev> <klamm@yandex-team.ru>
+Muchun Song <muchun.song@linux.dev> <songmuchun@bytedance.com>
+Muchun Song <muchun.song@linux.dev> <smuchun@gmail.com>
 Ross Zwisler <zwisler@kernel.org> <ross.zwisler@linux.intel.com>
 Rudolf Marek <R.Marek@sh.cvut.cz>
 Rui Saraiva <rmps@joel.ist.utl.pt>
@ -382,6 +387,7 @@ Santosh Shilimkar <santosh.shilimkar@oracle.org>
 Santosh Shilimkar <ssantosh@kernel.org>
 Sarangdhar Joshi <spjoshi@codeaurora.org>
 Sascha Hauer <s.hauer@pengutronix.de>
+Satya Priya <quic_c_skakit@quicinc.com> <skakit@codeaurora.org>
 S.Çağlar Onur <caglar@pardus.org.tr>
 Sean Christopherson <seanjc@google.com> <sean.j.christopherson@intel.com>
 Sean Nyekjaer <sean@geanix.com> <sean.nyekjaer@prevas.dk>
@ -389,6 +395,7 @@ Sebastian Reichel <sre@kernel.org> <sebastian.reichel@collabora.co.uk>
 Sebastian Reichel <sre@kernel.org> <sre@debian.org>
 Sedat Dilek <sedat.dilek@gmail.com> <sedat.dilek@credativ.de>
 Seth Forshee <sforshee@kernel.org> <seth.forshee@canonical.com>
+Shannon Nelson <shannon.nelson@amd.com> <snelson@pensando.io>
 Shiraz Hashim <shiraz.linux.kernel@gmail.com> <shiraz.hashim@st.com>
 Shuah Khan <shuah@kernel.org> <shuahkhan@gmail.com>
 Shuah Khan <shuah@kernel.org> <shuah.khan@hp.com>
--- a/Documentation/ABI/testing/debugfs-dell-wmi-ddv
+++ b/Documentation/ABI/testing/debugfs-dell-wmi-ddv
@ -0,0 +1,21 @@
+What:		/sys/kernel/debug/dell-wmi-ddv-<wmi_device_name>/fan_sensor_information
+Date:		September 2022
+KernelVersion:	6.1
+Contact:	Armin Wolf <W_Armin@gmx.de>
+Description:
+		This file contains the contents of the fan sensor information buffer,
+		which contains fan sensor entries and a terminating character (0xFF).
+
+		Each fan sensor entry consists of three bytes with an unknown meaning;
+		interested people may use this file for reverse-engineering.
+
+What:		/sys/kernel/debug/dell-wmi-ddv-<wmi_device_name>/thermal_sensor_information
+Date:		September 2022
+KernelVersion:	6.1
+Contact:	Armin Wolf <W_Armin@gmx.de>
+Description:
+		This file contains the contents of the thermal sensor information buffer,
+		which contains thermal sensor entries and a terminating character (0xFF).
+
+		Each thermal sensor entry consists of five bytes with an unknown meaning;
+		interested people may use this file for reverse-engineering.
--- a/Documentation/ABI/testing/debugfs-pktcdvd
+++ b/Documentation/ABI/testing/debugfs-pktcdvd
@ -1,18 +0,0 @@
-What:           /sys/kernel/debug/pktcdvd/pktcdvd[0-7]
-Date:           Oct. 2006
-KernelVersion:  2.6.20
-Contact:        Thomas Maier <balagi@justmail.de>
-Description:
-
-The pktcdvd module (packet writing driver) creates
-these files in debugfs:
-
-/sys/kernel/debug/pktcdvd/pktcdvd[0-7]/
-
-    ====            ====== ====================================
-    info            0444   Lots of driver statistics and infos.
-    ====            ====== ====================================
-
-Example::
-
-    cat /sys/kernel/debug/pktcdvd/pktcdvd0/info
--- a/Documentation/ABI/testing/sysfs-block-zram
+++ b/Documentation/ABI/testing/sysfs-block-zram
@ -137,3 +137,17 @@ Description:
 		The writeback_limit file is read-write and specifies the maximum
 		amount of writeback ZRAM can do. The limit could be changed
 		in run time.
+
+What:		/sys/block/zram<id>/recomp_algorithm
+Date:		November 2022
+Contact:	Sergey Senozhatsky <senozhatsky@chromium.org>
+Description:
+		The recomp_algorithm file is read-write and allows setting
+		or showing secondary compression algorithms.
+
+What:		/sys/block/zram<id>/recompress
+Date:		November 2022
+Contact:	Sergey Senozhatsky <senozhatsky@chromium.org>
+Description:
+		The recompress file is write-only and triggers re-compression
+		with secondary compression algorithms.
--- a/Documentation/ABI/testing/sysfs-bus-nvdimm
+++ b/Documentation/ABI/testing/sysfs-bus-nvdimm
@ -41,3 +41,17 @@ KernelVersion:  5.18
 Contact:        Kajol Jain <kjain@linux.ibm.com>
 Description:	(RO) This sysfs file exposes the cpumask which is designated to
 		to retrieve nvdimm pmu event counter data.
+
+What:		/sys/bus/nd/devices/nmemX/cxl/id
+Date:		November 2022
+KernelVersion:	6.2
+Contact:	Dave Jiang <dave.jiang@intel.com>
+Description:	(RO) Show the id (serial) of the device. This is CXL specific.
+
+What:		/sys/bus/nd/devices/nmemX/cxl/provider
+Date:		November 2022
+KernelVersion:	6.2
+Contact:	Dave Jiang <dave.jiang@intel.com>
+Description:	(RO) Shows the CXL bridge device that ties a CXL memory device
+		to this NVDIMM device. I.e. the parent of the device returned is
+		a /sys/bus/cxl/devices/memX instance.
--- a/Documentation/ABI/testing/sysfs-bus-pci
+++ b/Documentation/ABI/testing/sysfs-bus-pci
@ -407,6 +407,16 @@ Description:
 	        file contains a '1' if the memory has been published for
 		use outside the driver that owns the device.

+What:		/sys/bus/pci/devices/.../p2pmem/allocate
+Date:		August 2022
+Contact:	Logan Gunthorpe <logang@deltatee.com>
+Description:
+		This file allows mapping p2pmem into userspace. For each
+		mmap() call on this file, the kernel will allocate a chunk
+		of Peer-to-Peer memory for use in Peer-to-Peer transactions.
+		This memory can be used in O_DIRECT calls to NVMe backed
+		files for Peer-to-Peer copies.
+
 What:		/sys/bus/pci/devices/.../link/clkpm
 		/sys/bus/pci/devices/.../link/l0s_aspm
 		/sys/bus/pci/devices/.../link/l1_aspm
--- a/Documentation/ABI/testing/sysfs-bus-spi-devices-spi-nor
+++ b/Documentation/ABI/testing/sysfs-bus-spi-devices-spi-nor
@ -5,6 +5,9 @@ Contact:	linux-mtd@lists.infradead.org
 Description:	(RO) The JEDEC ID of the SPI NOR flash as reported by the
 		flash device.

+		The attribute is not present if the flash doesn't support
+		the "Read JEDEC ID" command (9Fh). This is the case for
+		non-JEDEC compliant flashes.

 What:		/sys/bus/spi/devices/.../spi-nor/manufacturer
 Date:		April 2021
@ -12,6 +15,9 @@ KernelVersion:	5.14
 Contact:	linux-mtd@lists.infradead.org
 Description:	(RO) Manufacturer of the SPI NOR flash.

+		The attribute is not present if the flash device isn't
+		known to the kernel and is only probed by its SFDP
+		tables.

 What:		/sys/bus/spi/devices/.../spi-nor/partname
 Date:		April 2021
--- a/Documentation/ABI/testing/sysfs-class-bdi
+++ b/Documentation/ABI/testing/sysfs-class-bdi
@ -44,6 +44,21 @@ Description:

 	(read-write)

+What:		/sys/class/bdi/<bdi>/min_ratio_fine
+Date:		November 2022
+Contact:	Stefan Roesch <shr@devkernel.io>
+Description:
+	Under normal circumstances each device is given a part of the
+	total write-back cache that relates to its current average
+	writeout speed in relation to the other devices.
+
+	The 'min_ratio_fine' parameter allows assigning a minimum reserve
+	of the write-back cache to a particular device. The value is
+	expressed as part of 1 million. For example, this is useful for
+	providing a minimum QoS.
+
+	(read-write)
+
 What:		/sys/class/bdi/<bdi>/max_ratio
 Date:		January 2008
 Contact:	Peter Zijlstra <a.p.zijlstra@chello.nl>
@ -55,6 +70,59 @@ Description:
 	mount that is prone to get stuck, or a FUSE mount which cannot
 	be trusted to play fair.

+	(read-write)
+
+What:		/sys/class/bdi/<bdi>/max_ratio_fine
+Date:		November 2022
+Contact:	Stefan Roesch <shr@devkernel.io>
+Description:
+	Allows limiting a particular device to use not more than the
+	given value of the write-back cache.  The value is given as part
+	of 1 million. This is useful in situations where we want to avoid
+	one device taking all or most of the write-back cache.  For example
+	in case of an NFS mount that is prone to get stuck, or a FUSE mount
+	which cannot be trusted to play fair.
+
+	(read-write)
+
+What:		/sys/class/bdi/<bdi>/min_bytes
+Date:		October 2022
+Contact:	Stefan Roesch <shr@devkernel.io>
+Description:
+	Under normal circumstances each device is given a part of the
+	total write-back cache that relates to its current average
+	writeout speed in relation to the other devices.
+
+	The 'min_bytes' parameter allows assigning a minimum
+	reserve of the write-back cache to a particular device
+	expressed in bytes.
+	For example, this is useful for providing a minimum QoS.
+
+	(read-write)
+
+What:		/sys/class/bdi/<bdi>/max_bytes
+Date:		October 2022
+Contact:	Stefan Roesch <shr@devkernel.io>
+Description:
+	Allows limiting a particular device to use not more than the
+	given 'max_bytes' of the write-back cache.  This is useful in
+	situations where we want to avoid one device taking all or
+	most of the write-back cache.  For example in case of an NFS
+	mount that is prone to get stuck, a FUSE mount which cannot be
+	trusted to play fair, or a nbd device.
+
+	(read-write)
+
+What:		/sys/class/bdi/<bdi>/strict_limit
+Date:		October 2022
+Contact:	Stefan Roesch <shr@devkernel.io>
+Description:
+	Forces per-BDI checks for the share of given device in the write-back
+	cache even before the global background dirty limit is reached. This
+	is useful in situations where the global limit is much higher than
+	affordable for given relatively slow (or untrusted) device. Turning
+	strictlimit on has no visible effect if max_ratio is equal to 100%.
+
 	(read-write)
 What:		/sys/class/bdi/<bdi>/stable_pages_required
 Date:		January 2008
--- a/Documentation/ABI/testing/sysfs-class-pktcdvd
+++ b/Documentation/ABI/testing/sysfs-class-pktcdvd
@ -1,97 +0,0 @@
-sysfs interface
---------------
-The pktcdvd module (packet writing driver) creates the following files in the
-sysfs: (<devid> is in the format major:minor)
-
-What:		/sys/class/pktcdvd/add
-What:		/sys/class/pktcdvd/remove
-What:		/sys/class/pktcdvd/device_map
-Date:		Oct. 2006
-KernelVersion:	2.6.20
-Contact:	Thomas Maier <balagi@justmail.de>
-Description:
-
-		==========	==============================================
-		add		(WO) Write a block device id (major:minor) to
-				create a new pktcdvd device and map it to the
-				block device.
-
-		remove		(WO) Write the pktcdvd device id (major:minor)
-				to remove the pktcdvd device.
-
-		device_map	(RO) Shows the device mapping in format:
-				pktcdvd[0-7] <pktdevid> <blkdevid>
-		==========	==============================================
-
-
-What:		/sys/class/pktcdvd/pktcdvd[0-7]/dev
-What:		/sys/class/pktcdvd/pktcdvd[0-7]/uevent
-Date:		Oct. 2006
-KernelVersion:	2.6.20
-Contact:	Thomas Maier <balagi@justmail.de>
-Description:
-		dev:	(RO) Device id
-
-		uevent:	(WO) To send a uevent
-
-
-What:		/sys/class/pktcdvd/pktcdvd[0-7]/stat/packets_started
-What:		/sys/class/pktcdvd/pktcdvd[0-7]/stat/packets_finished
-What:		/sys/class/pktcdvd/pktcdvd[0-7]/stat/kb_written
-What:		/sys/class/pktcdvd/pktcdvd[0-7]/stat/kb_read
-What:		/sys/class/pktcdvd/pktcdvd[0-7]/stat/kb_read_gather
-What:		/sys/class/pktcdvd/pktcdvd[0-7]/stat/reset
-Date:		Oct. 2006
-KernelVersion:	2.6.20
-Contact:	Thomas Maier <balagi@justmail.de>
-Description:
-		packets_started:	(RO) Number of started packets.
-
-		packets_finished:	(RO) Number of finished packets.
-
-		kb_written:		(RO) kBytes written.
-
-		kb_read:		(RO) kBytes read.
-
-		kb_read_gather:		(RO) kBytes read to fill write packets.
-
-		reset:			(WO) Write any value to it to reset
-					pktcdvd device statistic values, like
-					bytes read/written.
-
-
-What:		/sys/class/pktcdvd/pktcdvd[0-7]/write_queue/size
-What:		/sys/class/pktcdvd/pktcdvd[0-7]/write_queue/congestion_off
-What:		/sys/class/pktcdvd/pktcdvd[0-7]/write_queue/congestion_on
-Date:		Oct. 2006
-KernelVersion:	2.6.20
-Contact:	Thomas Maier <balagi@justmail.de>
-Description:
-		==============	================================================
-		size		(RO) Contains the size of the bio write queue.
-
-		congestion_off	(RW) If bio write queue size is below this mark,
-				accept new bio requests from the block layer.
-
-		congestion_on	(RW) If bio write queue size is higher as this
-				mark, do no longer accept bio write requests
-				from the block layer and wait till the pktcdvd
-				device has processed enough bio's so that bio
-				write queue size is below congestion off mark.
-				A value of <= 0 disables congestion control.
-		==============	================================================
-
-
-Example:
--------
-To use the pktcdvd sysfs interface directly, you can do::
-
-    # create a new pktcdvd device mapped to /dev/hdc
-    echo "22:0" >/sys/class/pktcdvd/add
-    cat /sys/class/pktcdvd/device_map
-    # assuming device pktcdvd0 was created, look at stat's
-    cat /sys/class/pktcdvd/pktcdvd0/stat/kb_written
-    # print the device id of the mapped block device
-    fgrep pktcdvd0 /sys/class/pktcdvd/device_map
-    # remove device, using pktcdvd0 device id   253:0
-    echo "253:0" >/sys/class/pktcdvd/remove
--- a/Documentation/ABI/testing/sysfs-devices-mapping
+++ b/Documentation/ABI/testing/sysfs-devices-mapping
@ -1,6 +1,6 @@
 What:           /sys/devices/uncore_iio_x/dieX
 Date:           February 2020
-Contact:        Roman Sudarikov <roman.sudarikov@linux.intel.com>
+Contact:        Alexander Antonov <alexander.antonov@linux.intel.com>
 Description:
                Each IIO stack (PCIe root port) has its own IIO PMON block, so
                each dieX file (where X is die number) holds "Segment:Root Bus"
@ -32,3 +32,31 @@ Description:
 		    IIO PMU 0 on die 1 belongs to PCI RP on bus 0x40, domain 0x0000
 		    IIO PMU 0 on die 2 belongs to PCI RP on bus 0x80, domain 0x0000
 		    IIO PMU 0 on die 3 belongs to PCI RP on bus 0xc0, domain 0x0000
+
+What:           /sys/devices/uncore_upi_x/dieX
+Date:           March 2022
+Contact:        Alexander Antonov <alexander.antonov@linux.intel.com>
+Description:
+                Each /sys/devices/uncore_upi_X/dieY file holds "upi_Z,die_W"
+                value that means UPI link number X on die Y is connected to UPI
+                link Z on die W and this link between sockets can be monitored
+                by UPI PMON block.
+                For example, 4-die Sapphire Rapids platform has the following
+                UPI 0 topology::
+
+		    # tail /sys/devices/uncore_upi_0/die*
+		    ==> /sys/devices/uncore_upi_0/die0 <==
+		    upi_1,die_1
+		    ==> /sys/devices/uncore_upi_0/die1 <==
+		    upi_0,die_3
+		    ==> /sys/devices/uncore_upi_0/die2 <==
+		    upi_1,die_3
+		    ==> /sys/devices/uncore_upi_0/die3 <==
+		    upi_0,die_1
+
+                Which means::
+
+		    UPI link 0 on die 0 is connected to UPI link 1 on die 1
+		    UPI link 0 on die 1 is connected to UPI link 0 on die 3
+		    UPI link 0 on die 2 is connected to UPI link 1 on die 3
+		    UPI link 0 on die 3 is connected to UPI link 0 on die 1
--- a/Documentation/ABI/testing/sysfs-driver-intel-i915-hwmon
+++ b/Documentation/ABI/testing/sysfs-driver-intel-i915-hwmon
@ -0,0 +1,75 @@
+What:		/sys/devices/.../hwmon/hwmon<i>/in0_input
+Date:		February 2023
+KernelVersion:	6.2
+Contact:	intel-gfx@lists.freedesktop.org
+Description:	RO. Current Voltage in millivolt.
+
+		Only supported for particular Intel i915 graphics platforms.
+
+What:		/sys/devices/.../hwmon/hwmon<i>/power1_max
+Date:		February 2023
+KernelVersion:	6.2
+Contact:	intel-gfx@lists.freedesktop.org
+Description:	RW. Card reactive sustained (PL1/Tau) power limit in microwatts.
+
+		The power controller will throttle the operating frequency
+		if the power averaged over a window (typically seconds)
+		exceeds this limit.
+
+		Only supported for particular Intel i915 graphics platforms.
+
+What:		/sys/devices/.../hwmon/hwmon<i>/power1_rated_max
+Date:		February 2023
+KernelVersion:	6.2
+Contact:	intel-gfx@lists.freedesktop.org
+Description:	RO. Card default power limit (default TDP setting).
+
+		Only supported for particular Intel i915 graphics platforms.
+
+What:		/sys/devices/.../hwmon/hwmon<i>/power1_max_interval
+Date:		February 2023
+KernelVersion:	6.2
+Contact:	intel-gfx@lists.freedesktop.org
+Description:	RW. Sustained power limit interval (Tau in PL1/Tau) in
+		milliseconds over which sustained power is averaged.
+
+		Only supported for particular Intel i915 graphics platforms.
+
+What:		/sys/devices/.../hwmon/hwmon<i>/power1_crit
+Date:		February 2023
+KernelVersion:	6.2
+Contact:	intel-gfx@lists.freedesktop.org
+Description:	RW. Card reactive critical (I1) power limit in microwatts.
+
+		Card reactive critical (I1) power limit in microwatts is exposed
+		for client products. The power controller will throttle the
+		operating frequency if the power averaged over a window exceeds
+		this limit.
+
+		Only supported for particular Intel i915 graphics platforms.
+
+What:		/sys/devices/.../hwmon/hwmon<i>/curr1_crit
+Date:		February 2023
+KernelVersion:	6.2
+Contact:	intel-gfx@lists.freedesktop.org
+Description:	RW. Card reactive critical (I1) power limit in milliamperes.
+
+		Card reactive critical (I1) power limit in milliamperes is
+		exposed for server products. The power controller will throttle
+		the operating frequency if the power averaged over a window
+		exceeds this limit.
+
+		Only supported for particular Intel i915 graphics platforms.
+
+What:		/sys/devices/.../hwmon/hwmon<i>/energy1_input
+Date:		February 2023
+KernelVersion:	6.2
+Contact:	intel-gfx@lists.freedesktop.org
+Description:	RO. Energy input of device or gt in microjoules.
+
+		For i915 device level hwmon devices (name "i915") this
+		reflects energy input for the entire device. For gt level
+		hwmon devices (name "i915_gtN") this reflects energy input
+		for the gt.
+
+		Only supported for particular Intel i915 graphics platforms.
--- a/Documentation/ABI/testing/sysfs-driver-intel_sdsi
+++ b/Documentation/ABI/testing/sysfs-driver-intel_sdsi
@ -4,21 +4,21 @@ KernelVersion:	5.18
 Contact:	"David E. Box" <david.e.box@linux.intel.com>
 Description:
 		This directory contains interface files for accessing Intel
-		Software Defined Silicon (SDSi) features on a CPU. X
-		represents the socket instance (though not the socket ID).
-		The socket ID is determined by reading the registers file
-		and decoding it per the specification.
+		On Demand (formerly Software Defined Silicon or SDSi) features
+		on a CPU. X represents the socket instance (though not the
+		socket ID). The socket ID is determined by reading the
+		registers file and decoding it per the specification.

-		Some files communicate with SDSi hardware through a mailbox.
-		Should the operation fail, one of the following error codes
-		may be returned:
+		Some files communicate with On Demand hardware through a
+		mailbox. Should the operation fail, one of the following error
+		codes may be returned:

 		==========	=====
 		Error Code	Cause
 		==========	=====
 		EIO		General mailbox failure. Log may indicate cause.
 		EBUSY		Mailbox is owned by another agent.
-		EPERM		SDSI capability is not enabled in hardware.
+		EPERM		On Demand capability is not enabled in hardware.
 		EPROTO		Failure in mailbox protocol detected by driver.
 				See log for details.
 		EOVERFLOW	For provision commands, the size of the data
@ -54,8 +54,8 @@ KernelVersion:	5.18
 Contact:	"David E. Box" <david.e.box@linux.intel.com>
 Description:
 		(WO) Used to write an Authentication Key Certificate (AKC) to
-		the SDSi NVRAM for the CPU. The AKC is used to authenticate a
-		Capability Activation Payload. Mailbox command.
+		the On Demand NVRAM for the CPU. The AKC is used to authenticate
+		a Capability Activation Payload. Mailbox command.

 What:		/sys/bus/auxiliary/devices/intel_vsec.sdsi.X/provision_cap
 Date:		Feb 2022
@ -63,17 +63,28 @@ KernelVersion:	5.18
 Contact:	"David E. Box" <david.e.box@linux.intel.com>
 Description:
 		(WO) Used to write a Capability Activation Payload (CAP) to the
-		SDSi NVRAM for the CPU. CAPs are used to activate a given CPU
-		feature. A CAP is validated by SDSi hardware using a previously
-		provisioned AKC file. Upon successful authentication, the CPU
-		configuration is updated. A cold reboot is required to fully
-		activate the feature. Mailbox command.
+		On Demand NVRAM for the CPU. CAPs are used to activate a given
+		CPU feature. A CAP is validated by On Demand hardware using a
+		previously provisioned AKC file. Upon successful authentication,
+		the CPU configuration is updated. A cold reboot is required to
+		fully activate the feature. Mailbox command.
+
+What:		/sys/bus/auxiliary/devices/intel_vsec.sdsi.X/meter_certificate
+Date:		Nov 2022
+KernelVersion:	6.2
+Contact:	"David E. Box" <david.e.box@linux.intel.com>
+Description:
+		(RO) Used to read back the current meter certificate for the CPU
+		from Intel On Demand hardware. The meter certificate contains
+		utilization metrics of On Demand enabled features. Mailbox
+		command.

 What:		/sys/bus/auxiliary/devices/intel_vsec.sdsi.X/state_certificate
 Date:		Feb 2022
 KernelVersion:	5.18
 Contact:	"David E. Box" <david.e.box@linux.intel.com>
 Description:
-		(RO) Used to read back the current State Certificate for the CPU
-		from SDSi hardware. The State Certificate contains information
-		about the current licenses on the CPU. Mailbox command.
+		(RO) Used to read back the current state certificate for the CPU
+		from On Demand hardware. The state certificate contains
+		information about the current licenses on the CPU. Mailbox
+		command.
--- a/Documentation/ABI/testing/sysfs-fs-f2fs
+++ b/Documentation/ABI/testing/sysfs-fs-f2fs
@ -99,6 +99,12 @@ Description:	Controls the issue rate of discard commands that consist of small
 		checkpoint is triggered, and issued during the checkpoint.
 		By default, it is disabled with 0.

+What:		/sys/fs/f2fs/<disk>/max_ordered_discard
+Date:		October 2022
+Contact:	"Yangtao Li" <frank.li@vivo.com>
+Description:	Controls the maximum ordered discard, the unit size is one block(4KB).
+		Set it to 16 by default.
+
 What:		/sys/fs/f2fs/<disk>/max_discard_request
 Date:		December 2021
 Contact:	"Konstantin Vyshetsky" <vkon@google.com>
@ -132,7 +138,8 @@ Contact:	"Chao Yu" <yuchao0@huawei.com>
 Description:	Controls discard granularity of inner discard thread. Inner thread
 		will not issue discards with size that is smaller than granularity.
 		The unit size is one block(4KB), now only support configuring
-		in range of [1, 512]. Default value is 4(=16KB).
+		in range of [1, 512]. Default value is 16.
+		For small devices, default value is 1.

 What:		/sys/fs/f2fs/<disk>/umount_discard_timeout
 Date:		January 2019
@ -235,7 +242,7 @@ Description:	Shows total written kbytes issued to disk.
 What:		/sys/fs/f2fs/<disk>/features
 Date:		July 2017
 Contact:	"Jaegeuk Kim" <jaegeuk@kernel.org>
-Description:	<deprecated: should use /sys/fs/f2fs/<disk>/feature_list/
+Description:	<deprecated: should use /sys/fs/f2fs/<disk>/feature_list/>
 		Shows all enabled features in current device.
 		Supported features:
 		encryption, blkzoned, extra_attr, projquota, inode_checksum,
@ -592,10 +599,10 @@ Description:	With "mode=fragment:block" mount options, we can scatter block allo
 		in the length of 1..<max_fragment_hole> by turns. This value can be set
 		between 1..512 and the default value is 4.

-What:		/sys/fs/f2fs/<disk>/gc_urgent_high_remaining
-Date:		December 2021
-Contact:	"Daeho Jeong" <daehojeong@google.com>
-Description:	You can set the trial count limit for GC urgent high mode with this value.
+What:		/sys/fs/f2fs/<disk>/gc_remaining_trials
+Date:		October 2022
+Contact:	"Yangtao Li" <frank.li@vivo.com>
+Description:	You can set the trial count limit for GC urgent and idle mode with this value.
 		If GC thread gets to the limit, the mode will turn back to GC normal mode.
 		By default, the value is zero, which means there is no limit like before.

@ -634,3 +641,31 @@ Date:		July 2022
 Contact:	"Daeho Jeong" <daehojeong@google.com>
 Description:	Show the accumulated total revoked atomic write block count after boot.
 		If you write "0" here, you can initialize to "0".
+
+What:		/sys/fs/f2fs/<disk>/gc_mode
+Date:		October 2022
+Contact:	"Yangtao Li" <frank.li@vivo.com>
+Description:	Show the current gc_mode as a string.
+		This is a read-only entry.
+
+What:		/sys/fs/f2fs/<disk>/discard_urgent_util
+Date:		November 2022
+Contact:	"Yangtao Li" <frank.li@vivo.com>
+Description:	When space utilization exceeds this, do background DISCARD aggressively.
+		Does DISCARD forcibly in a period of given min_discard_issue_time when the number
+		of discards is not 0 and set discard granularity to 1.
+		Default: 80
+
+What:		/sys/fs/f2fs/<disk>/hot_data_age_threshold
+Date:		November 2022
+Contact:	"Ping Xiong" <xiongping1@xiaomi.com>
+Description:	When DATA SEPARATION is on, it controls the age threshold to indicate
+		the data blocks as hot. By default it was initialized as 262144 blocks
+		(equal to 1GB).
+
+What:		/sys/fs/f2fs/<disk>/warm_data_age_threshold
+Date:		November 2022
+Contact:	"Ping Xiong" <xiongping1@xiaomi.com>
+Description:	When DATA SEPARATION is on, it controls the age threshold to indicate
+		the data blocks as warm. By default it was initialized as 2621440 blocks
+		(equal to 10GB).
--- a/Documentation/ABI/testing/sysfs-kernel-mm-damon
+++ b/Documentation/ABI/testing/sysfs-kernel-mm-damon
@ -27,6 +27,10 @@ Description:	Writing 'on' or 'off' to this file makes the kdamond starts or
 		makes the kdamond reads the user inputs in the sysfs files
 		except 'state' again.  Writing 'update_schemes_stats' to the
 		file updates contents of schemes stats files of the kdamond.
+		Writing 'update_schemes_tried_regions' to the file updates
+		contents of 'tried_regions' directory of every scheme directory
+		of this kdamond.  Writing 'clear_schemes_tried_regions' to the
+		file removes contents of the 'tried_regions' directory.

 What:		/sys/kernel/mm/damon/admin/kdamonds/<K>/pid
 Date:		Mar 2022
@ -283,3 +287,31 @@ Date:		Mar 2022
 Contact:	SeongJae Park <sj@kernel.org>
 Description:	Reading this file returns the number of the exceed events of
 		the scheme's quotas.
+
+What:		/sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/tried_regions/<R>/start
+Date:		Oct 2022
+Contact:	SeongJae Park <sj@kernel.org>
+Description:	Reading this file returns the start address of a memory region
+		that corresponding DAMON-based Operation Scheme's action has
+		tried to be applied.
+
+What:		/sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/tried_regions/<R>/end
+Date:		Oct 2022
+Contact:	SeongJae Park <sj@kernel.org>
+Description:	Reading this file returns the end address of a memory region
+		that corresponding DAMON-based Operation Scheme's action has
+		tried to be applied.
+
+What:		/sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/tried_regions/<R>/nr_accesses
+Date:		Oct 2022
+Contact:	SeongJae Park <sj@kernel.org>
+Description:	Reading this file returns the 'nr_accesses' of a memory region
+		that corresponding DAMON-based Operation Scheme's action has
+		tried to be applied.
+
+What:		/sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/tried_regions/<R>/age
+Date:		Oct 2022
+Contact:	SeongJae Park <sj@kernel.org>
+Description:	Reading this file returns the 'age' of a memory region that
+		corresponding DAMON-based Operation Scheme's action has tried
+		to be applied.
--- a/Documentation/ABI/testing/sysfs-kernel-oops_count
+++ b/Documentation/ABI/testing/sysfs-kernel-oops_count
@ -0,0 +1,6 @@
+What:		/sys/kernel/oops_count
+Date:		November 2022
+KernelVersion:	6.2.0
+Contact:	Linux Kernel Hardening List <linux-hardening@vger.kernel.org>
+Description:
+		Shows how many times the system has Oopsed since last boot.
--- a/Documentation/ABI/testing/sysfs-kernel-warn_count
+++ b/Documentation/ABI/testing/sysfs-kernel-warn_count
@ -0,0 +1,6 @@
+What:		/sys/kernel/warn_count
+Date:		November 2022
+KernelVersion:	6.2.0
+Contact:	Linux Kernel Hardening List <linux-hardening@vger.kernel.org>
+Description:
+		Shows how many times the system has Warned since last boot.
--- a/Documentation/ABI/testing/sysfs-platform-dell-wmi-ddv
+++ b/Documentation/ABI/testing/sysfs-platform-dell-wmi-ddv
@ -0,0 +1,7 @@
+What:		/sys/class/power_supply/<battery_name>/eppid
+Date:		September 2022
+KernelVersion:	6.1
+Contact:	Armin Wolf <W_Armin@gmx.de>
+Description:
+		Reports the Dell ePPID (electronic Dell Piece Part Identification)
+		of the ACPI battery.
--- a/Documentation/ABI/testing/sysfs-platform-intel-ifs
+++ b/Documentation/ABI/testing/sysfs-platform-intel-ifs
@ -1,39 +1,41 @@
 What:		/sys/devices/virtual/misc/intel_ifs_<N>/run_test
-Date:		April 21 2022
-KernelVersion:	5.19
+Date:		Nov 16 2022
+KernelVersion:	6.2
 Contact:	"Jithu Joseph" <jithu.joseph@intel.com>
 Description:	Write <cpu#> to trigger IFS test for one online core.
 		Note that the test is per core. The cpu# can be
 		for any thread on the core. Running on one thread
 		completes the test for the core containing that thread.
 		Example: to test the core containing cpu5: echo 5 >
-		/sys/devices/platform/intel_ifs.<N>/run_test
+		/sys/devices/virtual/misc/intel_ifs_<N>/run_test

 What:		/sys/devices/virtual/misc/intel_ifs_<N>/status
-Date:		April 21 2022
-KernelVersion:	5.19
+Date:		Nov 16 2022
+KernelVersion:	6.2
 Contact:	"Jithu Joseph" <jithu.joseph@intel.com>
 Description:	The status of the last test. It can be one of "pass", "fail"
 		or "untested".

 What:		/sys/devices/virtual/misc/intel_ifs_<N>/details
-Date:		April 21 2022
-KernelVersion:	5.19
+Date:		Nov 16 2022
+KernelVersion:	6.2
 Contact:	"Jithu Joseph" <jithu.joseph@intel.com>
 Description:	Additional information regarding the last test. The details file reports
 		the hex value of the SCAN_STATUS MSR. Note that the error_code field
 		may contain driver defined software code not defined in the Intel SDM.

 What:		/sys/devices/virtual/misc/intel_ifs_<N>/image_version
-Date:		April 21 2022
-KernelVersion:	5.19
+Date:		Nov 16 2022
+KernelVersion:	6.2
 Contact:	"Jithu Joseph" <jithu.joseph@intel.com>
 Description:	Version (hexadecimal) of loaded IFS binary image. If no scan image
 		is loaded reports "none".

-What:		/sys/devices/virtual/misc/intel_ifs_<N>/reload
-Date:		April 21 2022
-KernelVersion:	5.19
+What:		/sys/devices/virtual/misc/intel_ifs_<N>/current_batch
+Date:		Nov 16 2022
+KernelVersion:	6.2
 Contact:	"Jithu Joseph" <jithu.joseph@intel.com>
-Description:	Write "1" (or "y" or "Y") to reload the IFS image from
-		/lib/firmware/intel/ifs/ff-mm-ss.scan.
+Description:	Write a number less than or equal to 0xff to load an IFS test image.
+		The number written is treated as the 2 digit suffix in the following file name:
+		/lib/firmware/intel/ifs_<N>/ff-mm-ss-02x.scan
+		Reading the file will provide the suffix of the currently loaded IFS test image.
--- a/Documentation/Makefile
+++ b/Documentation/Makefile
@ -95,6 +95,15 @@ htmldocs:
 	@$(srctree)/scripts/sphinx-pre-install --version-check
 	@+$(foreach var,$(SPHINXDIRS),$(call loop_cmd,sphinx,html,$(var),,$(var)))

+texinfodocs:
+	@$(srctree)/scripts/sphinx-pre-install --version-check
+	@+$(foreach var,$(SPHINXDIRS),$(call loop_cmd,sphinx,texinfo,$(var),texinfo,$(var)))
+
+# Note: the 'info' Make target is generated by sphinx itself when
+# running the texinfodocs target defined above.
+infodocs: texinfodocs
+	$(MAKE) -C $(BUILDDIR)/texinfo info
+
 linkcheckdocs:
 	@$(foreach var,$(SPHINXDIRS),$(call loop_cmd,sphinx,linkcheck,$(var),,$(var)))

@ -143,6 +152,8 @@ cleandocs:
 dochelp:
 	@echo  ' Linux kernel internal documentation in different formats from ReST:'
 	@echo  '  htmldocs        - HTML'
+	@echo  '  texinfodocs     - Texinfo'
+	@echo  '  infodocs        - Info'
 	@echo  '  latexdocs       - LaTeX'
 	@echo  '  pdfdocs         - PDF'
 	@echo  '  epubdocs        - EPUB'
--- a/Documentation/PCI/msi-howto.rst
+++ b/Documentation/PCI/msi-howto.rst
@ -285,3 +285,13 @@ to bridges between the PCI root and the device, MSIs are disabled.
 It is also worth checking the device driver to see whether it supports MSIs.
 For example, it may contain calls to pci_alloc_irq_vectors() with the
 PCI_IRQ_MSI or PCI_IRQ_MSIX flags.
+
+
+List of device drivers MSI(-X) APIs
+===================================
+
+The PCI/MSI subsystem has a dedicated C file for its exported device driver
+APIs — `drivers/pci/msi/api.c`. The following functions are exported:
+
+.. kernel-doc:: drivers/pci/msi/api.c
+   :export:
--- a/Documentation/PCI/pci-error-recovery.rst
+++ b/Documentation/PCI/pci-error-recovery.rst
@ -83,6 +83,7 @@ This structure has the form::
 		int (*mmio_enabled)(struct pci_dev *dev);
 		int (*slot_reset)(struct pci_dev *dev);
 		void (*resume)(struct pci_dev *dev);
+		void (*cor_error_detected)(struct pci_dev *dev);
 	};

 The possible channel states are::
@ -422,5 +423,11 @@ That is, the recovery API only requires that:
   - drivers/net/cxgb3
   - drivers/net/s2io.c

+   The cor_error_detected() callback is invoked in handle_error_source() when
+   the error severity is "correctable". The callback is optional and allows
+   additional logging to be done if desired. See example:
+
+   - drivers/cxl/pci.c
+
 The End
 -------
--- a/Documentation/RCU/Design/Requirements/Requirements.rst
+++ b/Documentation/RCU/Design/Requirements/Requirements.rst
@ -1858,7 +1858,7 @@ unloaded. After a given module has been unloaded, any attempt to call
 one of its functions results in a segmentation fault. The module-unload
 functions must therefore cancel any delayed calls to loadable-module
 functions, for example, any outstanding mod_timer() must be dealt
-with via del_timer_sync() or similar.
+with via timer_shutdown_sync() or similar.

 Unfortunately, there is no way to cancel an RCU callback; once you
 invoke call_rcu(), the callback function is eventually going to be
--- a/Documentation/RCU/arrayRCU.rst
+++ b/Documentation/RCU/arrayRCU.rst
@ -1,165 +0,0 @@
-.. _array_rcu_doc:
-
-Using RCU to Protect Read-Mostly Arrays
-=======================================
-
-Although RCU is more commonly used to protect linked lists, it can
-also be used to protect arrays.  Three situations are as follows:
-
-1.  :ref:`Hash Tables <hash_tables>`
-
-2.  :ref:`Static Arrays <static_arrays>`
-
-3.  :ref:`Resizable Arrays <resizable_arrays>`
-
-Each of these three situations involves an RCU-protected pointer to an
-array that is separately indexed.  It might be tempting to consider use
-of RCU to instead protect the index into an array, however, this use
-case is **not** supported.  The problem with RCU-protected indexes into
-arrays is that compilers can play way too many optimization games with
-integers, which means that the rules governing handling of these indexes
-are far more trouble than they are worth.  If RCU-protected indexes into
-arrays prove to be particularly valuable (which they have not thus far),
-explicit cooperation from the compiler will be required to permit them
-to be safely used.
-
-That aside, each of the three RCU-protected pointer situations are
-described in the following sections.
-
-.. _hash_tables:
-
-Situation 1: Hash Tables
------------------------
-
-Hash tables are often implemented as an array, where each array entry
-has a linked-list hash chain.  Each hash chain can be protected by RCU
-as described in listRCU.rst.  This approach also applies to other
-array-of-list situations, such as radix trees.
-
-.. _static_arrays:
-
-Situation 2: Static Arrays
--------------------------
-
-Static arrays, where the data (rather than a pointer to the data) is
-located in each array element, and where the array is never resized,
-have not been used with RCU.  Rik van Riel recommends using seqlock in
-this situation, which would also have minimal read-side overhead as long
-as updates are rare.
-
-Quick Quiz:
-		Why is it so important that updates be rare when using seqlock?
-
-:ref:`Answer to Quick Quiz <answer_quick_quiz_seqlock>`
-
-.. _resizable_arrays:
-
-Situation 3: Resizable Arrays
------------------------------
-
-Use of RCU for resizable arrays is demonstrated by the grow_ary()
-function formerly used by the System V IPC code.  The array is used
-to map from semaphore, message-queue, and shared-memory IDs to the data
-structure that represents the corresponding IPC construct.  The grow_ary()
-function does not acquire any locks; instead its caller must hold the
-ids->sem semaphore.
-
-The grow_ary() function, shown below, does some limit checks, allocates a
-new ipc_id_ary, copies the old to the new portion of the new, initializes
-the remainder of the new, updates the ids->entries pointer to point to
-the new array, and invokes ipc_rcu_putref() to free up the old array.
-Note that rcu_assign_pointer() is used to update the ids->entries pointer,
-which includes any memory barriers required on whatever architecture
-you are running on::
-
-	static int grow_ary(struct ipc_ids* ids, int newsize)
-	{
-		struct ipc_id_ary* new;
-		struct ipc_id_ary* old;
-		int i;
-		int size = ids->entries->size;
-
-		if(newsize > IPCMNI)
-			newsize = IPCMNI;
-		if(newsize <= size)
-			return newsize;
-
-		new = ipc_rcu_alloc(sizeof(struct kern_ipc_perm *)*newsize +
-				    sizeof(struct ipc_id_ary));
-		if(new == NULL)
-			return size;
-		new->size = newsize;
-		memcpy(new->p, ids->entries->p,
-		       sizeof(struct kern_ipc_perm *)*size +
-		       sizeof(struct ipc_id_ary));
-		for(i=size;i<newsize;i++) {
-			new->p[i] = NULL;
-		}
-		old = ids->entries;
-
-		/*
-		 * Use rcu_assign_pointer() to make sure the memcpyed
-		 * contents of the new array are visible before the new
-		 * array becomes visible.
-		 */
-		rcu_assign_pointer(ids->entries, new);
-
-		ipc_rcu_putref(old);
-		return newsize;
-	}
-
-The ipc_rcu_putref() function decrements the array's reference count
-and then, if the reference count has dropped to zero, uses call_rcu()
-to free the array after a grace period has elapsed.
-
-The array is traversed by the ipc_lock() function.  This function
-indexes into the array under the protection of rcu_read_lock(),
-using rcu_dereference() to pick up the pointer to the array so
-that it may later safely be dereferenced -- memory barriers are
-required on the Alpha CPU.  Since the size of the array is stored
-with the array itself, there can be no array-size mismatches, so
-a simple check suffices.  The pointer to the structure corresponding
-to the desired IPC object is placed in "out", with NULL indicating
-a non-existent entry.  After acquiring "out->lock", the "out->deleted"
-flag indicates whether the IPC object is in the process of being
-deleted, and, if not, the pointer is returned::
-
-	struct kern_ipc_perm* ipc_lock(struct ipc_ids* ids, int id)
-	{
-		struct kern_ipc_perm* out;
-		int lid = id % SEQ_MULTIPLIER;
-		struct ipc_id_ary* entries;
-
-		rcu_read_lock();
-		entries = rcu_dereference(ids->entries);
-		if(lid >= entries->size) {
-			rcu_read_unlock();
-			return NULL;
-		}
-		out = entries->p[lid];
-		if(out == NULL) {
-			rcu_read_unlock();
-			return NULL;
-		}
-		spin_lock(&out->lock);
-
-		/* ipc_rmid() may have already freed the ID while ipc_lock
-		 * was spinning: here verify that the structure is still valid
-		 */
-		if (out->deleted) {
-			spin_unlock(&out->lock);
-			rcu_read_unlock();
-			return NULL;
-		}
-		return out;
-	}
-
-.. _answer_quick_quiz_seqlock:
-
-Answer to Quick Quiz:
-	Why is it so important that updates be rare when using seqlock?
-
-	The reason that it is important that updates be rare when
-	using seqlock is that frequent updates can livelock readers.
-	One way to avoid this problem is to assign a seqlock for
-	each array entry rather than to the entire array.
--- a/Documentation/RCU/checklist.rst
+++ b/Documentation/RCU/checklist.rst
@ -32,8 +32,8 @@ over a rather long period of time, but improvements are always welcome!
 	for lockless updates.  This does result in the mildly
 	counter-intuitive situation where rcu_read_lock() and
 	rcu_read_unlock() are used to protect updates, however, this
-	approach provides the same potential simplifications that garbage
-	collectors do.
+	approach can provide the same simplifications to certain types
+	of lockless algorithms that garbage collectors do.

 1.	Does the update code have proper mutual exclusion?

@ -49,12 +49,12 @@ over a rather long period of time, but improvements are always welcome!
 	them -- even x86 allows later loads to be reordered to precede
 	earlier stores), and be prepared to explain why this added
 	complexity is worthwhile.  If you choose #c, be prepared to
-	explain how this single task does not become a major bottleneck on
-	big multiprocessor machines (for example, if the task is updating
-	information relating to itself that other tasks can read, there
-	by definition can be no bottleneck).  Note that the definition
-	of "large" has changed significantly:  Eight CPUs was "large"
-	in the year 2000, but a hundred CPUs was unremarkable in 2017.
+	explain how this single task does not become a major bottleneck
+	on large systems (for example, if the task is updating information
+	relating to itself that other tasks can read, there by definition
+	can be no bottleneck).	Note that the definition of "large" has
+	changed significantly:	Eight CPUs was "large" in the year 2000,
+	but a hundred CPUs was unremarkable in 2017.

 2.	Do the RCU read-side critical sections make proper use of
 	rcu_read_lock() and friends?  These primitives are needed
@ -97,33 +97,38 @@ over a rather long period of time, but improvements are always welcome!

 	b.	Proceed as in (a) above, but also maintain per-element
 		locks (that are acquired by both readers and writers)
-		that guard per-element state.  Of course, fields that
-		the readers refrain from accessing can be guarded by
-		some other lock acquired only by updaters, if desired.
+		that guard per-element state.  Fields that the readers
+		refrain from accessing can be guarded by some other lock
+		acquired only by updaters, if desired.

-		This works quite well, also.
+		This also works quite well.

 	c.	Make updates appear atomic to readers.	For example,
 		pointer updates to properly aligned fields will
 		appear atomic, as will individual atomic primitives.
 		Sequences of operations performed under a lock will *not*
 		appear to be atomic to RCU readers, nor will sequences
-		of multiple atomic primitives.
+		of multiple atomic primitives.	One alternative is to
+		move multiple individual fields to a separate structure,
+		thus solving the multiple-field problem by imposing an
+		additional level of indirection.

 		This can work, but is starting to get a bit tricky.

-	d.	Carefully order the updates and the reads so that
-		readers see valid data at all phases of the update.
-		This is often more difficult than it sounds, especially
-		given modern CPUs' tendency to reorder memory references.
-		One must usually liberally sprinkle memory barriers
-		(smp_wmb(), smp_rmb(), smp_mb()) through the code,
-		making it difficult to understand and to test.
+	d.	Carefully order the updates and the reads so that readers
+		see valid data at all phases of the update.  This is often
+		more difficult than it sounds, especially given modern
+		CPUs' tendency to reorder memory references.  One must
+		usually liberally sprinkle memory-ordering operations
+		through the code, making it difficult to understand and
+		to test.  Where it works, it is better to use things
+		like smp_store_release() and smp_load_acquire(), but in
+		some cases the smp_mb() full memory barrier is required.

-		It is usually better to group the changing data into
-		a separate structure, so that the change may be made
-		to appear atomic by updating a pointer to reference
-		a new structure containing updated values.
+		As noted earlier, it is usually better to group the
+		changing data into a separate structure, so that the
+		change may be made to appear atomic by updating a pointer
+		to reference a new structure containing updated values.

 4.	Weakly ordered CPUs pose special challenges.  Almost all CPUs
 	are weakly ordered -- even x86 CPUs allow later loads to be
@ -188,26 +193,29 @@ over a rather long period of time, but improvements are always welcome!
 		when publicizing a pointer to a structure that can
 		be traversed by an RCU read-side critical section.

-5.	If call_rcu() or call_srcu() is used, the callback function will
-	be called from softirq context.  In particular, it cannot block.
-	If you need the callback to block, run that code in a workqueue
-	handler scheduled from the callback.  The queue_rcu_work()
-	function does this for you in the case of call_rcu().
+5.	If any of call_rcu(), call_srcu(), call_rcu_tasks(),
+	call_rcu_tasks_rude(), or call_rcu_tasks_trace() is used,
+	the callback function may be invoked from softirq context,
+	and in any case with bottom halves disabled.  In particular,
+	this callback function cannot block.  If you need the callback
+	to block, run that code in a workqueue handler scheduled from
+	the callback.  The queue_rcu_work() function does this for you
+	in the case of call_rcu().

 6.	Since synchronize_rcu() can block, it cannot be called
 	from any sort of irq context.  The same rule applies
-	for synchronize_srcu(), synchronize_rcu_expedited(), and
-	synchronize_srcu_expedited().
+	for synchronize_srcu(), synchronize_rcu_expedited(),
+	synchronize_srcu_expedited(), synchronize_rcu_tasks(),
+	synchronize_rcu_tasks_rude(), and synchronize_rcu_tasks_trace().

 	The expedited forms of these primitives have the same semantics
-	as the non-expedited forms, but expediting is both expensive and
-	(with the exception of synchronize_srcu_expedited()) unfriendly
-	to real-time workloads.  Use of the expedited primitives should
-	be restricted to rare configuration-change operations that would
-	not normally be undertaken while a real-time workload is running.
-	However, real-time workloads can use rcupdate.rcu_normal kernel
-	boot parameter to completely disable expedited grace periods,
-	though this might have performance implications.
+	as the non-expedited forms, but expediting is more CPU intensive.
+	Use of the expedited primitives should be restricted to rare
+	configuration-change operations that would not normally be
+	undertaken while a real-time workload is running.  Note that
+	IPI-sensitive real-time workloads can use the rcupdate.rcu_normal
+	kernel boot parameter to completely disable expedited grace
+	periods, though this might have performance implications.

 	In particular, if you find yourself invoking one of the expedited
 	primitives repeatedly in a loop, please do everyone a favor:
@ -215,8 +223,9 @@ over a rather long period of time, but improvements are always welcome!
 	a single non-expedited primitive to cover the entire batch.
 	This will very likely be faster than the loop containing the
 	expedited primitive, and will be much much easier on the rest
-	of the system, especially to real-time workloads running on
-	the rest of the system.
+	of the system, especially to real-time workloads running on the
+	rest of the system.  Alternatively, instead use asynchronous
+	primitives such as call_rcu().

 7.	As of v4.20, a given kernel implements only one RCU flavor, which
 	is RCU-sched for PREEMPTION=n and RCU-preempt for PREEMPTION=y.
@ -239,7 +248,8 @@ over a rather long period of time, but improvements are always welcome!
 	the corresponding readers must use rcu_read_lock_trace() and
 	rcu_read_unlock_trace().  If an updater uses call_rcu_tasks_rude()
 	or synchronize_rcu_tasks_rude(), then the corresponding readers
-	must use anything that disables interrupts.
+	must use anything that disables preemption, for example,
+	preempt_disable() and preempt_enable().

 	Mixing things up will result in confusion and broken kernels, and
 	has even resulted in an exploitable security issue.  Therefore,
@ -253,15 +263,16 @@ over a rather long period of time, but improvements are always welcome!
 	that this usage is safe is that readers can use anything that
 	disables BH when updaters use call_rcu() or synchronize_rcu().

-8.	Although synchronize_rcu() is slower than is call_rcu(), it
-	usually results in simpler code.  So, unless update performance is
-	critically important, the updaters cannot block, or the latency of
-	synchronize_rcu() is visible from userspace, synchronize_rcu()
-	should be used in preference to call_rcu().  Furthermore,
-	kfree_rcu() usually results in even simpler code than does
-	synchronize_rcu() without synchronize_rcu()'s multi-millisecond
-	latency.  So please take advantage of kfree_rcu()'s "fire and
-	forget" memory-freeing capabilities where it applies.
+8.	Although synchronize_rcu() is slower than is call_rcu(),
+	it usually results in simpler code.  So, unless update
+	performance is critically important, the updaters cannot block,
+	or the latency of synchronize_rcu() is visible from userspace,
+	synchronize_rcu() should be used in preference to call_rcu().
+	Furthermore, kfree_rcu() and kvfree_rcu() usually result
+	in even simpler code than does synchronize_rcu() without
+	synchronize_rcu()'s multi-millisecond latency.	So please take
+	advantage of kfree_rcu()'s and kvfree_rcu()'s "fire and forget"
+	memory-freeing capabilities where it applies.

 	An especially important property of the synchronize_rcu()
 	primitive is that it automatically self-limits: if grace periods
@ -271,8 +282,8 @@ over a rather long period of time, but improvements are always welcome!
 	cases where grace periods are delayed, as failing to do so can
 	result in excessive realtime latencies or even OOM conditions.

-	Ways of gaining this self-limiting property when using call_rcu()
-	include:
+	Ways of gaining this self-limiting property when using call_rcu(),
+	kfree_rcu(), or kvfree_rcu() include:

 	a.	Keeping a count of the number of data-structure elements
 		used by the RCU-protected data structure, including
@ -304,18 +315,21 @@ over a rather long period of time, but improvements are always welcome!
 		here is that superuser already has lots of ways to crash
 		the machine.

-	d.	Periodically invoke synchronize_rcu(), permitting a limited
-		number of updates per grace period.  Better yet, periodically
-		invoke rcu_barrier() to wait for all outstanding callbacks.
+	d.	Periodically invoke rcu_barrier(), permitting a limited
+		number of updates per grace period.

-	The same cautions apply to call_srcu() and kfree_rcu().
+	The same cautions apply to call_srcu(), call_rcu_tasks(),
+	call_rcu_tasks_rude(), and call_rcu_tasks_trace().  This is
+	why there is an srcu_barrier(), rcu_barrier_tasks(),
+	rcu_barrier_tasks_rude(), and rcu_barrier_tasks_trace(),
+	respectively.

-	Note that although these primitives do take action to avoid memory
-	exhaustion when any given CPU has too many callbacks, a determined
-	user could still exhaust memory.  This is especially the case
-	if a system with a large number of CPUs has been configured to
-	offload all of its RCU callbacks onto a single CPU, or if the
-	system has relatively little free memory.
+	Note that although these primitives do take action to avoid
+	memory exhaustion when any given CPU has too many callbacks,
+	a determined user or administrator can still exhaust memory.
+	This is especially the case if a system with a large number of
+	CPUs has been configured to offload all of its RCU callbacks onto
+	a single CPU, or if the system has relatively little free memory.

 9.	All RCU list-traversal primitives, which include
 	rcu_dereference(), list_for_each_entry_rcu(), and
@ -344,14 +358,14 @@ over a rather long period of time, but improvements are always welcome!
 	and you don't hold the appropriate update-side lock, you *must*
 	use the "_rcu()" variants of the list macros.  Failing to do so
 	will break Alpha, cause aggressive compilers to generate bad code,
-	and confuse people trying to read your code.
+	and confuse people trying to understand your code.

 11.	Any lock acquired by an RCU callback must be acquired elsewhere
-	with softirq disabled, e.g., via spin_lock_irqsave(),
-	spin_lock_bh(), etc.  Failing to disable softirq on a given
-	acquisition of that lock will result in deadlock as soon as
-	the RCU softirq handler happens to run your RCU callback while
-	interrupting that acquisition's critical section.
+	with softirq disabled, e.g., via spin_lock_bh().  Failing to
+	disable softirq on a given acquisition of that lock will result
+	in deadlock as soon as the RCU softirq handler happens to run
+	your RCU callback while interrupting that acquisition's critical
+	section.

 12.	RCU callbacks can be and are executed in parallel.  In many cases,
 	the callback code simply wrappers around kfree(), so that this
@ -372,7 +386,17 @@ over a rather long period of time, but improvements are always welcome!
 	for some  real-time workloads, this is the whole point of using
 	the rcu_nocbs= kernel boot parameter.

-13.	Unlike other forms of RCU, it *is* permissible to block in an
+	In addition, do not assume that callbacks queued in a given order
+	will be invoked in that order, even if they all are queued on the
+	same CPU.  Furthermore, do not assume that same-CPU callbacks will
+	be invoked serially.  For example, in recent kernels, CPUs can be
+	switched between offloaded and de-offloaded callback invocation,
+	and while a given CPU is undergoing such a switch, its callbacks
+	might be concurrently invoked by that CPU's softirq handler and
+	that CPU's rcuo kthread.  At such times, that CPU's callbacks
+	might be executed both concurrently and out of order.
+
+13.	Unlike most flavors of RCU, it *is* permissible to block in an
 	SRCU read-side critical section (demarked by srcu_read_lock()
 	and srcu_read_unlock()), hence the "SRCU": "sleepable RCU".
 	Please note that if you don't need to sleep in read-side critical
@ -412,6 +436,12 @@ over a rather long period of time, but improvements are always welcome!
 	never sends IPIs to other CPUs, so it is easier on
 	real-time workloads than is synchronize_rcu_expedited().

+	It is also permissible to sleep in RCU Tasks Trace read-side
+	critical sections, which are delimited by rcu_read_lock_trace() and
+	rcu_read_unlock_trace().  However, this is a specialized flavor
+	of RCU, and you should not use it without first checking with
+	its current users.  In most cases, you should instead use SRCU.
+
 	Note that rcu_assign_pointer() relates to SRCU just as it does to
 	other forms of RCU, but instead of rcu_dereference() you should
 	use srcu_dereference() in order to avoid lockdep splats.
@ -442,50 +472,62 @@ over a rather long period of time, but improvements are always welcome!
 	find problems as follows:

 	CONFIG_PROVE_LOCKING:
-		check that accesses to RCU-protected data
-		structures are carried out under the proper RCU
-		read-side critical section, while holding the right
-		combination of locks, or whatever other conditions
-		are appropriate.
+		check that accesses to RCU-protected data structures
+		are carried out under the proper RCU read-side critical
+		section, while holding the right combination of locks,
+		or whatever other conditions are appropriate.

 	CONFIG_DEBUG_OBJECTS_RCU_HEAD:
-		check that you don't pass the
-		same object to call_rcu() (or friends) before an RCU
-		grace period has elapsed since the last time that you
-		passed that same object to call_rcu() (or friends).
+		check that you don't pass the same object to call_rcu()
+		(or friends) before an RCU grace period has elapsed
+		since the last time that you passed that same object to
+		call_rcu() (or friends).

 	__rcu sparse checks:
-		tag the pointer to the RCU-protected data
-		structure with __rcu, and sparse will warn you if you
-		access that pointer without the services of one of the
-		variants of rcu_dereference().
+		tag the pointer to the RCU-protected data structure
+		with __rcu, and sparse will warn you if you access that
+		pointer without the services of one of the variants
+		of rcu_dereference().

 	These debugging aids can help you find problems that are
 	otherwise extremely difficult to spot.

-17.	If you register a callback using call_rcu() or call_srcu(), and
-	pass in a function defined within a loadable module, then it in
-	necessary to wait for all pending callbacks to be invoked after
-	the last invocation and before unloading that module.  Note that
-	it is absolutely *not* sufficient to wait for a grace period!
-	The current (say) synchronize_rcu() implementation is *not*
-	guaranteed to wait for callbacks registered on other CPUs.
-	Or even on the current CPU if that CPU recently went offline
-	and came back online.
+17.	If you pass a callback function defined within a module to one of
+	call_rcu(), call_srcu(), call_rcu_tasks(), call_rcu_tasks_rude(),
+	or call_rcu_tasks_trace(), then it is necessary to wait for all
+	pending callbacks to be invoked before unloading that module.
+	Note that it is absolutely *not* sufficient to wait for a grace
+	period!  For example, the synchronize_rcu() implementation is *not*
+	guaranteed to wait for callbacks registered on other CPUs via
+	call_rcu().  Or even on the current CPU if that CPU recently
+	went offline and came back online.

 	You instead need to use one of the barrier functions:

 	-	call_rcu() -> rcu_barrier()
 	-	call_srcu() -> srcu_barrier()
+	-	call_rcu_tasks() -> rcu_barrier_tasks()
+	-	call_rcu_tasks_rude() -> rcu_barrier_tasks_rude()
+	-	call_rcu_tasks_trace() -> rcu_barrier_tasks_trace()

 	However, these barrier functions are absolutely *not* guaranteed
-	to wait for a grace period.  In fact, if there are no call_rcu()
-	callbacks waiting anywhere in the system, rcu_barrier() is within
-	its rights to return immediately.
+	to wait for a grace period.  For example, if there are no
+	call_rcu() callbacks queued anywhere in the system, rcu_barrier()
+	can and will return immediately.

-	So if you need to wait for both an RCU grace period and for
-	all pre-existing call_rcu() callbacks, you will need to execute
-	both rcu_barrier() and synchronize_rcu(), if necessary, using
-	something like workqueues to execute them concurrently.
+	So if you need to wait for both a grace period and for all
+	pre-existing callbacks, you will need to invoke both functions,
+	with the pair depending on the flavor of RCU:
+
+	-	Either synchronize_rcu() or synchronize_rcu_expedited(),
+		together with rcu_barrier()
+	-	Either synchronize_srcu() or synchronize_srcu_expedited(),
+		together with srcu_barrier()
+	-	synchronize_rcu_tasks() and rcu_barrier_tasks()
+	-	synchronize_rcu_tasks_rude() and rcu_barrier_tasks_rude()
+	-	synchronize_rcu_tasks_trace() and rcu_barrier_tasks_trace()
+
+	If necessary, you can use something like workqueues to execute
+	the requisite pair of functions concurrently.

 	See rcubarrier.rst for more information.
--- a/Documentation/RCU/index.rst
+++ b/Documentation/RCU/index.rst
@ -9,7 +9,6 @@ RCU concepts
 .. toctree::
   :maxdepth: 3

-   arrayRCU
   checklist
   lockdep
   lockdep-splat
--- a/Documentation/RCU/listRCU.rst
+++ b/Documentation/RCU/listRCU.rst
@ -3,11 +3,10 @@
 Using RCU to Protect Read-Mostly Linked Lists
 =============================================

-One of the best applications of RCU is to protect read-mostly linked lists
-(``struct list_head`` in list.h).  One big advantage of this approach
-is that all of the required memory barriers are included for you in
-the list macros.  This document describes several applications of RCU,
-with the best fits first.
+One of the most common uses of RCU is protecting read-mostly linked lists
+(``struct list_head`` in list.h).  One big advantage of this approach is
+that all of the required memory ordering is provided by the list macros.
+This document describes several list-based RCU use cases.


 Example 1: Read-mostly list: Deferred Destruction
@ -35,7 +34,8 @@ The code traversing the list of all processes typically looks like::
 	}
 	rcu_read_unlock();

-The simplified code for removing a process from a task list is::
+The simplified and heavily inlined code for removing a process from a
+task list is::

 	void release_task(struct task_struct *p)
 	{
@ -45,39 +45,48 @@ The simplified code for removing a process from a task list is::
 		call_rcu(&p->rcu, delayed_put_task_struct);
 	}

-When a process exits, ``release_task()`` calls ``list_del_rcu(&p->tasks)`` under
-``tasklist_lock`` writer lock protection, to remove the task from the list of
-all tasks. The ``tasklist_lock`` prevents concurrent list additions/removals
-from corrupting the list. Readers using ``for_each_process()`` are not protected
-with the ``tasklist_lock``. To prevent readers from noticing changes in the list
-pointers, the ``task_struct`` object is freed only after one or more grace
-periods elapse (with the help of call_rcu()). This deferring of destruction
-ensures that any readers traversing the list will see valid ``p->tasks.next``
-pointers and deletion/freeing can happen in parallel with traversal of the list.
-This pattern is also called an **existence lock**, since RCU pins the object in
-memory until all existing readers finish.
+When a process exits, ``release_task()`` calls ``list_del_rcu(&p->tasks)``
+via __exit_signal() and __unhash_process() under ``tasklist_lock``
+writer lock protection.  The list_del_rcu() invocation removes
+the task from the list of all tasks. The ``tasklist_lock``
+prevents concurrent list additions/removals from corrupting the
+list. Readers using ``for_each_process()`` are not protected with the
+``tasklist_lock``. To prevent readers from noticing changes in the list
+pointers, the ``task_struct`` object is freed only after one or more
+grace periods elapse, with the help of call_rcu(), which is invoked via
+put_task_struct_rcu_user(). This deferring of destruction ensures that
+any readers traversing the list will see valid ``p->tasks.next`` pointers
+and deletion/freeing can happen in parallel with traversal of the list.
+This pattern is also called an **existence lock**, since RCU refrains
+from invoking the delayed_put_task_struct() callback function until
+all existing readers finish, which guarantees that the ``task_struct``
+object in question will remain in existence until after the completion
+of all RCU readers that might possibly have a reference to that object.


 Example 2: Read-Side Action Taken Outside of Lock: No In-Place Updates
 ----------------------------------------------------------------------

-The best applications are cases where, if reader-writer locking were
-used, the read-side lock would be dropped before taking any action
-based on the results of the search.  The most celebrated example is
-the routing table.  Because the routing table is tracking the state of
-equipment outside of the computer, it will at times contain stale data.
-Therefore, once the route has been computed, there is no need to hold
-the routing table static during transmission of the packet.  After all,
-you can hold the routing table static all you want, but that won't keep
-the external Internet from changing, and it is the state of the external
-Internet that really matters.  In addition, routing entries are typically
-added or deleted, rather than being modified in place.
+Some reader-writer locking use cases compute a value while holding
+the read-side lock, but continue to use that value after that lock is
+released.  These use cases are often good candidates for conversion
+to RCU.  One prominent example involves network packet routing.
+Because the packet-routing data tracks the state of equipment outside
+of the computer, it will at times contain stale data.  Therefore, once
+the route has been computed, there is no need to hold the routing table
+static during transmission of the packet.  After all, you can hold the
+routing table static all you want, but that won't keep the external
+Internet from changing, and it is the state of the external Internet
+that really matters.  In addition, routing entries are typically added
+or deleted, rather than being modified in place.  This is a rare example
+of the finite speed of light and the non-zero size of atoms actually
+helping make synchronization be lighter weight.

-A straightforward example of this use of RCU may be found in the
-system-call auditing support.  For example, a reader-writer locked
+A straightforward example of this type of RCU use case may be found in
+the system-call auditing support.  For example, a reader-writer locked
 implementation of ``audit_filter_task()`` might be as follows::

-	static enum audit_state audit_filter_task(struct task_struct *tsk)
+	static enum audit_state audit_filter_task(struct task_struct *tsk, char **key)
 	{
 		struct audit_entry *e;
 		enum audit_state   state;
@ -86,6 +95,8 @@ implementation of ``audit_filter_task()`` might be as follows::
 		/* Note: audit_filter_mutex held by caller. */
 		list_for_each_entry(e, &audit_tsklist, list) {
 			if (audit_filter_rules(tsk, &e->rule, NULL, &state)) {
+				if (state == AUDIT_STATE_RECORD)
+					*key = kstrdup(e->rule.filterkey, GFP_ATOMIC);
 				read_unlock(&auditsc_lock);
 				return state;
 			}
@ -101,7 +112,7 @@ you are turning auditing off, it is OK to audit a few extra system calls.

 This means that RCU can be easily applied to the read side, as follows::

-	static enum audit_state audit_filter_task(struct task_struct *tsk)
+	static enum audit_state audit_filter_task(struct task_struct *tsk, char **key)
 	{
 		struct audit_entry *e;
 		enum audit_state   state;
@ -110,6 +121,8 @@ This means that RCU can be easily applied to the read side, as follows::
 		/* Note: audit_filter_mutex held by caller. */
 		list_for_each_entry_rcu(e, &audit_tsklist, list) {
 			if (audit_filter_rules(tsk, &e->rule, NULL, &state)) {
+				if (state == AUDIT_STATE_RECORD)
+					*key = kstrdup(e->rule.filterkey, GFP_ATOMIC);
 				rcu_read_unlock();
 				return state;
 			}
@ -118,13 +131,15 @@ This means that RCU can be easily applied to the read side, as follows::
 		return AUDIT_BUILD_CONTEXT;
 	}

-The ``read_lock()`` and ``read_unlock()`` calls have become rcu_read_lock()
-and rcu_read_unlock(), respectively, and the list_for_each_entry() has
-become list_for_each_entry_rcu().  The **_rcu()** list-traversal primitives
-insert the read-side memory barriers that are required on DEC Alpha CPUs.
+The read_lock() and read_unlock() calls have become rcu_read_lock()
+and rcu_read_unlock(), respectively, and the list_for_each_entry()
+has become list_for_each_entry_rcu().  The **_rcu()** list-traversal
+primitives add READ_ONCE() and diagnostic checks for incorrect use
+outside of an RCU read-side critical section.

 The changes to the update side are also straightforward. A reader-writer lock
-might be used as follows for deletion and insertion::
+might be used as follows for deletion and insertion in these simplified
+versions of audit_del_rule() and audit_add_rule()::

 	static inline int audit_del_rule(struct audit_rule *rule,
 					 struct list_head *list)
@ -188,16 +203,16 @@ Following are the RCU equivalents for these two functions::
 		return 0;
 	}

-Normally, the ``write_lock()`` and ``write_unlock()`` would be replaced by a
+Normally, the write_lock() and write_unlock() would be replaced by a
 spin_lock() and a spin_unlock(). But in this case, all callers hold
 ``audit_filter_mutex``, so no additional locking is required. The
-``auditsc_lock`` can therefore be eliminated, since use of RCU eliminates the
+auditsc_lock can therefore be eliminated, since use of RCU eliminates the
 need for writers to exclude readers.

 The list_del(), list_add(), and list_add_tail() primitives have been
 replaced by list_del_rcu(), list_add_rcu(), and list_add_tail_rcu().
-The **_rcu()** list-manipulation primitives add memory barriers that are needed on
-weakly ordered CPUs (most of them!).  The list_del_rcu() primitive omits the
+The **_rcu()** list-manipulation primitives add memory barriers that are
+needed on weakly ordered CPUs.  The list_del_rcu() primitive omits the
 pointer poisoning debug-assist code that would otherwise cause concurrent
 readers to fail spectacularly.

@ -238,7 +253,9 @@ need to be filled in)::
 The RCU version creates a copy, updates the copy, then replaces the old
 entry with the newly updated entry.  This sequence of actions, allowing
 concurrent reads while making a copy to perform an update, is what gives
-RCU (*read-copy update*) its name.  The RCU code is as follows::
+RCU (*read-copy update*) its name.
+
+The RCU version of audit_upd_rule() is as follows::

 	static inline int audit_upd_rule(struct audit_rule *rule,
 					 struct list_head *list,
@ -267,6 +284,9 @@ RCU (*read-copy update*) its name.  The RCU code is as follows::
 Again, this assumes that the caller holds ``audit_filter_mutex``.  Normally, the
 writer lock would become a spinlock in this sort of code.

+The update_lsm_rule() function does something very similar, for those who would
+prefer to look at real Linux-kernel code.
+
 Another use of this pattern can be found in the openvswitch driver's *connection
 tracking table* code in ``ct_limit_set()``.  The table holds connection tracking
 entries and has a limit on the maximum entries.  There is one such table
@ -281,9 +301,10 @@ Example 4: Eliminating Stale Data
 ---------------------------------

 The auditing example above tolerates stale data, as do most algorithms
-that are tracking external state.  Because there is a delay from the
-time the external state changes before Linux becomes aware of the change,
-additional RCU-induced staleness is generally not a problem.
+that are tracking external state.  After all, there is a delay
+from the time the external state changes until Linux becomes aware
+of the change, and so, as noted earlier, a small quantity of additional
+RCU-induced staleness is generally not a problem.

 However, there are many examples where stale data cannot be tolerated.
 One example in the Linux kernel is the System V IPC (see the shm_lock()
@ -302,7 +323,7 @@ Quick Quiz:

 If the system-call audit module were to ever need to reject stale data, one way
 to accomplish this would be to add a ``deleted`` flag and a ``lock`` spinlock to the
-audit_entry structure, and modify ``audit_filter_task()`` as follows::
+``audit_entry`` structure, and modify audit_filter_task() as follows::

 	static enum audit_state audit_filter_task(struct task_struct *tsk)
 	{
@ -319,6 +340,8 @@ audit_entry structure, and modify ``audit_filter_task()`` as follows::
 					return AUDIT_BUILD_CONTEXT;
 				}
 				rcu_read_unlock();
+				if (state == AUDIT_STATE_RECORD)
+					*key = kstrdup(e->rule.filterkey, GFP_ATOMIC);
 				return state;
 			}
 		}
@ -326,12 +349,6 @@ audit_entry structure, and modify ``audit_filter_task()`` as follows::
 		return AUDIT_BUILD_CONTEXT;
 	}

-Note that this example assumes that entries are only added and deleted.
-Additional mechanism is required to deal correctly with the update-in-place
-performed by ``audit_upd_rule()``.  For one thing, ``audit_upd_rule()`` would
-need additional memory barriers to ensure that the list_add_rcu() was really
-executed before the list_del_rcu().
-
 The ``audit_del_rule()`` function would need to set the ``deleted`` flag under the
 spinlock as follows::

@ -357,24 +374,32 @@ spinlock as follows::

 This too assumes that the caller holds ``audit_filter_mutex``.

+Note that this example assumes that entries are only added and deleted.
+Additional mechanism is required to deal correctly with the update-in-place
+performed by audit_upd_rule().  For one thing, audit_upd_rule() would
+need to hold the locks of both the old ``audit_entry`` and its replacement
+while executing the list_replace_rcu().
+

 Example 5: Skipping Stale Objects
 ---------------------------------

-For some usecases, reader performance can be improved by skipping stale objects
-during read-side list traversal if the object in concern is pending destruction
-after one or more grace periods. One such example can be found in the timerfd
-subsystem. When a ``CLOCK_REALTIME`` clock is reprogrammed - for example due to
-setting of the system time, then all programmed timerfds that depend on this
-clock get triggered and processes waiting on them to expire are woken up in
-advance of their scheduled expiry. To facilitate this, all such timers are added
-to an RCU-managed ``cancel_list`` when they are setup in
+For some use cases, reader performance can be improved by skipping
+stale objects during read-side list traversal, where stale objects
+are those that will be removed and destroyed after one or more grace
+periods. One such example can be found in the timerfd subsystem. When a
+``CLOCK_REALTIME`` clock is reprogrammed (for example due to setting
+of the system time) then all programmed ``timerfds`` that depend on
+this clock get triggered and processes waiting on them are awakened in
+advance of their scheduled expiry. To facilitate this, all such timers
+are added to an RCU-managed ``cancel_list`` when they are set up in
 ``timerfd_setup_cancel()``::

 	static void timerfd_setup_cancel(struct timerfd_ctx *ctx, int flags)
 	{
 		spin_lock(&ctx->cancel_lock);
-		if ((ctx->clockid == CLOCK_REALTIME &&
+		if ((ctx->clockid == CLOCK_REALTIME ||
+		     ctx->clockid == CLOCK_REALTIME_ALARM) &&
 		    (flags & TFD_TIMER_ABSTIME) && (flags & TFD_TIMER_CANCEL_ON_SET)) {
 			if (!ctx->might_cancel) {
 				ctx->might_cancel = true;
@ -382,13 +407,16 @@ to an RCU-managed ``cancel_list`` when they are setup in
 				list_add_rcu(&ctx->clist, &cancel_list);
 				spin_unlock(&cancel_lock);
 			}
+		} else {
+			__timerfd_remove_cancel(ctx);
 		}
 		spin_unlock(&ctx->cancel_lock);
 	}

-When a timerfd is freed (fd is closed), then the ``might_cancel`` flag of the
-timerfd object is cleared, the object removed from the ``cancel_list`` and
-destroyed::
+When a timerfd is freed (fd is closed), then the ``might_cancel``
+flag of the timerfd object is cleared, the object removed from the
+``cancel_list`` and destroyed, as shown in this simplified and inlined
+version of timerfd_release()::

 	int timerfd_release(struct inode *inode, struct file *file)
 	{
@ -403,7 +431,10 @@ destroyed::
 		}
 		spin_unlock(&ctx->cancel_lock);

-		hrtimer_cancel(&ctx->t.tmr);
+		if (isalarm(ctx))
+			alarm_cancel(&ctx->t.alarm);
+		else
+			hrtimer_cancel(&ctx->t.tmr);
 		kfree_rcu(ctx, rcu);
 		return 0;
 	}
@ -416,6 +447,7 @@ objects::

 	void timerfd_clock_was_set(void)
 	{
+		ktime_t moffs = ktime_mono_to_real(0);
 		struct timerfd_ctx *ctx;
 		unsigned long flags;

@ -424,7 +456,7 @@ objects::
 			if (!ctx->might_cancel)
 				continue;
 			spin_lock_irqsave(&ctx->wqh.lock, flags);
-			if (ctx->moffs != ktime_mono_to_real(0)) {
+			if (ctx->moffs != moffs) {
 				ctx->moffs = KTIME_MAX;
 				ctx->ticks++;
 				wake_up_locked_poll(&ctx->wqh, EPOLLIN);
@ -434,10 +466,10 @@ objects::
 		rcu_read_unlock();
 	}

-The key point here is, because RCU-traversal of the ``cancel_list`` happens
-while objects are being added and removed to the list, sometimes the traversal
-can step on an object that has been removed from the list. In this example, it
-is seen that it is better to skip such objects using a flag.
+The key point is that because RCU-protected traversal of the
+``cancel_list`` happens concurrently with object addition and removal,
+sometimes the traversal can access an object that has been removed from
+the list. In this example, a flag is used to skip such objects.


 Summary
--- a/Documentation/RCU/lockdep.rst
+++ b/Documentation/RCU/lockdep.rst
@ -17,7 +17,9 @@ state::
 	rcu_read_lock_held() for normal RCU.
 	rcu_read_lock_bh_held() for RCU-bh.
 	rcu_read_lock_sched_held() for RCU-sched.
+	rcu_read_lock_any_held() for any of normal RCU, RCU-bh, and RCU-sched.
 	srcu_read_lock_held() for SRCU.
+	rcu_read_lock_trace_held() for RCU Tasks Trace.

 These functions are conservative, and will therefore return 1 if they
 aren't certain (for example, if CONFIG_DEBUG_LOCK_ALLOC is not set).
@ -53,6 +55,8 @@ checking of rcu_dereference() primitives:
 		is invoked by both SRCU readers and updaters.
 	rcu_dereference_raw(p):
 		Don't check.  (Use sparingly, if at all.)
+	rcu_dereference_raw_check(p):
+		Don't do lockdep at all.  (Use sparingly, if at all.)
 	rcu_dereference_protected(p, c):
 		Use explicit check expression "c", and omit all barriers
 		and compiler constraints.  This is useful when the data
--- a/Documentation/accel/index.rst
+++ b/Documentation/accel/index.rst
@ -0,0 +1,17 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+====================
+Compute Accelerators
+====================
+
+.. toctree::
+   :maxdepth: 1
+
+   introduction
+
+.. only::  subproject and html
+
+   Indices
+   =======
+
+   * :ref:`genindex`
--- a/Documentation/accel/introduction.rst
+++ b/Documentation/accel/introduction.rst
@ -0,0 +1,110 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+============
+Introduction
+============
+
+The Linux compute accelerators subsystem is designed to expose compute
+accelerators in a common way to user-space and provide a common set of
+functionality.
+
+These devices can be either stand-alone ASICs or IP blocks inside an SoC/GPU.
+Although these devices are typically designed to accelerate
+Machine-Learning (ML) and/or Deep-Learning (DL) computations, the accel layer
+is not limited to handling these types of accelerators.
+
+Typically, a compute accelerator will belong to one of the following
+categories:
+
+- Edge AI - doing inference at an edge device. It can be an embedded ASIC/FPGA,
+  or an IP inside a SoC (e.g. laptop web camera). These devices
+  are typically configured using registers and can work with or without DMA.
+
+- Inference data-center - single/multi user devices in a large server. This
+  type of device can be stand-alone or an IP inside a SoC or a GPU. It will
+  have on-board DRAM (to hold the DL topology), DMA engines and
+  command submission queues (either kernel or user-space queues).
+  It might also have an MMU to manage multiple users and might also enable
+  virtualization (SR-IOV) to support multiple VMs on the same device. In
+  addition, these devices will usually have some tools, such as profiler and
+  debugger.
+
+- Training data-center - Similar to Inference data-center cards, but typically
+  have more computational power and memory b/w (e.g. HBM) and will likely have
+  a method of scaling-up/out, i.e. connecting to other training cards inside
+  the server or in other servers, respectively.
+
+All these devices typically have different runtime user-space software stacks,
+that are tailor-made for their h/w. In addition, they will also probably
+include a compiler to generate programs for their custom-made computational
+engines. Typically, the common layer in user-space will be the DL frameworks,
+such as PyTorch and TensorFlow.
+
+Sharing code with DRM
+=====================
+
+Because these types of devices can be an IP inside GPUs or have
+characteristics similar to those of GPUs, the accel subsystem will use the
+DRM subsystem's code and functionality, i.e. the accel core code will
+be part of the DRM subsystem and an accel device will be a new type of DRM
+device.
+
+This will allow us to leverage the extensive DRM code-base and
+collaborate with DRM developers that have experience with this type of
+devices. In addition, new features that will be added for the accelerator
+drivers can be of use to GPU drivers as well.
+
+Differentiation from GPUs
+=========================
+
+Because we want to prevent the extensive user-space graphic software stack
+from trying to use an accelerator as a GPU, the compute accelerators will be
+differentiated from GPUs by using a new major number and new device char files.
+
+Furthermore, the drivers will be located in a separate place in the kernel
+tree - drivers/accel/.
+
+The accelerator devices will be exposed to the user space with the dedicated
+261 major number and will have the following convention:
+
+- device char files - /dev/accel/accel*
+- sysfs             - /sys/class/accel/accel*/
+- debugfs           - /sys/kernel/debug/accel/accel*/
+
+Getting Started
+===============
+
+First, read the DRM documentation at Documentation/gpu/index.rst.
+Not only will it explain how to write a new DRM driver, but it will also
+contain all the information on how to contribute, the Code Of Conduct and
+what the coding style/documentation is. All of that is the same for the
+accel subsystem.
+
+Second, make sure the kernel is configured with CONFIG_DRM_ACCEL.
+
+To expose your device as an accelerator, two changes are needed to
+be done in your driver (as opposed to a standard DRM driver):
+
+- Add the DRIVER_COMPUTE_ACCEL feature flag in your drm_driver's
+  driver_features field. It is important to note that this driver feature is
+  mutually exclusive with DRIVER_RENDER and DRIVER_MODESET. Devices that want
+  to expose both graphics and compute device char files should be handled by
+  two drivers that are connected using the auxiliary bus framework.
+
+- Change the open callback in your driver fops structure to accel_open().
+  Alternatively, your driver can use DEFINE_DRM_ACCEL_FOPS macro to easily
+  set the correct function operations pointers structure.
+
+External References
+===================
+
+email threads
+-------------
+
+* `Initial discussion on the New subsystem for acceleration devices <https://lkml.org/lkml/2022/7/31/83>`_ - Oded Gabbay (2022)
+* `patch-set to add the new subsystem <https://lkml.org/lkml/2022/10/22/544>`_ - Oded Gabbay (2022)
+
+Conference talks
+----------------
+
+* `LPC 2022 Accelerators BOF outcomes summary <https://airlied.blogspot.com/2022/09/accelerators-bof-outcomes-summary.html>`_ - Dave Airlie (2022)
--- a/Documentation/admin-guide/blockdev/zram.rst
+++ b/Documentation/admin-guide/blockdev/zram.rst
@ -348,8 +348,13 @@ this can be accomplished with::

        echo huge_idle > /sys/block/zramX/writeback

+If a user chooses to writeback only incompressible pages (pages that none of
+the algorithms can compress) this can be accomplished with::
+
+	echo incompressible > /sys/block/zramX/writeback
+
 If an admin wants to write a specific page in zram device to the backing device,
-they could write a page index into the interface.
+they could write a page index into the interface::

 	echo "page_index=1251" > /sys/block/zramX/writeback

@ -401,6 +406,87 @@ budget in next setting is user's job.
 If admin wants to measure writeback count in a certain period, they could
 know it via /sys/block/zram0/bd_stat's 3rd column.

+recompression
+-------------
+
+With CONFIG_ZRAM_MULTI_COMP, zram can recompress pages using alternative
+(secondary) compression algorithms. The basic idea is that alternative
+compression algorithm can provide better compression ratio at a price of
+(potentially) slower compression/decompression speeds. Alternative compression
+algorithm can, for example, be more successful compressing huge pages (those
+that default algorithm failed to compress). Another application is idle pages
+recompression - pages that are cold and sit in the memory can be recompressed
+using more effective algorithm and, hence, reduce zsmalloc memory usage.
+
+With CONFIG_ZRAM_MULTI_COMP, zram supports up to 4 compression algorithms:
+one primary and up to 3 secondary ones. Primary zram compressor is explained
+in "3) Select compression algorithm", secondary algorithms are configured
+using recomp_algorithm device attribute.
+
+Example::
+
+	#show supported recompression algorithms
+	cat /sys/block/zramX/recomp_algorithm
+	#1: lzo lzo-rle lz4 lz4hc [zstd]
+	#2: lzo lzo-rle lz4 [lz4hc] zstd
+
+Alternative compression algorithms are sorted by priority. In the example
+above, zstd is used as the first alternative algorithm, which has priority
+of 1, while lz4hc is configured as a compression algorithm with priority 2.
+Alternative compression algorithm's priority is provided during algorithms
+configuration::
+
+	#select zstd recompression algorithm, priority 1
+	echo "algo=zstd priority=1" > /sys/block/zramX/recomp_algorithm
+
+	#select deflate recompression algorithm, priority 2
+	echo "algo=deflate priority=2" > /sys/block/zramX/recomp_algorithm
+
+Another device attribute that CONFIG_ZRAM_MULTI_COMP enables is recompress,
+which controls recompression.
+
+Examples::
+
+	#IDLE pages recompression is activated by `idle` mode
+	echo "type=idle" > /sys/block/zramX/recompress
+
+	#HUGE pages recompression is activated by `huge` mode
+	echo "type=huge" > /sys/block/zramX/recompress
+
+	#HUGE_IDLE pages recompression is activated by `huge_idle` mode
+	echo "type=huge_idle" > /sys/block/zramX/recompress
+
+The number of idle pages can be significant, so user-space can pass a size
+threshold (in bytes) to the recompress knob: zram will recompress only pages
+of equal or greater size::
+
+	#recompress all pages larger than 3000 bytes
+	echo "threshold=3000" > /sys/block/zramX/recompress
+
+	#recompress idle pages larger than 2000 bytes
+	echo "type=idle threshold=2000" > /sys/block/zramX/recompress
+
+Recompression of idle pages requires memory tracking.
+
+During re-compression, for every page that matches re-compression criteria,
+ZRAM iterates the list of registered alternative compression algorithms in
+order of their priorities. ZRAM stops either when re-compression was
+successful (re-compressed object is smaller in size than the original one)
+and matches re-compression criteria (e.g. size threshold) or when there are
+no secondary algorithms left to try. If none of the secondary algorithms can
+successfully re-compress the page, such a page is marked as incompressible,
+so ZRAM will not attempt to re-compress it in the future.
+
+This re-compression behaviour, when it iterates through the list of
+registered compression algorithms, increases our chances of finding the
+algorithm that successfully compresses a particular page. Sometimes, however,
+it is convenient (and sometimes even necessary) to limit recompression to
+only one particular algorithm so that it will not try any other algorithms.
+This can be achieved by providing an algo=NAME parameter::
+
+	#use zstd algorithm only (if registered)
+	echo "type=huge algo=zstd" > /sys/block/zramX/recompress
+
 memory tracking
 ===============

@ -411,9 +497,11 @@ pages of the process with*pagemap.
 If you enable the feature, you could see block state via
 /sys/kernel/debug/zram/zram0/block_state. The output is as follows::

-	  300    75.033841 .wh.
-	  301    63.806904 s...
-	  302    63.806919 ..hi
+	  300    75.033841 .wh...
+	  301    63.806904 s.....
+	  302    63.806919 ..hi..
+	  303    62.801919 ....r.
+	  304   146.781902 ..hi.n

 First column
 	zram's block index.
@ -430,6 +518,10 @@ Third column
 		huge page
 	i:
 		idle page
+	r:
+		recompressed page (secondary compression algorithm)
+	n:
+		none of the algorithms (including secondary ones) could compress it

 First line of above example says 300th block is accessed at 75.033841sec
 and the block's state is huge so it is written back to the backing
--- a/Documentation/admin-guide/bootconfig.rst
+++ b/Documentation/admin-guide/bootconfig.rst
@ -229,7 +229,7 @@ In addition to the kernel command line, the boot config can be used for
 passing the kernel parameters. All the key-value pairs under ``kernel``
 key will be passed to kernel cmdline directly. Moreover, the key-value
 pairs under ``init`` will be passed to init process via the cmdline.
-The parameters are concatinated with user-given kernel cmdline string
+The parameters are concatenated with user-given kernel cmdline string
 as the following order, so that the command line parameter can override
 bootconfig parameters (this depends on how the subsystem handles parameters
 but in general, earlier parameter will be overwritten by later one.)::
--- a/Documentation/admin-guide/cgroup-v1/memory.rst
+++ b/Documentation/admin-guide/cgroup-v1/memory.rst
@ -543,7 +543,8 @@ inactive_anon	# of bytes of anonymous and swap cache memory on inactive
 		LRU list.
 active_anon	# of bytes of anonymous and swap cache memory on active
 		LRU list.
-inactive_file	# of bytes of file-backed memory on inactive LRU list.
+inactive_file	# of bytes of file-backed memory and MADV_FREE anonymous memory
+                (LazyFree pages) on inactive LRU list.
 active_file	# of bytes of file-backed memory on active LRU list.
 unevictable	# of bytes of memory that cannot be reclaimed (mlocked etc).
 =============== ===============================================================
--- a/Documentation/admin-guide/cgroup-v2.rst
+++ b/Documentation/admin-guide/cgroup-v2.rst
@ -1245,17 +1245,13 @@ PAGE_SIZE multiple when read back.
 	This is a simple interface to trigger memory reclaim in the
 	target cgroup.

-	This file accepts a single key, the number of bytes to reclaim.
-	No nested keys are currently supported.
+	This file accepts a string which contains the number of bytes to
+	reclaim.

 	Example::

 	  echo "1G" > memory.reclaim

-	The interface can be later extended with nested keys to
-	configure the reclaim behavior. For example, specify the
-	type of memory to reclaim from (anon, file, ..).
-
 	Please note that the kernel can over or under reclaim from
 	the target cgroup. If less bytes are reclaimed than the
 	specified amount, -EAGAIN is returned.
@ -1267,6 +1263,13 @@ PAGE_SIZE multiple when read back.
 	This means that the networking layer will not adapt based on
 	reclaim induced by memory.reclaim.

+	This file also allows the user to specify the nodes to reclaim from,
+	via the 'nodes=' key, for example::
+
+	  echo "1G nodes=0,1" > memory.reclaim
+
+	The above instructs the kernel to reclaim memory from nodes 0,1.
+
  memory.peak
 	A read-only single value file which exists on non-root
 	cgroups.
@ -1488,12 +1491,18 @@ PAGE_SIZE multiple when read back.
 	  pgscan_direct (npn)
 		Amount of scanned pages directly  (in an inactive LRU list)

+	  pgscan_khugepaged (npn)
+		Amount of scanned pages by khugepaged  (in an inactive LRU list)
+
 	  pgsteal_kswapd (npn)
 		Amount of reclaimed pages by kswapd

 	  pgsteal_direct (npn)
 		Amount of reclaimed pages directly

+	  pgsteal_khugepaged (npn)
+		Amount of reclaimed pages by khugepaged
+
 	  pgfault (npn)
 		Total number of page faults incurred

--- a/Documentation/admin-guide/cifs/usage.rst
+++ b/Documentation/admin-guide/cifs/usage.rst
@ -858,7 +858,7 @@ CIFS kernel module parameters
 These module parameters can be specified or modified either during the time of
 module loading or during the runtime by using the interface::

-	/proc/module/cifs/parameters/<param>
+	/sys/module/cifs/parameters/<param>

 i.e.::

--- a/Documentation/admin-guide/device-mapper/dm-init.rst
+++ b/Documentation/admin-guide/device-mapper/dm-init.rst
@ -123,3 +123,11 @@ Other examples (per target):
    0 1638400 verity 1 8:1 8:2 4096 4096 204800 1 sha256
    fb1a5a0f00deb908d8b53cb270858975e76cf64105d412ce764225d53b8f3cfd
    51934789604d1b92399c52e7cb149d1b3a1b74bbbcb103b2a0aaacbed5c08584
+
+For setups using device-mapper on top of asynchronously probed block
+devices (MMC, USB, ..), it may be necessary to tell dm-init to
+explicitly wait for them to become available before setting up the
+device-mapper tables. This can be done with the "dm-mod.waitfor="
+module parameter, which takes a list of devices to wait for::
+
+  dm-mod.waitfor=<device1>[,..,<deviceN>]
--- a/Documentation/admin-guide/devices.txt
+++ b/Documentation/admin-guide/devices.txt
@ -3080,6 +3080,11 @@
 		  ...
 		  255 = /dev/osd255	256th OSD Device

+ 261 char	Compute Acceleration Devices
+		  0 = /dev/accel/accel0	First acceleration device
+		  1 = /dev/accel/accel1	Second acceleration device
+		    ...
+
 384-511 char	RESERVED FOR DYNAMIC ASSIGNMENT
 		Character devices that request a dynamic allocation of major
 		number will take numbers starting from 511 and downward,
--- a/Documentation/admin-guide/hw_random.rst
+++ b/Documentation/admin-guide/hw_random.rst
@ -1,6 +1,6 @@
-==========================================================
-Linux support for random number generator in i8xx chipsets
-==========================================================
+=================================
+Hardware random number generators
+=================================

 Introduction
 ============
--- a/Documentation/admin-guide/kdump/vmcoreinfo.rst
+++ b/Documentation/admin-guide/kdump/vmcoreinfo.rst
@ -595,3 +595,32 @@ X2TLB
 -----

 Indicates whether the crashed kernel enabled SH extended mode.
+
+RISCV64
+=======
+
+VA_BITS
+-------
+
+The maximum number of bits for virtual addresses. Used to compute the
+virtual memory ranges.
+
+PAGE_OFFSET
+-----------
+
+Indicates the virtual kernel start address of the direct-mapped RAM region.
+
+phys_ram_base
+-------------
+
+Indicates the start physical RAM address.
+
+MODULES_VADDR|MODULES_END|VMALLOC_START|VMALLOC_END|VMEMMAP_START|VMEMMAP_END|KERNEL_LINK_ADDR
+----------------------------------------------------------------------------------------------
+
+Used to get the correct ranges:
+
+  * MODULES_VADDR ~ MODULES_END : Kernel module space.
+  * VMALLOC_START ~ VMALLOC_END : vmalloc() / ioremap() space.
+  * VMEMMAP_START ~ VMEMMAP_END : vmemmap space, used for struct page array.
+  * KERNEL_LINK_ADDR : start address of Kernel link and BPF
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@ -703,6 +703,17 @@
 	condev=		[HW,S390] console device
 	conmode=

+	con3215_drop=	[S390] 3215 console drop mode.
+			Format: y|n|Y|N|1|0
+			When set to true, drop data on the 3215 console when
+			the console buffer is full. In this case the
+			operator using a 3270 terminal emulator (for example
+			x3270) does not have to enter the clear key for the
+			console output to advance and the kernel to continue.
+			This leads to a much faster boot time when a 3270
+			terminal emulator is active. If no 3270 terminal
+			emulator is used, this parameter has no effect.
+
 	console=	[KNL] Output console device and options.

 		tty<n>	Use the virtual console device <n>.
@ -831,7 +842,7 @@
 			memory region [offset, offset + size] for that kernel
 			image. If '@offset' is omitted, then a suitable offset
 			is selected automatically.
-			[KNL, X86-64] Select a region under 4G first, and
+			[KNL, X86-64, ARM64] Select a region under 4G first, and
 			fall back to reserve region above 4G when '@offset'
 			hasn't been specified.
 			See Documentation/admin-guide/kdump/kdump.rst for further details.
@ -851,26 +862,23 @@
 			available.
 			It will be ignored if crashkernel=X is specified.
 	crashkernel=size[KMG],low
-			[KNL, X86-64] range under 4G. When crashkernel=X,high
+			[KNL, X86-64, ARM64] range under 4G. When crashkernel=X,high
 			is passed, kernel could allocate physical memory region
 			above 4G, that cause second kernel crash on system
 			that require some amount of low memory, e.g. swiotlb
 			requires at least 64M+32K low memory, also enough extra
 			low memory is needed to make sure DMA buffers for 32-bit
 			devices won't run out. Kernel would try to allocate
-			at least 256M below 4G automatically.
+			default size of memory below 4G automatically. The default
+			size is platform dependent.
+			  --> x86: max(swiotlb_size_or_default() + 8MiB, 256MiB)
+			  --> arm64: 128MiB
 			This one lets the user specify own low range under 4G
 			for second kernel instead.
 			0: to disable low allocation.
 			It will be ignored when crashkernel=X,high is not used
 			or memory reserved is below 4G.

-			[KNL, ARM64] range in low memory.
-			This one lets the user specify a low range in the
-			DMA zone for the crash dump kernel.
-			It will be ignored when crashkernel=X,high is not used
-			or memory reserved is located in the DMA zones.
-
 	cryptomgr.notests
 			[KNL] Disable crypto self-tests

@ -3777,12 +3785,15 @@
 			shutdown the other cpus.  Instead use the REBOOT_VECTOR
 			irq.

-	nomodeset	Disable kernel modesetting. DRM drivers will not perform
-			display-mode changes or accelerated rendering. Only the
-			system framebuffer will be available for use if this was
-			set-up by the firmware or boot loader.
+	nomodeset	Disable kernel modesetting. Most systems' firmware
+			sets up a display mode and provides framebuffer memory
+			for output. With nomodeset, DRM and fbdev drivers will
+			not load if they could possibly displace the pre-
+			initialized output. Only the system framebuffer will
+			be available for use. The respective drivers will not
+			perform display-mode changes or accelerated rendering.

-			Useful as fallback, or for testing and debugging.
+			Useful as error fallback, or for testing and debugging.

 	nomodule	Disable module load

@ -4566,17 +4577,15 @@

 	ramdisk_start=	[RAM] RAM disk image start address

-	random.trust_cpu={on,off}
-			[KNL] Enable or disable trusting the use of the
-			CPU's random number generator (if available) to
-			fully seed the kernel's CRNG. Default is controlled
-			by CONFIG_RANDOM_TRUST_CPU.
+	random.trust_cpu=off
+			[KNL] Disable trusting the use of the CPU's
+			random number generator (if available) to
+			initialize the kernel's RNG.

-	random.trust_bootloader={on,off}
-			[KNL] Enable or disable trusting the use of a
-			seed passed by the bootloader (if available) to
-			fully seed the kernel's CRNG. Default is controlled
-			by CONFIG_RANDOM_TRUST_BOOTLOADER.
+	random.trust_bootloader=off
+			[KNL] Disable trusting the use of a seed
+			passed by the bootloader (if available) to
+			initialize the kernel's RNG.

 	randomize_kstack_offset=
 			[KNL] Enable or disable kernel stack offset
@ -6257,6 +6266,25 @@
 			See also Documentation/trace/ftrace.rst "trace options"
 			section.

+	trace_trigger=[trigger-list]
+			[FTRACE] Add an event trigger on specific events.
+			Set a trigger on top of a specific event, with an optional
+			filter.
+
+			The format is "trace_trigger=<event>.<trigger>[ if <filter>],..."
+			Where more than one trigger may be specified that are comma delimited.
+
+			For example:
+
+			  trace_trigger="sched_switch.stacktrace if prev_state == 2"
+
+			The above will enable the "stacktrace" trigger on the "sched_switch"
+			event but only trigger it if the "prev_state" of the "sched_switch"
+			event is "2" (TASK_UNINTERRUPTIBLE).
+
+			See also "Event triggers" in Documentation/trace/events.rst
+
+
 	traceoff_on_warning
 			[FTRACE] enable this option to disable tracing when a
 			warning is hit. This turns off "tracing_on". Tracing can
@ -6959,3 +6987,14 @@
 				memory, and other data can't be written using
 				xmon commands.
 			off	xmon is disabled.
+
+	amd_pstate=	[X86]
+			disable
+			  Do not enable amd_pstate as the default
+			  scaling driver for the supported processors
+			passive
+			  Use amd_pstate as a scaling driver, driver requests a
+			  desired performance on this abstract scale and the power
+			  management firmware translates the requests into actual
+			  hardware states (core frequency, data fabric and memory
+			  clocks etc.)
--- a/Documentation/admin-guide/media/cec-drivers.rst
+++ b/Documentation/admin-guide/media/cec-drivers.rst
@ -1,10 +0,0 @@
-.. SPDX-License-Identifier: GPL-2.0
-
-=================================
-CEC driver-specific documentation
-=================================
-
-.. toctree::
-	:maxdepth: 2
-
-	pulse8-cec
--- a/Documentation/admin-guide/media/cec.rst
+++ b/Documentation/admin-guide/media/cec.rst
@ -0,0 +1,369 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+========
+HDMI CEC
+========
+
+Supported hardware in mainline
+==============================
+
+HDMI Transmitters:
+
+- Exynos4
+- Exynos5
+- STIH4xx HDMI CEC
+- V4L2 adv7511 (same HW, but a different driver from the drm adv7511)
+- stm32
+- Allwinner A10 (sun4i)
+- Raspberry Pi
+- dw-hdmi (Synopsys IP)
+- amlogic (meson ao-cec and ao-cec-g12a)
+- drm adv7511/adv7533
+- omap4
+- tegra
+- rk3288, rk3399
+- tda998x
+- DisplayPort CEC-Tunneling-over-AUX on i915, nouveau and amdgpu
+- ChromeOS EC CEC
+- CEC for SECO boards (UDOO x86).
+- Chrontel CH7322
+
+
+HDMI Receivers:
+
+- adv7604/11/12
+- adv7842
+- tc358743
+
+USB Dongles (see below for additional information on how to use these
+dongles):
+
+- Pulse-Eight: the pulse8-cec driver implements the following module option:
+  ``persistent_config``: by default this is off, but when set to 1 the driver
+  will store the current settings to the device's internal eeprom and restore
+  it the next time the device is connected to the USB port.
+- RainShadow Tech. Note: this driver does not support the persistent_config
+  module option of the Pulse-Eight driver. The hardware supports it, but I
+  have no plans to add this feature. But I accept patches :-)
+
+Miscellaneous:
+
+- vivid: emulates a CEC receiver and CEC transmitter.
+  Can be used to test CEC applications without actual CEC hardware.
+
+- cec-gpio. If the CEC pin is hooked up to a GPIO pin then
+  you can control the CEC line through this driver. This supports error
+  injection as well.
+
+
+Utilities
+=========
+
+Utilities are available here: https://git.linuxtv.org/v4l-utils.git
+
+``utils/cec-ctl``: control a CEC device
+
+``utils/cec-compliance``: test compliance of a remote CEC device
+
+``utils/cec-follower``: emulate a CEC follower device
+
+Note that ``cec-ctl`` has support for the CEC Hospitality Profile as is
+used in some hotel displays. See http://www.htng.org.
+
+Note that the libcec library (https://github.com/Pulse-Eight/libcec) supports
+the linux CEC framework.
+
+If you want to get the CEC specification, then look at the References of
+the HDMI wikipedia page: https://en.wikipedia.org/wiki/HDMI. CEC is part
+of the HDMI specification. HDMI 1.3 is freely available (very similar to
+HDMI 1.4 w.r.t. CEC) and should be good enough for most things.
+
+
+DisplayPort to HDMI Adapters with working CEC
+=============================================
+
+Background: most adapters do not support the CEC Tunneling feature,
+and of those that do many did not actually connect the CEC pin.
+Unfortunately, this means that while a CEC device is created, it
+is actually all alone in the world and will never be able to see other
+CEC devices.
+
+This is a list of known working adapters that have CEC Tunneling AND
+that properly connected the CEC pin. If you find adapters that work
+but are not in this list, then drop me a note.
+
+To test: hook up your DP-to-HDMI adapter to a CEC capable device
+(typically a TV), then run::
+
+	cec-ctl --playback	# Configure the PC as a CEC Playback device
+	cec-ctl -S		# Show the CEC topology
+
+The ``cec-ctl -S`` command should show at least two CEC devices,
+ourselves and the CEC device you are connected to (i.e. typically the TV).
+
+General note: I have only seen this work with the Parade PS175, PS176 and
+PS186 chipsets and the MegaChips 2900. While MegaChips 28x0 claims CEC support,
+I have never seen it work.
+
+USB-C to HDMI
+-------------
+
+Samsung Multiport Adapter EE-PW700: https://www.samsung.com/ie/support/model/EE-PW700BBEGWW/
+
+Kramer ADC-U31C/HF: https://www.kramerav.com/product/ADC-U31C/HF
+
+Club3D CAC-2504: https://www.club-3d.com/en/detail/2449/usb_3.1_type_c_to_hdmi_2.0_uhd_4k_60hz_active_adapter/
+
+DisplayPort to HDMI
+-------------------
+
+Club3D CAC-1080: https://www.club-3d.com/en/detail/2442/displayport_1.4_to_hdmi_2.0b_hdr/
+
+CableCreation (SKU: CD0712): https://www.cablecreation.com/products/active-displayport-to-hdmi-adapter-4k-hdr
+
+HP DisplayPort to HDMI True 4k Adapter (P/N 2JA63AA): https://www.hp.com/us-en/shop/pdp/hp-displayport-to-hdmi-true-4k-adapter
+
+Mini-DisplayPort to HDMI
+------------------------
+
+Club3D CAC-1180: https://www.club-3d.com/en/detail/2443/mini_displayport_1.4_to_hdmi_2.0b_hdr/
+
+Note that passive adapters will never work, you need an active adapter.
+
+The Club3D adapters in this list are all MegaChips 2900 based. Other Club3D adapters
+are PS176 based and do NOT have the CEC pin hooked up, so only the three Club3D
+adapters above are known to work.
+
+I suspect that MegaChips 2900 based designs in general are likely to work
+whereas with the PS176 it is more hit-and-miss (mostly miss). The PS186 is
+likely to have the CEC pin hooked up, it looks like they changed the reference
+design for that chipset.
+
+
+USB CEC Dongles
+===============
+
+These dongles appear as ``/dev/ttyACMX`` devices and need the ``inputattach``
+utility to create the ``/dev/cecX`` devices. Support for the Pulse-Eight
+has been added to ``inputattach`` 1.6.0. Support for the Rainshadow Tech has
+been added to ``inputattach`` 1.6.1.
+
+You also need udev rules to automatically start systemd services::
+
+	SUBSYSTEM=="tty", KERNEL=="ttyACM[0-9]*", ATTRS{idVendor}=="2548", ATTRS{idProduct}=="1002", ACTION=="add", TAG+="systemd", ENV{SYSTEMD_WANTS}+="pulse8-cec-inputattach@%k.service"
+	SUBSYSTEM=="tty", KERNEL=="ttyACM[0-9]*", ATTRS{idVendor}=="2548", ATTRS{idProduct}=="1001", ACTION=="add", TAG+="systemd", ENV{SYSTEMD_WANTS}+="pulse8-cec-inputattach@%k.service"
+	SUBSYSTEM=="tty", KERNEL=="ttyACM[0-9]*", ATTRS{idVendor}=="04d8", ATTRS{idProduct}=="ff59", ACTION=="add", TAG+="systemd", ENV{SYSTEMD_WANTS}+="rainshadow-cec-inputattach@%k.service"
+
+and these systemd services:
+
+For Pulse-Eight make /lib/systemd/system/pulse8-cec-inputattach@.service::
+
+	[Unit]
+	Description=inputattach for pulse8-cec device on %I
+
+	[Service]
+	Type=simple
+	ExecStart=/usr/bin/inputattach --pulse8-cec /dev/%I
+
+For the RainShadow Tech make /lib/systemd/system/rainshadow-cec-inputattach@.service::
+
+	[Unit]
+	Description=inputattach for rainshadow-cec device on %I
+
+	[Service]
+	Type=simple
+	ExecStart=/usr/bin/inputattach --rainshadow-cec /dev/%I
+
+
+For proper suspend/resume support create: /lib/systemd/system/restart-cec-inputattach.service::
+
+	[Unit]
+	Description=restart inputattach for cec devices
+	After=suspend.target
+
+	[Service]
+	Type=forking
+	ExecStart=/bin/bash -c 'for d in /dev/serial/by-id/usb-Pulse-Eight*; do /usr/bin/inputattach --daemon --pulse8-cec $d; done; for d in /dev/serial/by-id/usb-RainShadow_Tech*; do /usr/bin/inputattach --daemon --rainshadow-cec $d; done'
+
+	[Install]
+	WantedBy=suspend.target
+
+And run ``systemctl enable restart-cec-inputattach``.
+
+To automatically set the physical address of the CEC device whenever the
+EDID changes, you can use ``cec-ctl`` with the ``-E`` option::
+
+	cec-ctl -E /sys/class/drm/card0-DP-1/edid
+
+This assumes the dongle is connected to the card0-DP-1 output (``xrandr`` will tell
+you which output is used) and it will poll for changes to the EDID and update
+the Physical Address whenever they occur.
+
+To automatically run this command you can use cron. Edit crontab with
+``crontab -e`` and add this line::
+
+	@reboot /usr/local/bin/cec-ctl -E /sys/class/drm/card0-DP-1/edid
+
+This only works for display drivers that expose the EDID in ``/sys/class/drm``,
+such as the i915 driver.
+
+
+CEC Without HPD
+===============
+
+Some displays when in standby mode have no HDMI Hotplug Detect signal, but
+CEC is still enabled so connected devices can send an <Image View On> CEC
+message in order to wake up such displays. Unfortunately, not all CEC
+adapters can support this. An example is the Odroid-U3 SBC that has a
+level-shifter that is powered off when the HPD signal is low, thus
+blocking the CEC pin. Even though the SoC can use CEC without a HPD,
+the level-shifter will prevent this from functioning.
+
+There is a CEC capability flag to signal this: ``CEC_CAP_NEEDS_HPD``.
+If set, then the hardware cannot wake up displays with this behavior.
+
+Note for CEC application implementers: the <Image View On> message must
+be the first message you send, don't send any other messages before.
+Certain very bad but unfortunately not uncommon CEC implementations
+get very confused if they receive anything else but this message and
+they won't wake up.
+
+When writing a driver it can be tricky to test this. There are two
+ways to do this:
+
+1) Get a Pulse-Eight USB CEC dongle, connect an HDMI cable from your
+   device to the Pulse-Eight, but do not connect the Pulse-Eight to
+   the display.
+
+   Now configure the Pulse-Eight dongle::
+
+	cec-ctl -p0.0.0.0 --tv
+
+   and start monitoring::
+
+	sudo cec-ctl -M
+
+   On the device you are testing run::
+
+	cec-ctl --playback
+
+   It should report a physical address of f.f.f.f. Now run this
+   command::
+
+	cec-ctl -t0 --image-view-on
+
+   The Pulse-Eight should see the <Image View On> message. If not,
+   then something (hardware and/or software) is preventing the CEC
+   message from going out.
+
+   To make sure you have the wiring correct just connect the
+   Pulse-Eight to a CEC-enabled display and run the same command
+   on your device: now there is a HPD, so you should see the command
+   arriving at the Pulse-Eight.
+
+2) If you have another linux device supporting CEC without HPD, then
+   you can just connect your device to that device. Yes, you can connect
+   two HDMI outputs together. You won't have a HPD (which is what we
+   want for this test), but the second device can monitor the CEC pin.
+
+   Otherwise use the same commands as in 1.
+
+If CEC messages do not come through when there is no HPD, then you
+need to figure out why. Typically it is either a hardware restriction
+or the software powers off the CEC core when the HPD goes low. The
+first cannot be corrected of course, the second will likely require
+driver changes.
+
+
+Microcontrollers & CEC
+======================
+
+We have seen some CEC implementations in displays that use a microcontroller
+to sample the bus. This does not have to be a problem, but some implementations
+have timing issues. This is hard to discover unless you can hook up a low-level
+CEC debugger (see the next section).
+
+You will see cases where the CEC transmitter holds the CEC line high or low for
+a longer time than is allowed. For directed messages this is not a problem since
+if that happens the message will not be Acked and it will be retransmitted.
+For broadcast messages no such mechanism exists.
+
+It's not clear what to do about this. It is probably wise to transmit some
+broadcast messages twice to reduce the chance of them being lost. Specifically
+<Standby> and <Active Source> are candidates for that.
+
+
+Making a CEC debugger
+=====================
+
+By using a Raspberry Pi 2B/3/4 and some cheap components you can make
+your own low-level CEC debugger.
+
+Here is a picture of my setup:
+
+https://hverkuil.home.xs4all.nl/rpi3-cec.jpg
+
+It's a Raspberry Pi 3 together with a breadboard and some breadboard wires:
+
+http://www.dx.com/p/diy-40p-male-to-female-male-to-male-female-to-female-dupont-line-wire-3pcs-356089#.WYLOOXWGN7I
+
+Finally one of these HDMI female-female passthrough connectors (full soldering type 1):
+
+https://elabbay.myshopify.com/collections/camera/products/hdmi-af-af-v1a-hdmi-type-a-female-to-hdmi-type-a-female-pass-through-adapter-breakout-board?variant=45533926147
+
+We've tested this and it works up to 4kp30 (297 MHz). The quality is not high
+enough to pass-through 4kp60 (594 MHz).
+
+I also added an RTC and a breakout shield:
+
+https://www.amazon.com/Makerfire%C2%AE-Raspberry-Module-DS1307-Battery/dp/B00ZOXWHK4
+
+https://www.dx.com/p/raspberry-pi-gpio-expansion-board-breadboard-easy-multiplexing-board-one-to-three-with-screw-for-raspberry-pi-2-3-b-b-2729992.html#.YGRCG0MzZ7I
+
+These two are not needed but they make life a bit easier.
+
+If you want to monitor the HPD line as well, then you need one of these
+level shifters:
+
+https://www.adafruit.com/product/757
+
+(This is just where I got these components, there are many other places you
+can get similar things).
+
+The CEC pin of the HDMI connector needs to be connected to these pins:
+CE0/IO8 and CE1/IO7 (pull-up GPIOs). The (optional) HPD pin of the HDMI
+connector should be connected (via a level shifter to convert the 5V
+to 3.3V) to these pins: IO17 and IO27. The (optional) 5V pin of the HDMI
+connector should be connected (via a level shifter) to these pins: IO22
+and IO24. Monitoring the HPD and 5V lines is not necessary, but it is helpful.
+
+This kernel patch will hook up the cec-gpio driver correctly to
+e.g. ``arch/arm/boot/dts/bcm2837-rpi-3-b-plus.dts``::
+
+	cec-gpio@7 {
+		compatible = "cec-gpio";
+		cec-gpios = <&gpio 7 (GPIO_ACTIVE_HIGH|GPIO_OPEN_DRAIN)>;
+		hpd-gpios = <&gpio 17 GPIO_ACTIVE_HIGH>;
+		v5-gpios = <&gpio 22 GPIO_ACTIVE_HIGH>;
+	};
+
+	cec-gpio@8 {
+		compatible = "cec-gpio";
+		cec-gpios = <&gpio 8 (GPIO_ACTIVE_HIGH|GPIO_OPEN_DRAIN)>;
+		hpd-gpios = <&gpio 27 GPIO_ACTIVE_HIGH>;
+		v5-gpios = <&gpio 24 GPIO_ACTIVE_HIGH>;
+	};
+
+This dts change will enable two cec GPIO devices: I typically use one to
+send/receive CEC commands and the other to monitor. If you monitor using
+an unconfigured CEC adapter then it will use GPIO interrupts which makes
+monitoring very accurate.
+
+The documentation on how to use the error injection is here: :ref:`cec_pin_error_inj`.
+
+``cec-ctl --monitor-pin`` will do low-level CEC bus sniffing and analysis.
+You can also store the CEC traffic to file using ``--store-pin`` and analyze
+it later using ``--analyze-pin``.
+
+You can also use this as a full-fledged CEC device by configuring it
+using ``cec-ctl --tv -p0.0.0.0`` or ``cec-ctl --playback -p1.0.0.0``.
--- a/Documentation/admin-guide/media/index.rst
+++ b/Documentation/admin-guide/media/index.rst
@ -38,13 +38,14 @@ The media subsystem

 	remote-controller

+	cec
+
 	dvb

 	cardlist

 	v4l-drivers
 	dvb-drivers
-	cec-drivers

 **Copyright** |copy| 1999-2020 : LinuxTV Developers

--- a/Documentation/admin-guide/media/pulse8-cec.rst
+++ b/Documentation/admin-guide/media/pulse8-cec.rst
@ -1,13 +0,0 @@
-.. SPDX-License-Identifier: GPL-2.0
-
-Pulse-Eight CEC Adapter driver
-==============================
-
-The pulse8-cec driver implements the following module option:
-
-``persistent_config``
---------------------
-
-By default this is off, but when set to 1 the driver will store the current
-settings to the device's internal eeprom and restore it the next time the
-device is connected to the USB port.
--- a/Documentation/admin-guide/media/v4l-drivers.rst
+++ b/Documentation/admin-guide/media/v4l-drivers.rst
@ -31,4 +31,5 @@ Video4Linux (V4L) driver-specific documentation
 	si4713
 	si476x
 	vimc
+	visl
 	vivid
--- a/Documentation/admin-guide/media/vimc.rst
+++ b/Documentation/admin-guide/media/vimc.rst
@ -35,11 +35,11 @@ of commands fits for the default topology:

        media-ctl -d platform:vimc -V '"Sensor A":0[fmt:SBGGR8_1X8/640x480]'
        media-ctl -d platform:vimc -V '"Debayer A":0[fmt:SBGGR8_1X8/640x480]'
-        media-ctl -d platform:vimc -V '"Sensor B":0[fmt:SBGGR8_1X8/640x480]'
-        media-ctl -d platform:vimc -V '"Debayer B":0[fmt:SBGGR8_1X8/640x480]'
-        v4l2-ctl -z platform:vimc -d "RGB/YUV Capture" -v width=1920,height=1440
+        media-ctl -d platform:vimc -V '"Scaler":0[fmt:RGB888_1X24/640x480]'
+        media-ctl -d platform:vimc -V '"Scaler":0[crop:(100,50)/400x150]'
+        media-ctl -d platform:vimc -V '"Scaler":1[fmt:RGB888_1X24/300x700]'
+        v4l2-ctl -z platform:vimc -d "RGB/YUV Capture" -v width=300,height=700
        v4l2-ctl -z platform:vimc -d "Raw Capture 0" -v pixelformat=BA81
-        v4l2-ctl -z platform:vimc -d "Raw Capture 1" -v pixelformat=BA81

 Subdevices
 ----------
--- a/Documentation/admin-guide/media/visl.rst
+++ b/Documentation/admin-guide/media/visl.rst
@ -0,0 +1,175 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+The Virtual Stateless Decoder Driver (visl)
+===========================================
+
+A virtual stateless decoder device for stateless uAPI development
+purposes.
+
+This tool's objective is to help the development and testing of
+userspace applications that use the V4L2 stateless API to decode media.
+
+A userspace implementation can use visl to run a decoding loop even when
+no hardware is available or when the kernel uAPI for the codec has not
+been upstreamed yet. This can reveal bugs at an early stage.
+
+This driver can also trace the contents of the V4L2 controls submitted
+to it.  It can also dump the contents of the vb2 buffers through a
+debugfs interface. This is in many ways similar to the tracing
+infrastructure available for other popular encode/decode APIs out there
+and can help develop a userspace application by using another (working)
+one as a reference.
+
+.. note::
+
+        No actual decoding of video frames is performed by visl. The
+        V4L2 test pattern generator is used to write various debug information
+        to the capture buffers instead.
+
+Module parameters
+-----------------
+
+- visl_debug: Activates debug info, printing various debug messages through
+  dprintk. Also controls whether per-frame debug info is shown. Defaults to off.
+  Note that enabling this feature can result in slow performance through serial.
+
+- visl_transtime_ms: Simulated process time in milliseconds. Slowing down the
+  decoding speed can be useful for debugging.
+
+- visl_dprintk_frame_start, visl_dprintk_nframes: Dictates a range of
+  frames where dprintk is activated. This only controls the dprintk tracing on a
+  per-frame basis. Note that printing a lot of data can be slow through serial.
+
+- keep_bitstream_buffers: Controls whether bitstream (i.e. OUTPUT) buffers are
+  kept after a decoding session. Defaults to false so as to reduce the amount of
+  clutter. keep_bitstream_buffers == false works well when live debugging the
+  client program with GDB.
+
+- bitstream_trace_frame_start, bitstream_trace_nframes: Similar to
+  visl_dprintk_frame_start, visl_dprintk_nframes, but controls the dumping of
+  buffer data through debugfs instead.
+
+What is the default use case for this driver?
+---------------------------------------------
+
+This driver can be used as a way to compare different userspace implementations.
+This assumes that a working client is run against visl and that the ftrace and
+OUTPUT buffer data is subsequently used to debug a work-in-progress
+implementation.
+
+Information on reference frames, their timestamps, the status of the OUTPUT and
+CAPTURE queues and more can be read directly from the CAPTURE buffers.
+
+Supported codecs
+----------------
+
+The following codecs are supported:
+
+- FWHT
+- MPEG2
+- VP8
+- VP9
+- H.264
+- HEVC
+
+visl trace events
+-----------------
+The trace events are defined on a per-codec basis, e.g.:
+
+.. code-block:: bash
+
+        $ ls /sys/kernel/debug/tracing/events/ | grep visl
+        visl_fwht_controls
+        visl_h264_controls
+        visl_hevc_controls
+        visl_mpeg2_controls
+        visl_vp8_controls
+        visl_vp9_controls
+
+For example, in order to dump HEVC SPS data:
+
+.. code-block:: bash
+
+        $ echo 1 >  /sys/kernel/debug/tracing/events/visl_hevc_controls/v4l2_ctrl_hevc_sps/enable
+
+The SPS data will be dumped to the trace buffer, i.e.:
+
+.. code-block:: bash
+
+        $ cat /sys/kernel/debug/tracing/trace
+        video_parameter_set_id 0
+        seq_parameter_set_id 0
+        pic_width_in_luma_samples 1920
+        pic_height_in_luma_samples 1080
+        bit_depth_luma_minus8 0
+        bit_depth_chroma_minus8 0
+        log2_max_pic_order_cnt_lsb_minus4 4
+        sps_max_dec_pic_buffering_minus1 6
+        sps_max_num_reorder_pics 2
+        sps_max_latency_increase_plus1 0
+        log2_min_luma_coding_block_size_minus3 0
+        log2_diff_max_min_luma_coding_block_size 3
+        log2_min_luma_transform_block_size_minus2 0
+        log2_diff_max_min_luma_transform_block_size 3
+        max_transform_hierarchy_depth_inter 2
+        max_transform_hierarchy_depth_intra 2
+        pcm_sample_bit_depth_luma_minus1 0
+        pcm_sample_bit_depth_chroma_minus1 0
+        log2_min_pcm_luma_coding_block_size_minus3 0
+        log2_diff_max_min_pcm_luma_coding_block_size 0
+        num_short_term_ref_pic_sets 0
+        num_long_term_ref_pics_sps 0
+        chroma_format_idc 1
+        sps_max_sub_layers_minus1 0
+        flags AMP_ENABLED|SAMPLE_ADAPTIVE_OFFSET|TEMPORAL_MVP_ENABLED|STRONG_INTRA_SMOOTHING_ENABLED
+
+
+Dumping OUTPUT buffer data through debugfs
+------------------------------------------
+
+If the **VISL_DEBUGFS** Kconfig is enabled, visl will populate
+**/sys/kernel/debug/visl/bitstream** with OUTPUT buffer data according to the
+values of bitstream_trace_frame_start and bitstream_trace_nframes. This can
+highlight errors as broken clients may fail to fill the buffers properly.
+
+A single file is created for each processed OUTPUT buffer. Its name contains an
+integer that denotes the buffer sequence, i.e.:
+
+.. code-block:: c
+
+	snprintf(name, 32, "bitstream%d", run->src->sequence);
+
+Dumping the values is simply a matter of reading from the file, i.e.:
+
+For the buffer with sequence == 0:
+
+.. code-block:: bash
+
+        $ xxd /sys/kernel/debug/visl/bitstream/bitstream0
+        00000000: 2601 af04 d088 bc25 a173 0e41 a4f2 3274  &......%.s.A..2t
+        00000010: c668 cb28 e775 b4ac f53a ba60 f8fd 3aa1  .h.(.u...:.`..:.
+        00000020: 46b4 bcfc 506c e227 2372 e5f5 d7ea 579f  F...Pl.'#r....W.
+        00000030: 6371 5eb5 0eb8 23b5 ca6a 5de5 983a 19e4  cq^...#..j]..:..
+        00000040: e8c3 4320 b4ba a226 cbc1 4138 3a12 32d6  ..C ...&..A8:.2.
+        00000050: fef3 247b 3523 4e90 9682 ac8e eb0c a389  ..${5#N.........
+        00000060: ddd0 6cfc 0187 0e20 7aae b15b 1812 3d33  ..l.... z..[..=3
+        00000070: e1c5 f425 a83a 00b7 4f18 8127 3c4c aefb  ...%.:..O..'<L..
+
+For the buffer with sequence == 1:
+
+.. code-block:: bash
+
+        $ xxd /sys/kernel/debug/visl/bitstream/bitstream1
+        00000000: 0201 d021 49e1 0c40 aa11 1449 14a6 01dc  ...!I..@...I....
+        00000010: 7023 889a c8cd 2cd0 13b4 dab0 e8ca 21fe  p#....,.......!.
+        00000020: c4c8 ab4c 486e 4e2f b0df 96cc c74e 8dde  ...LHnN/.....N..
+        00000030: 8ce7 ee36 d880 4095 4d64 30a0 ff4f 0c5e  ...6..@.Md0..O.^
+        00000040: f16b a6a1 d806 ca2a 0ece a673 7bea 1f37  .k.....*...s{..7
+        00000050: 370f 5bb9 1dc4 ba21 6434 bc53 0173 cba0  7.[....!d4.S.s..
+        00000060: dfe6 bc99 01ea b6e0 346b 92b5 c8de 9f5d  ........4k.....]
+        00000070: e7cc 3484 1769 fef2 a693 a945 2c8b 31da  ..4..i.....E,.1.
+
+And so on.
+
+By default, the files are removed during STREAMOFF. This is to reduce the amount
+of clutter.
--- a/Documentation/admin-guide/media/vivid.rst
+++ b/Documentation/admin-guide/media/vivid.rst
@ -392,7 +392,7 @@ Which one is returned depends on the chosen channel, each next valid channel
 will cycle through the possible audio subchannel combinations. This allows
 you to test the various combinations by just switching channels..

-Finally, for these inputs the v4l2_timecode struct is filled in in the
+Finally, for these inputs the v4l2_timecode struct is filled in the
 dequeued v4l2_buffer struct.


--- a/Documentation/admin-guide/mm/damon/usage.rst
+++ b/Documentation/admin-guide/mm/damon/usage.rst
@ -88,6 +88,9 @@ comma (","). ::
    │ │ │ │ │ │ │ │ weights/sz_permil,nr_accesses_permil,age_permil
    │ │ │ │ │ │ │ watermarks/metric,interval_us,high,mid,low
    │ │ │ │ │ │ │ stats/nr_tried,sz_tried,nr_applied,sz_applied,qt_exceeds
+    │ │ │ │ │ │ │ tried_regions/
+    │ │ │ │ │ │ │ │ 0/start,end,nr_accesses,age
+    │ │ │ │ │ │ │ │ ...
    │ │ │ │ │ │ ...
    │ │ │ │ ...
    │ │ ...
@ -125,7 +128,14 @@ in the state.  Writing ``commit`` to the ``state`` file makes kdamond reads the
 user inputs in the sysfs files except ``state`` file again.  Writing
 ``update_schemes_stats`` to ``state`` file updates the contents of stats files
 for each DAMON-based operation scheme of the kdamond.  For details of the
-stats, please refer to :ref:`stats section <sysfs_schemes_stats>`.
+stats, please refer to :ref:`stats section <sysfs_schemes_stats>`.  Writing
+``update_schemes_tried_regions`` to ``state`` file updates the DAMON-based
+operation scheme action tried regions directory for each DAMON-based operation
+scheme of the kdamond.  Writing ``clear_schemes_tried_regions`` to ``state``
+file clears the DAMON-based operation scheme action tried regions directory for
+each DAMON-based operation scheme of the kdamond.  For details of the
+DAMON-based operation scheme action tried regions directory, please refer to
+:ref:`tried_regions section <sysfs_schemes_tried_regions>`.

 If the state is ``on``, reading ``pid`` shows the pid of the kdamond thread.

@ -166,6 +176,8 @@ You can set and get what type of monitoring operations DAMON will use for the
 context by writing one of the keywords listed in ``avail_operations`` file and
 reading from the ``operations`` file.

+.. _sysfs_monitoring_attrs:
+
 contexts/<N>/monitoring_attrs/
 ------------------------------

@ -235,6 +247,9 @@ In each region directory, you will find two files (``start`` and ``end``).  You
 can set and get the start and end addresses of the initial monitoring target
 region by writing to and reading from the files, respectively.

+Each region should not overlap with others.  ``end`` of directory ``N`` should
+be equal to or smaller than ``start`` of directory ``N+1``.
+
 contexts/<N>/schemes/
 ---------------------

@ -252,8 +267,9 @@ to ``N-1``.  Each directory represents each DAMON-based operation scheme.
 schemes/<N>/
 ------------

-In each scheme directory, four directories (``access_pattern``, ``quotas``,
-``watermarks``, and ``stats``) and one file (``action``) exist.
+In each scheme directory, five directories (``access_pattern``, ``quotas``,
+``watermarks``, ``stats``, and ``tried_regions``) and one file (``action``)
+exist.

 The ``action`` file is for setting and getting what action you want to apply to
 memory regions having specific access pattern of the interest.  The keywords
@ -348,6 +364,32 @@ should ask DAMON sysfs interface to updte the content of the files for the
 stats by writing a special keyword, ``update_schemes_stats`` to the relevant
 ``kdamonds/<N>/state`` file.

+.. _sysfs_schemes_tried_regions:
+
+schemes/<N>/tried_regions/
+--------------------------
+
+When a special keyword, ``update_schemes_tried_regions``, is written to the
+relevant ``kdamonds/<N>/state`` file, DAMON creates directories named with
+integers starting from ``0`` under this directory.  Each directory contains
+files exposing detailed information about each of the memory regions that the
+corresponding scheme's ``action`` has tried to be applied to, during the next
+:ref:`aggregation interval <sysfs_monitoring_attrs>`.  The
+information includes address range, ``nr_accesses``, and ``age`` of the
+region.
+
+The directories will be removed when another special keyword,
+``clear_schemes_tried_regions``, is written to the relevant
+``kdamonds/<N>/state`` file.
+
+tried_regions/<N>/
+------------------
+
+In each region directory, you will find four files (``start``, ``end``,
+``nr_accesses``, and ``age``).  Reading the files will show the start and end
+addresses, ``nr_accesses``, and ``age`` of the region that corresponding
+DAMON-based operation scheme ``action`` has tried to be applied.
+
 Example
 ~~~~~~~

@ -465,8 +507,9 @@ regions in case of physical memory monitoring.  Therefore, users should set the
 monitoring target regions by themselves.

 In such cases, users can explicitly set the initial monitoring target regions
-as they want, by writing proper values to the ``init_regions`` file.  Each line
-of the input should represent one region in below form.::
+as they want, by writing proper values to the ``init_regions`` file.  The input
+should be a sequence of three integers separated by white spaces that represent
+one region in below form.::

    <target idx> <start address> <end address>

@ -481,9 +524,9 @@ ranges, ``20-40`` and ``50-100`` as that of pid 4242, which is the second one
    # cd <debugfs>/damon
    # cat target_ids
    42 4242
-    # echo "0   1       100
-            0   100     200
-            1   20      40
+    # echo "0   1       100 \
+            0   100     200 \
+            1   20      40  \
            1   50      100" > init_regions

 Note that this sets the initial monitoring target regions only.  In case of
--- a/Documentation/admin-guide/mm/zswap.rst
+++ b/Documentation/admin-guide/mm/zswap.rst
@ -14,13 +14,7 @@ for potentially reduced swap I/O.  This trade-off can also result in a
 significant performance improvement if reads from the compressed cache are
 faster than reads from a swap device.

-.. note::
-   Zswap is a new feature as of v3.11 and interacts heavily with memory
-   reclaim.  This interaction has not been fully explored on the large set of
-   potential configurations and workloads that exist.  For this reason, zswap
-   is a work in progress and should be considered experimental.
-
-   Some potential benefits:
+Some potential benefits:

 * Desktop/laptop users with limited RAM capacities can mitigate the
  performance impact of swapping.
--- a/Documentation/admin-guide/perf/hisi-pcie-pmu.rst
+++ b/Documentation/admin-guide/perf/hisi-pcie-pmu.rst
@ -15,10 +15,10 @@ HiSilicon PCIe PMU driver
 The PCIe PMU driver registers a perf PMU with the name of its sicl-id and PCIe
 Core id.::

-  /sys/bus/event_source/hisi_pcie<sicl>_<core>
+  /sys/bus/event_source/devices/hisi_pcie<sicl>_core<core>

 PMU driver provides description of available events and filter options in sysfs,
-see /sys/bus/event_source/devices/hisi_pcie<sicl>_<core>.
+see /sys/bus/event_source/devices/hisi_pcie<sicl>_core<core>.

 The "format" directory describes all formats of the config (events) and config1
 (filter options) fields of the perf_event_attr structure. The "events" directory
@ -33,13 +33,13 @@ monitored by PMU.
 Example usage of perf::

  $# perf list
-  hisi_pcie0_0/rx_mwr_latency/ [kernel PMU event]
-  hisi_pcie0_0/rx_mwr_cnt/ [kernel PMU event]
+  hisi_pcie0_core0/rx_mwr_latency/ [kernel PMU event]
+  hisi_pcie0_core0/rx_mwr_cnt/ [kernel PMU event]
  ------------------------------------------

-  $# perf stat -e hisi_pcie0_0/rx_mwr_latency/
-  $# perf stat -e hisi_pcie0_0/rx_mwr_cnt/
-  $# perf stat -g -e hisi_pcie0_0/rx_mwr_latency/ -e hisi_pcie0_0/rx_mwr_cnt/
+  $# perf stat -e hisi_pcie0_core0/rx_mwr_latency/
+  $# perf stat -e hisi_pcie0_core0/rx_mwr_cnt/
+  $# perf stat -g -e hisi_pcie0_core0/rx_mwr_latency/ -e hisi_pcie0_core0/rx_mwr_cnt/

 The current driver does not support sampling. So "perf record" is unsupported.
 Also attach to a task is unsupported for PCIe PMU.
@ -48,59 +48,83 @@ Filter options
 --------------

 1. Target filter
-PMU could only monitor the performance of traffic downstream target Root Ports
-or downstream target Endpoint. PCIe PMU driver support "port" and "bdf"
-interfaces for users, and these two interfaces aren't supported at the same
-time.

-port
-"port" filter can be used in all PCIe PMU events, target Root Port can be
-selected by configuring the 16-bits-bitmap "port". Multi ports can be selected
-for AP-layer-events, and only one port can be selected for TL/DL-layer-events.
+   PMU could only monitor the performance of traffic downstream target Root
+   Ports or downstream target Endpoint. PCIe PMU driver support "port" and
+   "bdf" interfaces for users, and these two interfaces aren't supported at the
+   same time.

-For example, if target Root Port is 0000:00:00.0 (x8 lanes), bit0 of bitmap
-should be set, port=0x1; if target Root Port is 0000:00:04.0 (x4 lanes),
-bit8 is set, port=0x100; if these two Root Ports are both monitored, port=0x101.
+   - port

-Example usage of perf::
+     "port" filter can be used in all PCIe PMU events, target Root Port can be
+     selected by configuring the 16-bits-bitmap "port". Multi ports can be
+     selected for AP-layer-events, and only one port can be selected for
+     TL/DL-layer-events.

-  $# perf stat -e hisi_pcie0_0/rx_mwr_latency,port=0x1/ sleep 5
+     For example, if target Root Port is 0000:00:00.0 (x8 lanes), bit0 of
+     bitmap should be set, port=0x1; if target Root Port is 0000:00:04.0 (x4
+     lanes), bit8 is set, port=0x100; if these two Root Ports are both
+     monitored, port=0x101.

-bdf
+     Example usage of perf::

-"bdf" filter can only be used in bandwidth events, target Endpoint is selected
-by configuring BDF to "bdf". Counter only counts the bandwidth of message
-requested by target Endpoint.
+       $# perf stat -e hisi_pcie0_core0/rx_mwr_latency,port=0x1/ sleep 5

-For example, "bdf=0x3900" means BDF of target Endpoint is 0000:39:00.0.
+   - bdf

-Example usage of perf::
+     "bdf" filter can only be used in bandwidth events, target Endpoint is
+     selected by configuring BDF to "bdf". Counter only counts the bandwidth of
+     message requested by target Endpoint.

-  $# perf stat -e hisi_pcie0_0/rx_mrd_flux,bdf=0x3900/ sleep 5
+     For example, "bdf=0x3900" means BDF of target Endpoint is 0000:39:00.0.
+
+     Example usage of perf::
+
+       $# perf stat -e hisi_pcie0_core0/rx_mrd_flux,bdf=0x3900/ sleep 5

 2. Trigger filter
-Event statistics start when the first time TLP length is greater/smaller
-than trigger condition. You can set the trigger condition by writing "trig_len",
-and set the trigger mode by writing "trig_mode". This filter can only be used
-in bandwidth events.

-For example, "trig_len=4" means trigger condition is 2^4 DW, "trig_mode=0"
-means statistics start when TLP length > trigger condition, "trig_mode=1"
-means start when TLP length < condition.
+   Event statistics start when the first time TLP length is greater/smaller
+   than trigger condition. You can set the trigger condition by writing
+   "trig_len", and set the trigger mode by writing "trig_mode". This filter can
+   only be used in bandwidth events.

-Example usage of perf::
+   For example, "trig_len=4" means trigger condition is 2^4 DW, "trig_mode=0"
+   means statistics start when TLP length > trigger condition, "trig_mode=1"
+   means start when TLP length < condition.

-  $# perf stat -e hisi_pcie0_0/rx_mrd_flux,trig_len=0x4,trig_mode=1/ sleep 5
+   Example usage of perf::
+
+     $# perf stat -e hisi_pcie0_core0/rx_mrd_flux,trig_len=0x4,trig_mode=1/ sleep 5

 3. Threshold filter
-Counter counts when TLP length within the specified range. You can set the
-threshold by writing "thr_len", and set the threshold mode by writing
-"thr_mode". This filter can only be used in bandwidth events.

-For example, "thr_len=4" means threshold is 2^4 DW, "thr_mode=0" means
-counter counts when TLP length >= threshold, and "thr_mode=1" means counts
-when TLP length < threshold.
+   Counter counts when TLP length within the specified range. You can set the
+   threshold by writing "thr_len", and set the threshold mode by writing
+   "thr_mode". This filter can only be used in bandwidth events.

-Example usage of perf::
+   For example, "thr_len=4" means threshold is 2^4 DW, "thr_mode=0" means
+   counter counts when TLP length >= threshold, and "thr_mode=1" means counts
+   when TLP length < threshold.

-  $# perf stat -e hisi_pcie0_0/rx_mrd_flux,thr_len=0x4,thr_mode=1/ sleep 5
+   Example usage of perf::
+
+     $# perf stat -e hisi_pcie0_core0/rx_mrd_flux,thr_len=0x4,thr_mode=1/ sleep 5
+
+4. TLP Length filter
+
+   When counting bandwidth, the data can be composed of certain parts of TLP
+   packets. You can specify it through "len_mode":
+
+   - 2'b00: Reserved (Do not use this since the behaviour is undefined)
+   - 2'b01: Bandwidth of TLP payloads
+   - 2'b10: Bandwidth of TLP headers
+   - 2'b11: Bandwidth of both TLP payloads and headers
+
+   For example, "len_mode=2" means only counting the bandwidth of TLP headers
+   and "len_mode=3" means the final bandwidth data is composed of both TLP
+   headers and payloads. Default value if not specified is 2'b11.
+
+   Example usage of perf::
+
+     $# perf stat -e hisi_pcie0_core0/rx_mrd_flux,len_mode=0x1/ sleep 5
--- a/Documentation/admin-guide/perf/index.rst
+++ b/Documentation/admin-guide/perf/index.rst
@ -19,3 +19,5 @@ Performance monitor support
   arm_dsu_pmu
   thunderx2-pmu
   alibaba_pmu
+   nvidia-pmu
+   meson-ddr-pmu
--- a/Documentation/admin-guide/perf/meson-ddr-pmu.rst
+++ b/Documentation/admin-guide/perf/meson-ddr-pmu.rst
@ -0,0 +1,70 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===========================================================
+Amlogic SoC DDR Bandwidth Performance Monitoring Unit (PMU)
+===========================================================
+
+The Amlogic Meson G12 SoC contains a bandwidth monitor inside the DRAM
+controller. The monitor includes 4 channels. Each channel can count the
+requests accessing DRAM. The channel can count up to 3 AXI ports simultaneously.
+It can be helpful to show if the performance bottleneck is on DDR bandwidth.
+
+Currently, this driver supports the following 5 perf events:
+
+ meson_ddr_bw/total_rw_bytes/
+ meson_ddr_bw/chan_1_rw_bytes/
+ meson_ddr_bw/chan_2_rw_bytes/
+ meson_ddr_bw/chan_3_rw_bytes/
+ meson_ddr_bw/chan_4_rw_bytes/
+
+meson_ddr_bw/chan_{1,2,3,4}_rw_bytes/ events are channel-specific events.
+Each channel supports filtering, which lets the channel monitor an
+individual IP module in the SoC.
+
+Below are DDR access request event filter keywords:
+
+ arm             - from CPU
+ vpu_read1       - from OSD + VPP read
+ gpu             - from 3D GPU
+ pcie            - from PCIe controller
+ hdcp            - from HDCP controller
+ hevc_front      - from HEVC codec front end
+ usb3_0          - from USB3.0 controller
+ hevc_back       - from HEVC codec back end
+ h265enc         - from HEVC encoder
+ vpu_read2       - from DI read
+ vpu_write1      - from VDIN write
+ vpu_write2      - from DI write
+ vdec            - from legacy codec video decoder
+ hcodec          - from H264 encoder
+ ge2d            - from ge2d
+ spicc1          - from SPI controller 1
+ usb0            - from USB2.0 controller 0
+ dma             - from system DMA controller 1
+ arb0            - from arb0
+ sd_emmc_b       - from SD eMMC b controller
+ usb1            - from USB2.0 controller 1
+ audio           - from Audio module
+ sd_emmc_c       - from SD eMMC c controller
+ spicc2          - from SPI controller 2
+ ethernet        - from Ethernet controller
+
+
+Examples:
+
+  + Show the total DDR bandwidth per second:
+
+    .. code-block:: bash
+
+       perf stat -a -e meson_ddr_bw/total_rw_bytes/ -I 1000 sleep 10
+
+
+  + Show individual DDR bandwidth from CPU and GPU respectively, as well as
+    sum of them:
+
+    .. code-block:: bash
+
+       perf stat -a -e meson_ddr_bw/chan_1_rw_bytes,arm=1/ -I 1000 sleep 10
+       perf stat -a -e meson_ddr_bw/chan_2_rw_bytes,gpu=1/ -I 1000 sleep 10
+       perf stat -a -e meson_ddr_bw/chan_3_rw_bytes,arm=1,gpu=1/ -I 1000 sleep 10
+
--- a/Documentation/admin-guide/perf/nvidia-pmu.rst
+++ b/Documentation/admin-guide/perf/nvidia-pmu.rst
@ -0,0 +1,299 @@
+=========================================================
+NVIDIA Tegra SoC Uncore Performance Monitoring Unit (PMU)
+=========================================================
+
+The NVIDIA Tegra SoC includes various system PMUs to measure key performance
+metrics like memory bandwidth, latency, and utilization:
+
+* Scalable Coherency Fabric (SCF)
+* NVLink-C2C0
+* NVLink-C2C1
+* CNVLink
+* PCIE
+
+PMU Driver
+----------
+
+The PMUs in this document are based on ARM CoreSight PMU Architecture as
+described in document: ARM IHI 0091. Since this is a standard architecture, the
+PMUs are managed by a common driver "arm-cs-arch-pmu". This driver describes
+the available events and configuration of each PMU in sysfs. Please see the
+sections below to get the sysfs path of each PMU. Like other uncore PMU drivers,
+the driver provides "cpumask" sysfs attribute to show the CPU id used to handle
+the PMU event. There is also "associated_cpus" sysfs attribute, which contains a
+list of CPUs associated with the PMU instance.
+
+.. _SCF_PMU_Section:
+
+SCF PMU
+-------
+
+The SCF PMU monitors system level cache events, CPU traffic, and
+strongly-ordered (SO) PCIE write traffic to local/remote memory. Please see
+:ref:`NVIDIA_Uncore_PMU_Traffic_Coverage_Section` for more info about the PMU
+traffic coverage.
+
+The events and configuration options of this PMU device are described in sysfs,
+see /sys/bus/event_source/devices/nvidia_scf_pmu_<socket-id>.
+
+Example usage:
+
+* Count event id 0x0 in socket 0::
+
+   perf stat -a -e nvidia_scf_pmu_0/event=0x0/
+
+* Count event id 0x0 in socket 1::
+
+   perf stat -a -e nvidia_scf_pmu_1/event=0x0/
+
+NVLink-C2C0 PMU
+--------------------
+
+The NVLink-C2C0 PMU monitors incoming traffic from a GPU/CPU connected with
+NVLink-C2C (Chip-2-Chip) interconnect. The type of traffic captured by this PMU
+varies depending on the chip configuration:
+
+* NVIDIA Grace Hopper Superchip: Hopper GPU is connected with Grace SoC.
+
+  In this config, the PMU captures GPU ATS translated or EGM traffic from the GPU.
+
+* NVIDIA Grace CPU Superchip: two Grace CPU SoCs are connected.
+
+  In this config, the PMU captures read and relaxed ordered (RO) writes from
+  PCIE device of the remote SoC.
+
+Please see :ref:`NVIDIA_Uncore_PMU_Traffic_Coverage_Section` for more info about
+the PMU traffic coverage.
+
+The events and configuration options of this PMU device are described in sysfs,
+see /sys/bus/event_source/devices/nvidia_nvlink_c2c0_pmu_<socket-id>.
+
+Example usage:
+
+* Count event id 0x0 from the GPU/CPU connected with socket 0::
+
+   perf stat -a -e nvidia_nvlink_c2c0_pmu_0/event=0x0/
+
+* Count event id 0x0 from the GPU/CPU connected with socket 1::
+
+   perf stat -a -e nvidia_nvlink_c2c0_pmu_1/event=0x0/
+
+* Count event id 0x0 from the GPU/CPU connected with socket 2::
+
+   perf stat -a -e nvidia_nvlink_c2c0_pmu_2/event=0x0/
+
+* Count event id 0x0 from the GPU/CPU connected with socket 3::
+
+   perf stat -a -e nvidia_nvlink_c2c0_pmu_3/event=0x0/
+
+NVLink-C2C1 PMU
+-------------------
+
+The NVLink-C2C1 PMU monitors incoming traffic from a GPU connected with
+NVLink-C2C (Chip-2-Chip) interconnect. This PMU captures untranslated GPU
+traffic, in contrast with NVLink-C2C0 PMU that captures ATS translated traffic.
+Please see :ref:`NVIDIA_Uncore_PMU_Traffic_Coverage_Section` for more info about
+the PMU traffic coverage.
+
+The events and configuration options of this PMU device are described in sysfs,
+see /sys/bus/event_source/devices/nvidia_nvlink_c2c1_pmu_<socket-id>.
+
+Example usage:
+
+* Count event id 0x0 from the GPU connected with socket 0::
+
+   perf stat -a -e nvidia_nvlink_c2c1_pmu_0/event=0x0/
+
+* Count event id 0x0 from the GPU connected with socket 1::
+
+   perf stat -a -e nvidia_nvlink_c2c1_pmu_1/event=0x0/
+
+* Count event id 0x0 from the GPU connected with socket 2::
+
+   perf stat -a -e nvidia_nvlink_c2c1_pmu_2/event=0x0/
+
+* Count event id 0x0 from the GPU connected with socket 3::
+
+   perf stat -a -e nvidia_nvlink_c2c1_pmu_3/event=0x0/
+
+CNVLink PMU
+---------------
+
+The CNVLink PMU monitors traffic from GPU and PCIE device on remote sockets
+to local memory. For PCIE traffic, this PMU captures read and relaxed ordered
+(RO) write traffic. Please see :ref:`NVIDIA_Uncore_PMU_Traffic_Coverage_Section`
+for more info about the PMU traffic coverage.
+
+The events and configuration options of this PMU device are described in sysfs,
+see /sys/bus/event_source/devices/nvidia_cnvlink_pmu_<socket-id>.
+
+Each SoC socket can be connected to one or more sockets via CNVLink. The user can
+use "rem_socket" bitmap parameter to select the remote socket(s) to monitor.
+Each bit represents the socket number, e.g. "rem_socket=0xE" corresponds to
+socket 1 to 3.
+/sys/bus/event_source/devices/nvidia_cnvlink_pmu_<socket-id>/format/rem_socket
+shows the valid bits that can be set in the "rem_socket" parameter.
+
+The PMU cannot distinguish the remote traffic initiator, therefore it does not
+provide a filter to select the traffic source to monitor. It reports combined
+traffic from remote GPU and PCIE devices.
+
+Example usage:
+
+* Count event id 0x0 for the traffic from remote socket 1, 2, and 3 to socket 0::
+
+   perf stat -a -e nvidia_cnvlink_pmu_0/event=0x0,rem_socket=0xE/
+
+* Count event id 0x0 for the traffic from remote socket 0, 2, and 3 to socket 1::
+
+   perf stat -a -e nvidia_cnvlink_pmu_1/event=0x0,rem_socket=0xD/
+
+* Count event id 0x0 for the traffic from remote socket 0, 1, and 3 to socket 2::
+
+   perf stat -a -e nvidia_cnvlink_pmu_2/event=0x0,rem_socket=0xB/
+
+* Count event id 0x0 for the traffic from remote socket 0, 1, and 2 to socket 3::
+
+   perf stat -a -e nvidia_cnvlink_pmu_3/event=0x0,rem_socket=0x7/
+
+
+PCIE PMU
+------------
+
+The PCIE PMU monitors all read/write traffic from PCIE root ports to
+local/remote memory. Please see :ref:`NVIDIA_Uncore_PMU_Traffic_Coverage_Section`
+for more info about the PMU traffic coverage.
+
+The events and configuration options of this PMU device are described in sysfs,
+see /sys/bus/event_source/devices/nvidia_pcie_pmu_<socket-id>.
+
+Each SoC socket can support multiple root ports. The user can use
+"root_port" bitmap parameter to select the port(s) to monitor, e.g.
+"root_port=0xF" corresponds to root port 0 to 3.
+/sys/bus/event_source/devices/nvidia_pcie_pmu_<socket-id>/format/root_port
+shows the valid bits that can be set in the "root_port" parameter.
+
+Example usage:
+
+* Count event id 0x0 from root port 0 and 1 of socket 0::
+
+   perf stat -a -e nvidia_pcie_pmu_0/event=0x0,root_port=0x3/
+
+* Count event id 0x0 from root port 0 and 1 of socket 1::
+
+   perf stat -a -e nvidia_pcie_pmu_1/event=0x0,root_port=0x3/
+
+.. _NVIDIA_Uncore_PMU_Traffic_Coverage_Section:
+
+Traffic Coverage
+----------------
+
+The PMU traffic coverage may vary depending on the chip configuration:
+
+* **NVIDIA Grace Hopper Superchip**: Hopper GPU is connected with Grace SoC.
+
+  Example configuration with two Grace SoCs::
+
+   *********************************          *********************************
+   * SOCKET-A                      *          * SOCKET-B                      *
+   *                               *          *                               *
+   *                     ::::::::  *          *  ::::::::                     *
+   *                     : PCIE :  *          *  : PCIE :                     *
+   *                     ::::::::  *          *  ::::::::                     *
+   *                         |     *          *      |                        *
+   *                         |     *          *      |                        *
+   *  :::::::            ::::::::: *          *  :::::::::            ::::::: *
+   *  :     :            :       : *          *  :       :            :     : *
+   *  : GPU :<--NVLink-->: Grace :<---CNVLink--->: Grace :<--NVLink-->: GPU : *
+   *  :     :    C2C     :  SoC  : *          *  :  SoC  :    C2C     :     : *
+   *  :::::::            ::::::::: *          *  :::::::::            ::::::: *
+   *     |                   |     *          *      |                   |    *
+   *     |                   |     *          *      |                   |    *
+   *  &&&&&&&&           &&&&&&&&  *          *   &&&&&&&&           &&&&&&&& *
+   *  & GMEM &           & CMEM &  *          *   & CMEM &           & GMEM & *
+   *  &&&&&&&&           &&&&&&&&  *          *   &&&&&&&&           &&&&&&&& *
+   *                               *          *                               *
+   *********************************          *********************************
+
+   GMEM = GPU Memory (e.g. HBM)
+   CMEM = CPU Memory (e.g. LPDDR5X)
+
+  |
+  | Following table contains traffic coverage of Grace SoC PMU in socket-A:
+
+  ::
+
+   +--------------+-------+-----------+-----------+-----+----------+----------+
+   |              |                        Source                             |
+   +              +-------+-----------+-----------+-----+----------+----------+
+   | Destination  |       |GPU ATS    |GPU Not-ATS|     | Socket-B | Socket-B |
+   |              |PCI R/W|Translated,|Translated | CPU | CPU/PCIE1| GPU/PCIE2|
+   |              |       |EGM        |           |     |          |          |
+   +==============+=======+===========+===========+=====+==========+==========+
+   | Local        | PCIE  |NVLink-C2C0|NVLink-C2C1| SCF | SCF PMU  | CNVLink  |
+   | SYSRAM/CMEM  | PMU   |PMU        |PMU        | PMU |          | PMU      |
+   +--------------+-------+-----------+-----------+-----+----------+----------+
+   | Local GMEM   | PCIE  |    N/A    |NVLink-C2C1| SCF | SCF PMU  | CNVLink  |
+   |              | PMU   |           |PMU        | PMU |          | PMU      |
+   +--------------+-------+-----------+-----------+-----+----------+----------+
+   | Remote       | PCIE  |NVLink-C2C0|NVLink-C2C1| SCF |          |          |
+   | SYSRAM/CMEM  | PMU   |PMU        |PMU        | PMU |   N/A    |   N/A    |
+   | over CNVLink |       |           |           |     |          |          |
+   +--------------+-------+-----------+-----------+-----+----------+----------+
+   | Remote GMEM  | PCIE  |NVLink-C2C0|NVLink-C2C1| SCF |          |          |
+   | over CNVLink | PMU   |PMU        |PMU        | PMU |   N/A    |   N/A    |
+   +--------------+-------+-----------+-----------+-----+----------+----------+
+
+   PCIE1 traffic represents strongly ordered (SO) writes.
+   PCIE2 traffic represents reads and relaxed ordered (RO) writes.
+
+* **NVIDIA Grace CPU Superchip**: two Grace CPU SoCs are connected.
+
+  Example configuration with two Grace SoCs::
+
+   *******************             *******************
+   * SOCKET-A        *             * SOCKET-B        *
+   *                 *             *                 *
+   *    ::::::::     *             *    ::::::::     *
+   *    : PCIE :     *             *    : PCIE :     *
+   *    ::::::::     *             *    ::::::::     *
+   *        |        *             *        |        *
+   *        |        *             *        |        *
+   *    :::::::::    *             *    :::::::::    *
+   *    :       :    *             *    :       :    *
+   *    : Grace :<--------NVLink------->: Grace :    *
+   *    :  SoC  :    *     C2C     *    :  SoC  :    *
+   *    :::::::::    *             *    :::::::::    *
+   *        |        *             *        |        *
+   *        |        *             *        |        *
+   *     &&&&&&&&    *             *     &&&&&&&&    *
+   *     & CMEM &    *             *     & CMEM &    *
+   *     &&&&&&&&    *             *     &&&&&&&&    *
+   *                 *             *                 *
+   *******************             *******************
+
+   GMEM = GPU Memory (e.g. HBM)
+   CMEM = CPU Memory (e.g. LPDDR5X)
+
+  |
+  | Following table contains traffic coverage of Grace SoC PMU in socket-A:
+
+  ::
+
+   +-----------------+-----------+---------+----------+-------------+
+   |                 |                      Source                  |
+   +                 +-----------+---------+----------+-------------+
+   | Destination     |           |         | Socket-B | Socket-B    |
+   |                 |  PCI R/W  |   CPU   | CPU/PCIE1| PCIE2       |
+   |                 |           |         |          |             |
+   +=================+===========+=========+==========+=============+
+   | Local           |  PCIE PMU | SCF PMU | SCF PMU  | NVLink-C2C0 |
+   | SYSRAM/CMEM     |           |         |          | PMU         |
+   +-----------------+-----------+---------+----------+-------------+
+   | Remote          |           |         |          |             |
+   | SYSRAM/CMEM     |  PCIE PMU | SCF PMU |   N/A    |     N/A     |
+   | over NVLink-C2C |           |         |          |             |
+   +-----------------+-----------+---------+----------+-------------+
+
+   PCIE1 traffic represents strongly ordered (SO) writes.
+   PCIE2 traffic represents reads and relaxed ordered (RO) writes.
--- a/Documentation/admin-guide/pm/amd-pstate.rst
+++ b/Documentation/admin-guide/pm/amd-pstate.rst
@ -283,23 +283,19 @@ efficiency frequency management method on AMD processors.
 Kernel Module Options for ``amd-pstate``
 =========================================

-.. _shared_mem:
+Passive Mode
+------------

-``shared_mem``
-Use a module param (shared_mem) to enable related processors manually with
-**amd_pstate.shared_mem=1**.
-Due to the performance issue on the processors with `Shared Memory Support
-<perf_cap_>`_, we disable it presently and will re-enable this by default
-once we address performance issue with this solution.
+``amd_pstate=passive``

-To check whether the current processor is using `Full MSR Support <perf_cap_>`_
-or `Shared Memory Support <perf_cap_>`_ : ::
-
-  ray@hr-test1:~$ lscpu | grep cppc
-  Flags:                           fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ht syscall nx mmxext fxsr_opt pdpe1gb rdtscp lm constant_tsc rep_good nopl nonstop_tsc cpuid extd_apicid aperfmperf rapl pni pclmulqdq monitor ssse3 fma cx16 sse4_1 sse4_2 x2apic movbe popcnt aes xsave avx f16c rdrand lahf_lm cmp_legacy svm extapic cr8_legacy abm sse4a misalignsse 3dnowprefetch osvw ibs skinit wdt tce topoext perfctr_core perfctr_nb bpext perfctr_llc mwaitx cpb cat_l3 cdp_l3 hw_pstate ssbd mba ibrs ibpb stibp vmmcall fsgsbase bmi1 avx2 smep bmi2 erms invpcid cqm rdt_a rdseed adx smap clflushopt clwb sha_ni xsaveopt xsavec xgetbv1 xsaves cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local clzero irperf xsaveerptr rdpru wbnoinvd cppc arat npt lbrv svm_lock nrip_save tsc_scale vmcb_clean flushbyasid decodeassists pausefilter pfthreshold avic v_vmsave_vmload vgif v_spec_ctrl umip pku ospke vaes vpclmulqdq rdpid overflow_recov succor smca fsrm
-
-If the CPU flags have ``cppc``, then this processor supports `Full MSR Support
-<perf_cap_>`_. Otherwise, it supports `Shared Memory Support <perf_cap_>`_.
+It will be enabled if the ``amd_pstate=passive`` is passed to the kernel in the command line.
+In this mode, ``amd_pstate`` driver software specifies a desired QoS target in the CPPC
+performance scale as a relative number. This can be expressed as percentage of nominal
+performance (infrastructure max). Below the nominal sustained performance level,
+desired performance expresses the average performance level of the processor subject
+to the Performance Reduction Tolerance register. Above the nominal performance level,
+processor must provide at least nominal performance requested and go higher if current
+operating conditions allow.


 ``cpupower`` tool support for ``amd-pstate``
@ -409,37 +405,55 @@ Unit Tests for amd-pstate

 1. Test case decriptions

+    1). Basic tests
+
+        Test prerequisite and basic functions for the ``amd-pstate`` driver.
+
        +---------+--------------------------------+------------------------------------------------------------------------------------+
        | Index   | Functions                      | Description                                                                        |
        +=========+================================+====================================================================================+
-        | 0       | amd_pstate_ut_acpi_cpc_valid   || Check whether the _CPC object is present in SBIOS.                                |
+        | 1       | amd_pstate_ut_acpi_cpc_valid   || Check whether the _CPC object is present in SBIOS.                                |
        |         |                                ||                                                                                   |
        |         |                                || The detail refer to `Processor Support <processor_support_>`_.                    |
        +---------+--------------------------------+------------------------------------------------------------------------------------+
-        | 1       | amd_pstate_ut_check_enabled    || Check whether AMD P-State is enabled.                                             |
+        | 2       | amd_pstate_ut_check_enabled    || Check whether AMD P-State is enabled.                                             |
        |         |                                ||                                                                                   |
        |         |                                || AMD P-States and ACPI hardware P-States always can be supported in one processor. |
        |         |                                | But AMD P-States has the higher priority and if it is enabled with                 |
        |         |                                | :c:macro:`MSR_AMD_CPPC_ENABLE` or ``cppc_set_enable``, it will respond to the      |
        |         |                                | request from AMD P-States.                                                         |
        +---------+--------------------------------+------------------------------------------------------------------------------------+
-        | 2       | amd_pstate_ut_check_perf       || Check if the each performance values are reasonable.                              |
+        | 3       | amd_pstate_ut_check_perf       || Check if the each performance values are reasonable.                              |
        |         |                                || highest_perf >= nominal_perf > lowest_nonlinear_perf > lowest_perf > 0.           |
        +---------+--------------------------------+------------------------------------------------------------------------------------+
-        | 3       | amd_pstate_ut_check_freq       || Check if the each frequency values and max freq when set support boost mode       |
+        | 4       | amd_pstate_ut_check_freq       || Check if the each frequency values and max freq when set support boost mode       |
        |         |                                | are reasonable.                                                                    |
        |         |                                || max_freq >= nominal_freq > lowest_nonlinear_freq > min_freq > 0                   |
        |         |                                || If boost is not active but supported, this maximum frequency will be larger than  |
        |         |                                | the one in ``cpuinfo``.                                                            |
        +---------+--------------------------------+------------------------------------------------------------------------------------+

+    2). Tbench test
+
+        Test and monitor the cpu changes when running tbench benchmark under the specified governor.
+        These changes include desired performance, frequency, load, performance, energy etc.
+        The specified governor is ondemand or schedutil.
+        Tbench can also be tested on the ``acpi-cpufreq`` kernel driver for comparison.
+
+    3). Gitsource test
+
+        Test and monitor the cpu changes when running gitsource benchmark under the specified governor.
+        These changes include desired performance, frequency, load, time, energy etc.
+        The specified governor is ondemand or schedutil.
+        Gitsource can also be tested on the ``acpi-cpufreq`` kernel driver for comparison.
+
 #. How to execute the tests

   We use test module in the kselftest frameworks to implement it.
-   We create amd-pstate-ut module and tie it into kselftest.(for
+   We create the ``amd-pstate-ut`` module and tie it into kselftest (for
   details refer to Linux Kernel Selftests [4]_).

-    1. Build
+    1). Build

        + open the :c:macro:`CONFIG_X86_AMD_PSTATE` configuration option.
        + set the :c:macro:`CONFIG_X86_AMD_PSTATE_UT` configuration option to M.
@ -449,23 +463,159 @@ Unit Tests for amd-pstate
            $ cd linux
            $ make -C tools/testing/selftests

-    #. Installation & Steps ::
+        + make perf ::
+
+            $ cd tools/perf/
+            $ make
+
+
+    2). Installation & Steps ::

        $ make -C tools/testing/selftests install INSTALL_PATH=~/kselftest
+        $ cp tools/perf/perf /usr/bin/perf
        $ sudo ./kselftest/run_kselftest.sh -c amd-pstate
-        TAP version 13
-        1..1
-        # selftests: amd-pstate: amd-pstate-ut.sh
-        # amd-pstate-ut: ok
-        ok 1 selftests: amd-pstate: amd-pstate-ut.sh

-    #. Results ::
+    3). Specified test case ::

-         $ dmesg | grep "amd_pstate_ut" | tee log.txt
-         [12977.570663] amd_pstate_ut: 1    amd_pstate_ut_acpi_cpc_valid  success!
-         [12977.570673] amd_pstate_ut: 2    amd_pstate_ut_check_enabled   success!
-         [12977.571207] amd_pstate_ut: 3    amd_pstate_ut_check_perf      success!
-         [12977.571212] amd_pstate_ut: 4    amd_pstate_ut_check_freq      success!
+        $ cd ~/kselftest/amd-pstate
+        $ sudo ./run.sh -c basic
+        $ sudo ./run.sh -c tbench
+        $ sudo ./run.sh -c tbench -m acpi-cpufreq
+        $ sudo ./run.sh -c gitsource
+        $ sudo ./run.sh -c gitsource -m acpi-cpufreq
+        $ ./run.sh --help
+        ./run.sh: illegal option -- -
+        Usage: ./run.sh [OPTION...]
+                [-h <help>]
+                [-o <output-file-for-dump>]
+                [-c <all: All testing,
+                     basic: Basic testing,
+                     tbench: Tbench testing,
+                     gitsource: Gitsource testing.>]
+                [-t <tbench time limit>]
+                [-p <tbench process number>]
+                [-l <loop times for tbench>]
+                [-i <amd tracer interval>]
+                [-m <comparative test: acpi-cpufreq>]
+
+
+    4). Results
+
+        + basic
+
+         When you finish test, you will get the following log info ::
+
+          $ dmesg | grep "amd_pstate_ut" | tee log.txt
+          [12977.570663] amd_pstate_ut: 1    amd_pstate_ut_acpi_cpc_valid  success!
+          [12977.570673] amd_pstate_ut: 2    amd_pstate_ut_check_enabled   success!
+          [12977.571207] amd_pstate_ut: 3    amd_pstate_ut_check_perf      success!
+          [12977.571212] amd_pstate_ut: 4    amd_pstate_ut_check_freq      success!
+
+        + tbench
+
+         When you finish the test, you will get selftest.tbench.csv and png images.
+         The selftest.tbench.csv file contains the raw data and the drop of the comparative test.
+         The png images show the performance, energy and performance per watt of each test.
+         Open selftest.tbench.csv :
+
+         +-------------------------------------------------+--------------+----------+---------+----------+-------------+---------+----------------------+
+         + Governor                                        | Round        | Des-perf | Freq    | Load     | Performance | Energy  | Performance Per Watt |
+         +-------------------------------------------------+--------------+----------+---------+----------+-------------+---------+----------------------+
+         + Unit                                            |              |          | GHz     |          | MB/s        | J       | MB/J                 |
+         +=================================================+==============+==========+=========+==========+=============+=========+======================+
+         + amd-pstate-ondemand                             | 1            |          |         |          | 2504.05     | 1563.67 | 158.5378             |
+         +-------------------------------------------------+--------------+----------+---------+----------+-------------+---------+----------------------+
+         + amd-pstate-ondemand                             | 2            |          |         |          | 2243.64     | 1430.32 | 155.2941             |
+         +-------------------------------------------------+--------------+----------+---------+----------+-------------+---------+----------------------+
+         + amd-pstate-ondemand                             | 3            |          |         |          | 2183.88     | 1401.32 | 154.2860             |
+         +-------------------------------------------------+--------------+----------+---------+----------+-------------+---------+----------------------+
+         + amd-pstate-ondemand                             | Average      |          |         |          | 2310.52     | 1465.1  | 156.1268             |
+         +-------------------------------------------------+--------------+----------+---------+----------+-------------+---------+----------------------+
+         + amd-pstate-schedutil                            | 1            | 165.329  | 1.62257 | 99.798   | 2136.54     | 1395.26 | 151.5971             |
+         +-------------------------------------------------+--------------+----------+---------+----------+-------------+---------+----------------------+
+         + amd-pstate-schedutil                            | 2            | 166      | 1.49761 | 99.9993  | 2100.56     | 1380.5  | 150.6377             |
+         +-------------------------------------------------+--------------+----------+---------+----------+-------------+---------+----------------------+
+         + amd-pstate-schedutil                            | 3            | 166      | 1.47806 | 99.9993  | 2084.12     | 1375.76 | 149.9737             |
+         +-------------------------------------------------+--------------+----------+---------+----------+-------------+---------+----------------------+
+         + amd-pstate-schedutil                            | Average      | 165.776  | 1.53275 | 99.9322  | 2107.07     | 1383.84 | 150.7399             |
+         +-------------------------------------------------+--------------+----------+---------+----------+-------------+---------+----------------------+
+         + acpi-cpufreq-ondemand                           | 1            |          |         |          | 2529.9      | 1564.4  | 160.0997             |
+         +-------------------------------------------------+--------------+----------+---------+----------+-------------+---------+----------------------+
+         + acpi-cpufreq-ondemand                           | 2            |          |         |          | 2249.76     | 1432.97 | 155.4297             |
+         +-------------------------------------------------+--------------+----------+---------+----------+-------------+---------+----------------------+
+         + acpi-cpufreq-ondemand                           | 3            |          |         |          | 2181.46     | 1406.88 | 153.5060             |
+         +-------------------------------------------------+--------------+----------+---------+----------+-------------+---------+----------------------+
+         + acpi-cpufreq-ondemand                           | Average      |          |         |          | 2320.37     | 1468.08 | 156.4741             |
+         +-------------------------------------------------+--------------+----------+---------+----------+-------------+---------+----------------------+
+         + acpi-cpufreq-schedutil                          | 1            |          |         |          | 2137.64     | 1385.24 | 152.7723             |
+         +-------------------------------------------------+--------------+----------+---------+----------+-------------+---------+----------------------+
+         + acpi-cpufreq-schedutil                          | 2            |          |         |          | 2107.05     | 1372.23 | 152.0138             |
+         +-------------------------------------------------+--------------+----------+---------+----------+-------------+---------+----------------------+
+         + acpi-cpufreq-schedutil                          | 3            |          |         |          | 2085.86     | 1365.35 | 151.2433             |
+         +-------------------------------------------------+--------------+----------+---------+----------+-------------+---------+----------------------+
+         + acpi-cpufreq-schedutil                          | Average      |          |         |          | 2110.18     | 1374.27 | 152.0136             |
+         +-------------------------------------------------+--------------+----------+---------+----------+-------------+---------+----------------------+
+         + acpi-cpufreq-ondemand VS acpi-cpufreq-schedutil | Comprison(%) |          |         |          | -9.0584     | -6.3899 | -2.8506              |
+         +-------------------------------------------------+--------------+----------+---------+----------+-------------+---------+----------------------+
+         + amd-pstate-ondemand VS amd-pstate-schedutil     | Comprison(%) |          |         |          | 8.8053      | -5.5463 | -3.4503              |
+         +-------------------------------------------------+--------------+----------+---------+----------+-------------+---------+----------------------+
+         + acpi-cpufreq-ondemand VS amd-pstate-ondemand    | Comprison(%) |          |         |          | -0.4245     | -0.2029 | -0.2219              |
+         +-------------------------------------------------+--------------+----------+---------+----------+-------------+---------+----------------------+
+         + acpi-cpufreq-schedutil VS amd-pstate-schedutil  | Comprison(%) |          |         |          | -0.1473     | 0.6963  | -0.8378              |
+         +-------------------------------------------------+--------------+----------+---------+----------+-------------+---------+----------------------+
+
+        + gitsource
+
+         When you finish the test, you will get selftest.gitsource.csv and png images.
+         The selftest.gitsource.csv file contains the raw data and the drop of the comparative test.
+         The png images show the performance, energy and performance per watt of each test.
+         Open selftest.gitsource.csv :
+
+         +-------------------------------------------------+--------------+----------+----------+----------+-------------+---------+----------------------+
+         + Governor                                        | Round        | Des-perf | Freq     | Load     | Time        | Energy  | Performance Per Watt |
+         +-------------------------------------------------+--------------+----------+----------+----------+-------------+---------+----------------------+
+         + Unit                                            |              |          | GHz      |          | s           | J       | 1/J                  |
+         +=================================================+==============+==========+==========+==========+=============+=========+======================+
+         + amd-pstate-ondemand                             | 1            | 50.119   | 2.10509  | 23.3076  | 475.69      | 865.78  | 0.001155027          |
+         +-------------------------------------------------+--------------+----------+----------+----------+-------------+---------+----------------------+
+         + amd-pstate-ondemand                             | 2            | 94.8006  | 1.98771  | 56.6533  | 467.1       | 839.67  | 0.001190944          |
+         +-------------------------------------------------+--------------+----------+----------+----------+-------------+---------+----------------------+
+         + amd-pstate-ondemand                             | 3            | 76.6091  | 2.53251  | 43.7791  | 467.69      | 855.85  | 0.001168429          |
+         +-------------------------------------------------+--------------+----------+----------+----------+-------------+---------+----------------------+
+         + amd-pstate-ondemand                             | Average      | 73.8429  | 2.20844  | 41.2467  | 470.16      | 853.767 | 0.001171279          |
+         +-------------------------------------------------+--------------+----------+----------+----------+-------------+---------+----------------------+
+         + amd-pstate-schedutil                            | 1            | 165.919  | 1.62319  | 98.3868  | 464.17      | 866.8   | 0.001153668          |
+         +-------------------------------------------------+--------------+----------+----------+----------+-------------+---------+----------------------+
+         + amd-pstate-schedutil                            | 2            | 165.97   | 1.31309  | 99.5712  | 480.15      | 880.4   | 0.001135847          |
+         +-------------------------------------------------+--------------+----------+----------+----------+-------------+---------+----------------------+
+         + amd-pstate-schedutil                            | 3            | 165.973  | 1.28448  | 99.9252  | 481.79      | 867.02  | 0.001153375          |
+         +-------------------------------------------------+--------------+----------+----------+----------+-------------+---------+----------------------+
+         + amd-pstate-schedutil                            | Average      | 165.954  | 1.40692  | 99.2944  | 475.37      | 871.407 | 0.001147569          |
+         +-------------------------------------------------+--------------+----------+----------+----------+-------------+---------+----------------------+
+         + acpi-cpufreq-ondemand                           | 1            |          |          |          | 2379.62     | 742.96  | 0.001345967          |
+         +-------------------------------------------------+--------------+----------+----------+----------+-------------+---------+----------------------+
+         + acpi-cpufreq-ondemand                           | 2            |          |          |          | 441.74      | 817.49  | 0.001223256          |
+         +-------------------------------------------------+--------------+----------+----------+----------+-------------+---------+----------------------+
+         + acpi-cpufreq-ondemand                           | 3            |          |          |          | 455.48      | 820.01  | 0.001219497          |
+         +-------------------------------------------------+--------------+----------+----------+----------+-------------+---------+----------------------+
+         + acpi-cpufreq-ondemand                           | Average      |          |          |          | 425.613     | 793.487 | 0.001260260          |
+         +-------------------------------------------------+--------------+----------+----------+----------+-------------+---------+----------------------+
+         + acpi-cpufreq-schedutil                          | 1            |          |          |          | 459.69      | 838.54  | 0.001192548          |
+         +-------------------------------------------------+--------------+----------+----------+----------+-------------+---------+----------------------+
+         + acpi-cpufreq-schedutil                          | 2            |          |          |          | 466.55      | 830.89  | 0.001203528          |
+         +-------------------------------------------------+--------------+----------+----------+----------+-------------+---------+----------------------+
+         + acpi-cpufreq-schedutil                          | 3            |          |          |          | 470.38      | 837.32  | 0.001194286          |
+         +-------------------------------------------------+--------------+----------+----------+----------+-------------+---------+----------------------+
+         + acpi-cpufreq-schedutil                          | Average      |          |          |          | 465.54      | 835.583 | 0.001196769          |
+         +-------------------------------------------------+--------------+----------+----------+----------+-------------+---------+----------------------+
+         + acpi-cpufreq-ondemand VS acpi-cpufreq-schedutil | Comprison(%) |          |          |          | 9.3810      | 5.3051  | -5.0379              |
+         +-------------------------------------------------+--------------+----------+----------+----------+-------------+---------+----------------------+
+         + amd-pstate-ondemand VS amd-pstate-schedutil     | Comprison(%) | 124.7392 | -36.2934 | 140.7329 | 1.1081      | 2.0661  | -2.0242              |
+         +-------------------------------------------------+--------------+----------+----------+----------+-------------+---------+----------------------+
+         + acpi-cpufreq-ondemand VS amd-pstate-ondemand    | Comprison(%) |          |          |          | 10.4665     | 7.5968  | -7.0605              |
+         +-------------------------------------------------+--------------+----------+----------+----------+-------------+---------+----------------------+
+         + acpi-cpufreq-schedutil VS amd-pstate-schedutil  | Comprison(%) |          |          |          | 2.1115      | 4.2873  | -4.1110              |
+         +-------------------------------------------------+--------------+----------+----------+----------+-------------+---------+----------------------+

 Reference
 ===========
--- a/Documentation/admin-guide/sysctl/fs.rst
+++ b/Documentation/admin-guide/sysctl/fs.rst
@ -2,8 +2,6 @@
 Documentation for /proc/sys/fs/
 ===============================

-kernel version 2.2.10
-
 Copyright (c) 1998, 1999,  Rik van Riel <riel@nl.linux.org>

 Copyright (c) 2009,        Shen Feng<shen@cn.fujitsu.com>
@ -12,58 +10,40 @@ For general info and legal blurb, please look in intro.rst.

 ------------------------------------------------------------------------------

-This file contains documentation for the sysctl files in
-/proc/sys/fs/ and is valid for Linux kernel version 2.2.
+This file contains documentation for the sysctl files and directories
+in ``/proc/sys/fs/``.

 The files in this directory can be used to tune and monitor
 miscellaneous and general things in the operation of the Linux
-kernel. Since some of the files _can_ be used to screw up your
+kernel. Since some of the files *can* be used to screw up your
 system, it is advisable to read both documentation and source
 before actually making adjustments.

 1. /proc/sys/fs
 ===============

-Currently, these files are in /proc/sys/fs:
+Currently, these files might (depending on your configuration)
+show up in ``/proc/sys/fs``:

- aio-max-nr
- aio-nr
- dentry-state
- dquot-max
- dquot-nr
- file-max
- file-nr
- inode-max
- inode-nr
- inode-state
- nr_open
- overflowuid
- overflowgid
- pipe-user-pages-hard
- pipe-user-pages-soft
- protected_fifos
- protected_hardlinks
- protected_regular
- protected_symlinks
- suid_dumpable
- super-max
- super-nr
+.. contents:: :local:


 aio-nr & aio-max-nr
 -------------------

-aio-nr is the running total of the number of events specified on the
-io_setup system call for all currently active aio contexts.  If aio-nr
-reaches aio-max-nr then io_setup will fail with EAGAIN.  Note that
-raising aio-max-nr does not result in the pre-allocation or re-sizing
-of any kernel data structures.
+``aio-nr`` shows the current system-wide number of asynchronous io
+requests.  ``aio-max-nr`` allows you to change the maximum value
+``aio-nr`` can grow to.  If ``aio-nr`` reaches ``aio-max-nr`` then
+``io_setup`` will fail with ``EAGAIN``.  Note that raising
+``aio-max-nr`` does not result in the
+pre-allocation or re-sizing of any kernel data structures.


 dentry-state
 ------------

-From linux/include/linux/dcache.h::
+This file shows the values in ``struct dentry_stat_t``, as defined in
+``linux/include/linux/dcache.h``::

  struct dentry_stat_t dentry_stat {
        int nr_dentry;
@ -76,55 +56,73 @@ From linux/include/linux/dcache.h::

 Dentries are dynamically allocated and deallocated.

-nr_dentry shows the total number of dentries allocated (active
-+ unused). nr_unused shows the number of dentries that are not
+``nr_dentry`` shows the total number of dentries allocated (active
++ unused). ``nr_unused`` shows the number of dentries that are not
 actively used, but are saved in the LRU list for future reuse.

-Age_limit is the age in seconds after which dcache entries
-can be reclaimed when memory is short and want_pages is
-nonzero when shrink_dcache_pages() has been called and the
+``age_limit`` is the age in seconds after which dcache entries
+can be reclaimed when memory is short and ``want_pages`` is
+nonzero when ``shrink_dcache_pages()`` has been called and the
 dcache isn't pruned yet.

-nr_negative shows the number of unused dentries that are also
+``nr_negative`` shows the number of unused dentries that are also
 negative dentries which do not map to any files. Instead,
 they help speeding up rejection of non-existing files provided
 by the users.


-dquot-max & dquot-nr
--------------------
-
-The file dquot-max shows the maximum number of cached disk
-quota entries.
-
-The file dquot-nr shows the number of allocated disk quota
-entries and the number of free disk quota entries.
-
-If the number of free cached disk quotas is very low and
-you have some awesome number of simultaneous system users,
-you might want to raise the limit.
-
-
 file-max & file-nr
 ------------------

-The value in file-max denotes the maximum number of file-
+The value in ``file-max`` denotes the maximum number of file-
 handles that the Linux kernel will allocate. When you get lots
 of error messages about running out of file handles, you might
 want to increase this limit.

 Historically,the kernel was able to allocate file handles
 dynamically, but not to free them again. The three values in
-file-nr denote the number of allocated file handles, the number
+``file-nr`` denote the number of allocated file handles, the number
 of allocated but unused file handles, and the maximum number of
-file handles. Linux 2.6 always reports 0 as the number of free
+file handles. Linux 2.6 and later always reports 0 as the number of free
 file handles -- this is not an error, it just means that the
 number of allocated file handles exactly matches the number of
 used file handles.

-Attempts to allocate more file descriptors than file-max are
-reported with printk, look for "VFS: file-max limit <number>
-reached".
+Attempts to allocate more file descriptors than ``file-max`` are
+reported with ``printk``, look for::
+
+  VFS: file-max limit <number> reached
+
+in the kernel logs.
+
+
+inode-nr & inode-state
+----------------------
+
+As with file handles, the kernel allocates the inode structures
+dynamically, but can't free them yet.
+
+The file ``inode-nr`` contains the first two items from
+``inode-state``, so we'll skip to that file...
+
+``inode-state`` contains three actual numbers and four dummies.
+The actual numbers are, in order of appearance, ``nr_inodes``,
+``nr_free_inodes`` and ``preshrink``.
+
+``nr_inodes`` stands for the number of inodes the system has
+allocated.
+
+``nr_free_inodes`` represents the number of free inodes (?) and
+``preshrink`` is nonzero when the
+system needs to prune the inode list instead of allocating
+more.
+
+
+mount-max
+---------
+
+This denotes the maximum number of mounts that may exist
+in a mount namespace.


 nr_open
@ -132,39 +130,10 @@ nr_open

 This denotes the maximum number of file-handles a process can
 allocate. Default value is 1024*1024 (1048576) which should be
-enough for most machines. Actual limit depends on RLIMIT_NOFILE
+enough for most machines. Actual limit depends on ``RLIMIT_NOFILE``
 resource limit.


-inode-max, inode-nr & inode-state
---------------------------------
-
-As with file handles, the kernel allocates the inode structures
-dynamically, but can't free them yet.
-
-The value in inode-max denotes the maximum number of inode
-handlers. This value should be 3-4 times larger than the value
-in file-max, since stdin, stdout and network sockets also
-need an inode struct to handle them. When you regularly run
-out of inodes, you need to increase this value.
-
-The file inode-nr contains the first two items from
-inode-state, so we'll skip to that file...
-
-Inode-state contains three actual numbers and four dummies.
-The actual numbers are, in order of appearance, nr_inodes,
-nr_free_inodes and preshrink.
-
-Nr_inodes stands for the number of inodes the system has
-allocated, this can be slightly more than inode-max because
-Linux allocates them one pageful at a time.
-
-Nr_free_inodes represents the number of free inodes (?) and
-preshrink is nonzero when the nr_inodes > inode-max and the
-system needs to prune the inode list instead of allocating
-more.
-
-
 overflowgid & overflowuid
 -------------------------

@ -192,7 +161,7 @@ pipe-user-pages-soft
 Maximum total number of pages a non-privileged user may allocate for pipes
 before the pipe size gets limited to a single page. Once this limit is reached,
 new pipes will be limited to a single page in size for this user in order to
-limit total memory usage, and trying to increase them using fcntl() will be
+limit total memory usage, and trying to increase them using ``fcntl()`` will be
 denied until usage goes below the limit again. The default value allows to
 allocate up to 1024 pipes at their default size. When set to 0, no limit is
 applied.
@ -207,7 +176,7 @@ file.

 When set to "0", writing to FIFOs is unrestricted.

-When set to "1" don't allow O_CREAT open on FIFOs that we don't own
+When set to "1" don't allow ``O_CREAT`` open on FIFOs that we don't own
 in world writable sticky directories, unless they are owned by the
 owner of the directory.

@ -221,7 +190,7 @@ protected_hardlinks

 A long-standing class of security issues is the hardlink-based
 time-of-check-time-of-use race, most commonly seen in world-writable
-directories like /tmp. The common method of exploitation of this flaw
+directories like ``/tmp``. The common method of exploitation of this flaw
 is to cross privilege boundaries when following a given hardlink (i.e. a
 root process follows a hardlink created by another user). Additionally,
 on systems without separated partitions, this stops unauthorized users
@ -239,13 +208,13 @@ This protection is based on the restrictions in Openwall and grsecurity.
 protected_regular
 -----------------

-This protection is similar to protected_fifos, but it
+This protection is similar to `protected_fifos`_, but it
 avoids writes to an attacker-controlled regular file, where a program
 expected to create one.

 When set to "0", writing to regular files is unrestricted.

-When set to "1" don't allow O_CREAT open on regular files that we
+When set to "1" don't allow ``O_CREAT`` open on regular files that we
 don't own in world writable sticky directories, unless they are
 owned by the owner of the directory.

@ -257,7 +226,7 @@ protected_symlinks

 A long-standing class of security issues is the symlink-based
 time-of-check-time-of-use race, most commonly seen in world-writable
-directories like /tmp. The common method of exploitation of this flaw
+directories like ``/tmp``. The common method of exploitation of this flaw
 is to cross privilege boundaries when following a given symlink (i.e. a
 root process follows a symlink belonging to another user). For a likely
 incomplete list of hundreds of examples across the years, please see:
@ -272,23 +241,25 @@ follower match, or when the directory owner matches the symlink's owner.
 This protection is based on the restrictions in Openwall and grsecurity.


-suid_dumpable:
--------------
+suid_dumpable
+-------------

 This value can be used to query and set the core dump mode for setuid
 or otherwise protected/tainted binaries. The modes are

 =   ==========  ===============================================================
-0   (default)	traditional behaviour. Any process which has changed
+0   (default)	Traditional behaviour. Any process which has changed
 		privilege levels or is execute only will not be dumped.
-1   (debug)	all processes dump core when possible. The core dump is
+1   (debug)	All processes dump core when possible. The core dump is
 		owned by the current user and no security is applied. This is
 		intended for system debugging situations only.
 		Ptrace is unchecked.
 		This is insecure as it allows regular users to examine the
 		memory contents of privileged processes.
-2   (suidsafe)	any binary which normally would not be dumped is dumped
-		anyway, but only if the "core_pattern" kernel sysctl is set to
+2   (suidsafe)	Any binary which normally would not be dumped is dumped
+		anyway, but only if the ``core_pattern`` kernel sysctl (see
+		:ref:`Documentation/admin-guide/sysctl/kernel.rst <core_pattern>`)
+		is set to
 		either a pipe handler or a fully qualified path. (For more
 		details on this limitation, see CVE-2006-2451.) This mode is
 		appropriate when administrators are attempting to debug
@ -301,36 +272,11 @@ or otherwise protected/tainted binaries. The modes are
 =   ==========  ===============================================================


-super-max & super-nr
--------------------
-
-These numbers control the maximum number of superblocks, and
-thus the maximum number of mounted filesystems the kernel
-can have. You only need to increase super-max if you need to
-mount more filesystems than the current value in super-max
-allows you to.
-
-
-aio-nr & aio-max-nr
-------------------
-
-aio-nr shows the current system-wide number of asynchronous io
-requests.  aio-max-nr allows you to change the maximum value
-aio-nr can grow to.
-
-
-mount-max
---------
-
-This denotes the maximum number of mounts that may exist
-in a mount namespace.
-
-

 2. /proc/sys/fs/binfmt_misc
 ===========================

-Documentation for the files in /proc/sys/fs/binfmt_misc is
+Documentation for the files in ``/proc/sys/fs/binfmt_misc`` is
 in Documentation/admin-guide/binfmt-misc.rst.


@ -343,28 +289,32 @@ creation of a  user space  library that  implements  the  POSIX message queues
 API (as noted by the  MSG tag in the  POSIX 1003.1-2001 version  of the System
 Interfaces specification.)

-The "mqueue" filesystem contains values for determining/setting  the amount of
-resources used by the file system.
+The "mqueue" filesystem contains values for determining/setting the
+amount of resources used by the file system.

-/proc/sys/fs/mqueue/queues_max is a read/write  file for  setting/getting  the
-maximum number of message queues allowed on the system.
+``/proc/sys/fs/mqueue/queues_max`` is a read/write file for
+setting/getting the maximum number of message queues allowed on the
+system.

-/proc/sys/fs/mqueue/msg_max  is  a  read/write file  for  setting/getting  the
-maximum number of messages in a queue value.  In fact it is the limiting value
-for another (user) limit which is set in mq_open invocation. This attribute of
-a queue must be less or equal then msg_max.
+``/proc/sys/fs/mqueue/msg_max`` is a read/write file for
+setting/getting the maximum number of messages in a queue value.  In
+fact it is the limiting value for another (user) limit which is set in
+``mq_open`` invocation.  This attribute of a queue must be less than
+or equal to ``msg_max``.

-/proc/sys/fs/mqueue/msgsize_max is  a read/write  file for setting/getting the
-maximum  message size value (it is every  message queue's attribute set during
-its creation).
+``/proc/sys/fs/mqueue/msgsize_max`` is a read/write file for
+setting/getting the maximum message size value (it is an attribute of
+every message queue, set during its creation).

-/proc/sys/fs/mqueue/msg_default is  a read/write  file for setting/getting the
-default number of messages in a queue value if attr parameter of mq_open(2) is
-NULL. If it exceed msg_max, the default value is initialized msg_max.
+``/proc/sys/fs/mqueue/msg_default`` is a read/write file for
+setting/getting the default number of messages in a queue value if the
+``attr`` parameter of ``mq_open(2)`` is ``NULL``. If it exceeds
+``msg_max``, the default value is initialized to ``msg_max``.

-/proc/sys/fs/mqueue/msgsize_default is a read/write file for setting/getting
-the default message size value if attr parameter of mq_open(2) is NULL. If it
-exceed msgsize_max, the default value is initialized msgsize_max.
+``/proc/sys/fs/mqueue/msgsize_default`` is a read/write file for
+setting/getting the default message size value if the ``attr``
+parameter of ``mq_open(2)`` is ``NULL``. If it exceeds
+``msgsize_max``, the default value is initialized to ``msgsize_max``.

 4. /proc/sys/fs/epoll - Configuration options for the epoll interface
 =====================================================================
@ -378,7 +328,7 @@ Every epoll file descriptor can store a number of files to be monitored
 for event readiness. Each one of these monitored files constitutes a "watch".
 This configuration option sets the maximum number of "watches" that are
 allowed for each user.
-Each "watch" costs roughly 90 bytes on a 32bit kernel, and roughly 160 bytes
-on a 64bit one.
-The current default value for  max_user_watches  is the 1/25 (4%) of the
-available low memory, divided for the "watch" cost in bytes.
+Each "watch" costs roughly 90 bytes on a 32-bit kernel, and roughly 160 bytes
+on a 64-bit one.
+The current default value for ``max_user_watches`` is 4% of the
+available low memory, divided by the "watch" cost in bytes.
--- a/Documentation/admin-guide/sysctl/kernel.rst
+++ b/Documentation/admin-guide/sysctl/kernel.rst
@ -139,6 +139,8 @@ Highest valid capability of the running kernel.  Exports
 ``CAP_LAST_CAP`` from the kernel.


+.. _core_pattern:
+
 core_pattern
 ============

@ -174,6 +176,7 @@ core_pattern
 	%f      	executable filename
 	%E		executable path
 	%c		maximum size of core file by resource limit RLIMIT_CORE
+	%C		CPU the task ran on
 	%<OTHER>	both are dropped
 	========	==========================================

@ -667,6 +670,15 @@ This is the default behavior.
 an oops event is detected.


+oops_limit
+==========
+
+Number of kernel oopses after which the kernel should panic when
+``panic_on_oops`` is not set. Setting this to 0 disables checking
+the count. Setting this to 1 has the same effect as setting
+``panic_on_oops=1``. The default value is 10000.
+
+
 osrelease, ostype & version
 ===========================

@ -1314,6 +1326,29 @@ watchdog work to be queued by the watchdog timer function, otherwise the NMI
 watchdog — if enabled — can detect a hard lockup condition.


+split_lock_mitigate (x86 only)
+==============================
+
+On x86, each "split lock" imposes a system-wide performance penalty. On larger
+systems, large numbers of split locks from unprivileged users can result in
+denials of service to well-behaved and potentially more important users.
+
+The kernel mitigates these bad users by detecting split locks and imposing
+penalties: forcing them to wait and only allowing one core to execute split
+locks at a time.
+
+These mitigations can make those bad applications unbearably slow. Setting
+split_lock_mitigate=0 may restore some application performance, but will also
+increase system exposure to denial of service attacks from split lock users.
+
+= ===================================================================
+0 Disable the mitigation mode - just warns about split locks in the kernel log
+  and exposes the system to denials of service from the split lockers.
+1 Enable the mitigation mode (this is the default) - penalizes the split
+  lockers with intentional performance degradation.
+= ===================================================================
+
+
 stack_erasing
 =============

@ -1500,6 +1535,16 @@ entry will default to 2 instead of 0.
 2 Unprivileged calls to ``bpf()`` are disabled
 = =============================================================

+
+warn_limit
+==========
+
+Number of kernel warnings after which the kernel should panic when
+``panic_on_warn`` is not set. Setting this to 0 disables checking
+the warning count. Setting this to 1 has the same effect as setting
+``panic_on_warn=1``. The default value is 0.
+
+
 watchdog
 ========

--- a/Documentation/arm/marvell.rst
+++ b/Documentation/arm/marvell.rst
@ -14,18 +14,20 @@ Orion family

  Flavors:
        - 88F5082
-        - 88F5181
-        - 88F5181L
-        - 88F5182
+        - 88F5181  a.k.a Orion-1
+        - 88F5181L a.k.a Orion-VoIP
+        - 88F5182  a.k.a Orion-NAS

               - Datasheet: https://web.archive.org/web/20210124231420/http://csclub.uwaterloo.ca/~board/ts7800/MV88F5182-datasheet.pdf
               - Programmer's User Guide: https://web.archive.org/web/20210124231536/http://csclub.uwaterloo.ca/~board/ts7800/MV88F5182-opensource-manual.pdf
               - User Manual: https://web.archive.org/web/20210124231631/http://csclub.uwaterloo.ca/~board/ts7800/MV88F5182-usermanual.pdf
               - Functional Errata: https://web.archive.org/web/20210704165540/https://www.digriz.org.uk/ts78xx/88F5182_Functional_Errata.pdf
-        - 88F5281
+        - 88F5281  a.k.a Orion-2

               - Datasheet: https://web.archive.org/web/20131028144728/http://www.ocmodshop.com/images/reviews/networking/qnap_ts409u/marvel_88f5281_data_sheet.pdf
-        - 88F6183
+        - 88F6183  a.k.a Orion-1-90
+  Homepage:
+        https://web.archive.org/web/20080607215437/http://www.marvell.com/products/media/index.jsp
  Core:
 	Feroceon 88fr331 (88f51xx) or 88fr531-vd (88f52xx) ARMv5 compatible
  Linux kernel mach directory:
--- a/Documentation/arm64/acpi_object_usage.rst
+++ b/Documentation/arm64/acpi_object_usage.rst
@ -163,7 +163,7 @@ FPDT   Section 5.2.23 (signature == "FPDT")

       **Firmware Performance Data Table**

-       Optional, not currently supported.
+       Optional, useful for boot performance profiling.

 GTDT   Section 5.2.24 (signature == "GTDT")

--- a/Documentation/arm64/booting.rst
+++ b/Documentation/arm64/booting.rst
@ -121,8 +121,9 @@ Header notes:
 			  to the base of DRAM, since memory below it is not
 			  accessible via the linear mapping
 			1
-			  2MB aligned base may be anywhere in physical
-			  memory
+			  2MB aligned base such that all image_size bytes
+			  counted from the start of the image are within
+			  the 48-bit addressable range of physical memory
  Bits 4-63	Reserved.
  ============= ===============================================================

@ -348,7 +349,7 @@ Before jumping into the kernel, the following conditions must be met:

    - HWFGWTR_EL2.nSMPRI_EL1 (bit 54) must be initialised to 0b01.

-  For CPUs with the Scalable Matrix Extension FA64 feature (FEAT_SME_FA64)
+  For CPUs with the Scalable Matrix Extension FA64 feature (FEAT_SME_FA64):

  - If EL3 is present:

--- a/Documentation/arm64/elf_hwcaps.rst
+++ b/Documentation/arm64/elf_hwcaps.rst
@ -275,6 +275,15 @@ HWCAP2_EBF16
 HWCAP2_SVE_EBF16
    Functionality implied by ID_AA64ZFR0_EL1.BF16 == 0b0010.

+HWCAP2_CSSC
+    Functionality implied by ID_AA64ISAR2_EL1.CSSC == 0b0001.
+
+HWCAP2_RPRFM
+    Functionality implied by ID_AA64ISAR2_EL1.RPRFM == 0b0001.
+
+HWCAP2_SVE2P1
+    Functionality implied by ID_AA64ZFR0_EL1.SVEver == 0b0010.
+
 4. Unused AT_HWCAP bits
 -----------------------

--- a/Documentation/arm64/silicon-errata.rst
+++ b/Documentation/arm64/silicon-errata.rst
@ -120,6 +120,8 @@ stable kernels.
 +----------------+-----------------+-----------------+-----------------------------+
 | ARM            | Cortex-A710     | #2224489        | ARM64_ERRATUM_2224489       |
 +----------------+-----------------+-----------------+-----------------------------+
+| ARM            | Cortex-A715     | #2645198        | ARM64_ERRATUM_2645198       |
++----------------+-----------------+-----------------+-----------------------------+
 | ARM            | Cortex-X2       | #2119858        | ARM64_ERRATUM_2119858       |
 +----------------+-----------------+-----------------+-----------------------------+
 | ARM            | Cortex-X2       | #2224489        | ARM64_ERRATUM_2224489       |
--- a/Documentation/arm64/sve.rst
+++ b/Documentation/arm64/sve.rst
@ -52,6 +52,7 @@ model features for SVE is included in Appendix A.
 	HWCAP2_SVEBITPERM
 	HWCAP2_SVESHA3
 	HWCAP2_SVESM4
+	HWCAP2_SVE2P1

  This list may be extended over time as the SVE architecture evolves.

--- a/Documentation/block/inline-encryption.rst
+++ b/Documentation/block/inline-encryption.rst
@ -142,7 +142,7 @@ Therefore, we also introduce *blk-crypto-fallback*, which is an implementation
 of inline encryption using the kernel crypto API.  blk-crypto-fallback is built
 into the block layer, so it works on any block device without any special setup.
 Essentially, when a bio with an encryption context is submitted to a
-request_queue that doesn't support that encryption context, the block layer will
+block_device that doesn't support that encryption context, the block layer will
 handle en/decryption of the bio using blk-crypto-fallback.

 For encryption, the data cannot be encrypted in-place, as callers usually rely
@ -187,7 +187,7 @@ API presented to users of the block layer

 ``blk_crypto_config_supported()`` allows users to check ahead of time whether
 inline encryption with particular crypto settings will work on a particular
-request_queue -- either via hardware or via blk-crypto-fallback.  This function
+block_device -- either via hardware or via blk-crypto-fallback.  This function
 takes in a ``struct blk_crypto_config`` which is like blk_crypto_key, but omits
 the actual bytes of the key and instead just contains the algorithm, data unit
 size, etc.  This function can be useful if blk-crypto-fallback is disabled.
@ -195,7 +195,7 @@ size, etc.  This function can be useful if blk-crypto-fallback is disabled.
 ``blk_crypto_init_key()`` allows users to initialize a blk_crypto_key.

 Users must call ``blk_crypto_start_using_key()`` before actually starting to use
-a blk_crypto_key on a request_queue (even if ``blk_crypto_config_supported()``
+a blk_crypto_key on a block_device (even if ``blk_crypto_config_supported()``
 was called earlier).  This is needed to initialize blk-crypto-fallback if it
 will be needed.  This must not be called from the data path, as this may have to
 allocate resources, which may deadlock in that case.
@ -207,7 +207,7 @@ for en/decryption.  Users don't need to worry about freeing the bio_crypt_ctx
 later, as that happens automatically when the bio is freed or reset.

 Finally, when done using inline encryption with a blk_crypto_key on a
-request_queue, users must call ``blk_crypto_evict_key()``.  This ensures that
+block_device, users must call ``blk_crypto_evict_key()``.  This ensures that
 the key is evicted from all keyslots it may be programmed into and unlinked from
 any kernel data structures it may be linked into.

@ -221,9 +221,9 @@ as follows:
 5. ``blk_crypto_evict_key()`` (after all I/O has completed)
 6. Zeroize the blk_crypto_key (this has no dedicated function)

-If a blk_crypto_key is being used on multiple request_queues, then
+If a blk_crypto_key is being used on multiple block_devices, then
 ``blk_crypto_config_supported()`` (if used), ``blk_crypto_start_using_key()``,
-and ``blk_crypto_evict_key()`` must be called on each request_queue.
+and ``blk_crypto_evict_key()`` must be called on each block_device.

 API presented to device drivers
 ===============================
--- a/Documentation/bpf/bpf_design_QA.rst
+++ b/Documentation/bpf/bpf_design_QA.rst
@ -298,3 +298,48 @@ A: NO.

 The BTF_ID macro does not cause a function to become part of the ABI
 any more than does the EXPORT_SYMBOL_GPL macro.
+
+Q: What is the compatibility story for special BPF types in map values?
+-----------------------------------------------------------------------
+Q: Users are allowed to embed bpf_spin_lock, bpf_timer fields in their BPF map
+values (when using BTF support for BPF maps). This allows to use helpers for
+such objects on these fields inside map values. Users are also allowed to embed
+pointers to some kernel types (with __kptr and __kptr_ref BTF tags). Will the
+kernel preserve backwards compatibility for these features?
+
+A: It depends. For bpf_spin_lock, bpf_timer: YES, for kptr and everything else:
+NO, but see below.
+
+For struct types that have been added already, like bpf_spin_lock and bpf_timer,
+the kernel will preserve backwards compatibility, as they are part of UAPI.
+
+For kptrs, they are also part of UAPI, but only with respect to the kptr
+mechanism. The types that you can use with a __kptr and __kptr_ref tagged
+pointer in your struct are NOT part of the UAPI contract. The supported types can
+and will change across kernel releases. However, operations like accessing kptr
+fields and bpf_kptr_xchg() helper will continue to be supported across kernel
+releases for the supported types.
+
+For any other supported struct type, unless explicitly stated in this document
+and added to bpf.h UAPI header, such types can and will arbitrarily change their
+size, type, and alignment, or any other user visible API or ABI detail across
+kernel releases. The users must adapt their BPF programs to the new changes and
+update them to make sure their programs continue to work correctly.
+
+NOTE: BPF subsystem specially reserves the 'bpf\_' prefix for type names, in
+order to introduce more special fields in the future. Hence, user programs must
+avoid defining types with 'bpf\_' prefix to not be broken in future releases.
+In other words, no backwards compatibility is guaranteed if one uses a type
+in BTF with 'bpf\_' prefix.
+
+Q: What is the compatibility story for special BPF types in allocated objects?
+------------------------------------------------------------------------------
+Q: Same as above, but for allocated objects (i.e. objects allocated using
+bpf_obj_new for user defined types). Will the kernel preserve backwards
+compatibility for these features?
+
+A: NO.
+
+Unlike map value types, there are no stability guarantees for this case. The
+whole API to work with allocated objects and any support for special fields
+inside them is unstable (since it is exposed through kfuncs).
--- a/Documentation/bpf/bpf_devel_QA.rst
+++ b/Documentation/bpf/bpf_devel_QA.rst
@ -44,6 +44,33 @@ is a guarantee that the reported issue will be overlooked.**
 Submitting patches
 ==================

+Q: How do I run BPF CI on my changes before sending them out for review?
+------------------------------------------------------------------------
+A: BPF CI is GitHub based and hosted at https://github.com/kernel-patches/bpf.
+While GitHub also provides a CLI that can be used to accomplish the same
+results, here we focus on the UI based workflow.
+
+The following steps lay out how to start a CI run for your patches:
+
+- Create a fork of the aforementioned repository in your own account (one time
+  action)
+
+- Clone the fork locally, check out a new branch tracking either the bpf-next
+  or bpf branch, and apply your to-be-tested patches on top of it
+
+- Push the local branch to your fork and create a pull request against
+  kernel-patches/bpf's bpf-next_base or bpf_base branch, respectively
+
+Shortly after the pull request has been created, the CI workflow will run. Note
+that capacity is shared with patches submitted upstream being checked and so
+depending on utilization the run can take a while to finish.
+
+Note furthermore that both base branches (bpf-next_base and bpf_base) will be
+updated as patches are pushed to the respective upstream branches they track. As
+such, your patch set will automatically (be attempted to) be rebased as well.
+This behavior can result in a CI run being aborted and restarted with the new
+baseline.
+
 Q: To which mailing list do I need to submit my BPF patches?
 ------------------------------------------------------------
 A: Please submit your BPF patches to the bpf kernel mailing list:
--- a/Documentation/bpf/bpf_iterators.rst
+++ b/Documentation/bpf/bpf_iterators.rst
@ -0,0 +1,485 @@
+=============
+BPF Iterators
+=============
+
+
+----------
+Motivation
+----------
+
+There are a few existing ways to dump kernel data into user space. The most
+popular one is the ``/proc`` system. For example, ``cat /proc/net/tcp6`` dumps
+all tcp6 sockets in the system, and ``cat /proc/net/netlink`` dumps all netlink
+sockets in the system. However, their output format tends to be fixed, and if
+users want more information about these sockets, they have to patch the kernel,
+which often takes time to publish upstream and release. The same is true for popular
+tools like `ss <https://man7.org/linux/man-pages/man8/ss.8.html>`_ where any
+additional information needs a kernel patch.
+
+To solve this problem, the `drgn
+<https://www.kernel.org/doc/html/latest/bpf/drgn.html>`_ tool is often used to
+dig out the kernel data with no kernel change. However, the main drawback for
+drgn is performance, as it cannot do pointer tracing inside the kernel. In
+addition, drgn cannot validate a pointer value and may read invalid data if the
+pointer becomes invalid inside the kernel.
+
+The BPF iterator solves the above problem by providing flexibility on what data
+(e.g., tasks, bpf_maps, etc.) to collect by calling BPF programs for each kernel
+data object.
+
+----------------------
+How BPF Iterators Work
+----------------------
+
+A BPF iterator is a type of BPF program that allows users to iterate over
+specific types of kernel objects. Unlike traditional BPF tracing programs that
+allow users to define callbacks that are invoked at particular points of
+execution in the kernel, BPF iterators allow users to define callbacks that
+should be executed for every entry in a variety of kernel data structures.
+
+For example, users can define a BPF iterator that iterates over every task on
+the system and dumps the total amount of CPU runtime currently used by each of
+them. Another BPF task iterator may instead dump the cgroup information for each
+task. Such flexibility is the core value of BPF iterators.
+
+A BPF program is always loaded into the kernel at the behest of a user space
+process. A user space process loads a BPF program by opening and initializing
+the program skeleton as required and then invoking a syscall to have the BPF
+program verified and loaded by the kernel.
+
+In traditional tracing programs, a program is activated by having user space
+obtain a ``bpf_link`` to the program with ``bpf_program__attach()``. Once
+activated, the program callback will be invoked whenever the tracepoint is
+triggered in the main kernel. For BPF iterator programs, a ``bpf_link`` to the
+program is obtained using ``bpf_link_create()``, and the program callback is
+invoked by issuing system calls from user space.
+
+Next, let us see how you can use the iterators to iterate on kernel objects and
+read data.
+
+------------------------
+How to Use BPF iterators
+------------------------
+
+BPF selftests are a great resource to illustrate how to use the iterators. In
+this section, we’ll walk through a BPF selftest which shows how to load and use
+a BPF iterator program. To begin, we’ll look at `bpf_iter.c
+<https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git/tree/tools/testing/selftests/bpf/prog_tests/bpf_iter.c>`_,
+which illustrates how to load and trigger BPF iterators on the user space side.
+Later, we’ll look at a BPF program that runs in kernel space.
+
+Loading a BPF iterator in the kernel from user space typically involves the
+following steps:
+
+* The BPF program is loaded into the kernel through ``libbpf``. Once the kernel
+  has verified and loaded the program, it returns a file descriptor (fd) to user
+  space.
+* Obtain a ``link_fd`` to the BPF program by calling the ``bpf_link_create()``
+  specified with the BPF program file descriptor received from the kernel.
+* Next, obtain a BPF iterator file descriptor (``bpf_iter_fd``) by calling the
+  ``bpf_iter_create()`` specified with the ``bpf_link`` received from Step 2.
+* Trigger the iteration by calling ``read(bpf_iter_fd)`` until no data is
+  available.
+* Close the iterator fd using ``close(bpf_iter_fd)``.
+* If you need to reread the data, get a new ``bpf_iter_fd`` and do the read again.
+
+The following are a few examples of selftest BPF iterator programs:
+
+* `bpf_iter_tcp4.c <https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git/tree/tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c>`_
+* `bpf_iter_task_vma.c <https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git/tree/tools/testing/selftests/bpf/progs/bpf_iter_task_vma.c>`_
+* `bpf_iter_task_file.c <https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git/tree/tools/testing/selftests/bpf/progs/bpf_iter_task_file.c>`_
+
+Let us look at ``bpf_iter_task_file.c``, which runs in kernel space:
+
+Here is the definition of ``bpf_iter__task_file`` in `vmlinux.h
+<https://facebookmicrosites.github.io/bpf/blog/2020/02/19/bpf-portability-and-co-re.html#btf>`_.
+Any struct name in ``vmlinux.h`` in the format ``bpf_iter__<iter_name>``
+represents a BPF iterator. The suffix ``<iter_name>`` represents the type of
+iterator.
+
+::
+
+    struct bpf_iter__task_file {
+            union {
+                struct bpf_iter_meta *meta;
+            };
+            union {
+                struct task_struct *task;
+            };
+            u32 fd;
+            union {
+                struct file *file;
+            };
+    };
+
+In the above code, the field 'meta' contains the metadata, which is the same for
+all BPF iterator programs. The rest of the fields are specific to different
+iterators. For example, for task_file iterators, the kernel layer provides the
+'task', 'fd' and 'file' field values. The 'task' and 'file' are `reference
+counted
+<https://facebookmicrosites.github.io/bpf/blog/2018/08/31/object-lifetime.html#file-descriptors-and-reference-counters>`_,
+so they won't go away when the BPF program runs.
+
+Here is a snippet from the ``bpf_iter_task_file.c`` file:
+
+::
+
+  SEC("iter/task_file")
+  int dump_task_file(struct bpf_iter__task_file *ctx)
+  {
+    struct seq_file *seq = ctx->meta->seq;
+    struct task_struct *task = ctx->task;
+    struct file *file = ctx->file;
+    __u32 fd = ctx->fd;
+
+    if (task == NULL || file == NULL)
+      return 0;
+
+    if (ctx->meta->seq_num == 0) {
+      count = 0;
+      BPF_SEQ_PRINTF(seq, "    tgid      gid       fd      file\n");
+    }
+
+    if (tgid == task->tgid && task->tgid != task->pid)
+      count++;
+
+    if (last_tgid != task->tgid) {
+      last_tgid = task->tgid;
+      unique_tgid_count++;
+    }
+
+    BPF_SEQ_PRINTF(seq, "%8d %8d %8d %lx\n", task->tgid, task->pid, fd,
+            (long)file->f_op);
+    return 0;
+  }
+
+In the above example, the section name ``SEC("iter/task_file")`` indicates that
+the program is a BPF iterator program to iterate all files from all tasks. The
+context of the program is the ``bpf_iter__task_file`` struct.
+
+The user space program invokes the BPF iterator program running in the kernel
+by issuing a ``read()`` syscall. Once invoked, the BPF
+program can export data to user space using a variety of BPF helper functions.
+You can use either ``bpf_seq_printf()`` (and BPF_SEQ_PRINTF helper macro) or
+``bpf_seq_write()`` function based on whether you need formatted output or just
+binary data, respectively. For binary-encoded data, the user space applications
+can process the data from ``bpf_seq_write()`` as needed. For the formatted data,
+you can use ``cat <path>`` to print the results similar to ``cat
+/proc/net/netlink`` after pinning the BPF iterator to the bpffs mount. Later,
+use ``rm -f <path>`` to remove the pinned iterator.
+
+For example, you can use the following command to create a BPF iterator from the
+``bpf_iter_ipv6_route.o`` object file and pin it to the ``/sys/fs/bpf/my_route``
+path:
+
+::
+
+  $ bpftool iter pin ./bpf_iter_ipv6_route.o  /sys/fs/bpf/my_route
+
+And then print out the results using the following command:
+
+::
+
+  $ cat /sys/fs/bpf/my_route
+
+
+-------------------------------------------------------
+Implement Kernel Support for BPF Iterator Program Types
+-------------------------------------------------------
+
+To implement a BPF iterator in the kernel, the developer must make a one-time
+change to the following key data structure defined in the `bpf.h
+<https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git/tree/include/linux/bpf.h>`_
+file.
+
+::
+
+  struct bpf_iter_reg {
+            const char *target;
+            bpf_iter_attach_target_t attach_target;
+            bpf_iter_detach_target_t detach_target;
+            bpf_iter_show_fdinfo_t show_fdinfo;
+            bpf_iter_fill_link_info_t fill_link_info;
+            bpf_iter_get_func_proto_t get_func_proto;
+            u32 ctx_arg_info_size;
+            u32 feature;
+            struct bpf_ctx_arg_aux ctx_arg_info[BPF_ITER_CTX_ARG_MAX];
+            const struct bpf_iter_seq_info *seq_info;
+  };
+
+After filling the data structure fields, call ``bpf_iter_reg_target()`` to
+register the iterator to the main BPF iterator subsystem.
+
+The following is the breakdown for each field in struct ``bpf_iter_reg``.
+
+.. list-table::
+   :widths: 25 50
+   :header-rows: 1
+
+   * - Fields
+     - Description
+   * - target
+     - Specifies the name of the BPF iterator. For example: ``bpf_map``,
+       ``bpf_map_elem``. The name should be different from other ``bpf_iter`` target names in the kernel.
+   * - attach_target and detach_target
+     - Allows for target specific ``link_create`` action since some targets
+       may need special processing. Called during the user space link_create stage.
+   * - show_fdinfo and fill_link_info
+     - Called to fill target specific information when user tries to get link
+       info associated with the iterator.
+   * - get_func_proto
+     - Permits a BPF iterator to access BPF helpers specific to the iterator.
+   * - ctx_arg_info_size and ctx_arg_info
+     - Specifies the verifier states for BPF program arguments associated with
+       the bpf iterator.
+   * - feature
+     - Specifies certain action requests in the kernel BPF iterator
+       infrastructure. Currently, only BPF_ITER_RESCHED is supported. This means
+       that the kernel function cond_resched() is called to prevent other kernel
+       subsystems (e.g., RCU) from misbehaving.
+   * - seq_info
+     - Specifies the set of seq operations for the BPF iterator and helpers to
+       initialize/free the private data for the corresponding ``seq_file``.
+
+
+`Click here
+<https://lore.kernel.org/bpf/20210212183107.50963-2-songliubraving@fb.com/>`_
+to see an implementation of the ``task_vma`` BPF iterator in the kernel.
+
+---------------------------------
+Parameterizing BPF Task Iterators
+---------------------------------
+
+By default, BPF iterators walk through all the objects of the specified types
+(processes, cgroups, maps, etc.) across the entire system to read relevant
+kernel data. But often, there are cases where we only care about a much smaller
+subset of iterable kernel objects, such as only iterating tasks within a
+specific process. Therefore, BPF iterator programs support filtering out objects
+from iteration by allowing user space to configure the iterator program when it
+is attached.
+
+--------------------------
+BPF Task Iterator Program
+--------------------------
+
+The following code is a BPF iterator program to print files and task information
+through the ``seq_file`` of the iterator. It is a standard BPF iterator program
+that visits every file of an iterator. We will use this BPF program in our
+example later.
+
+::
+
+  #include <vmlinux.h>
+  #include <bpf/bpf_helpers.h>
+
+  char _license[] SEC("license") = "GPL";
+
+  SEC("iter/task_file")
+  int dump_task_file(struct bpf_iter__task_file *ctx)
+  {
+        struct seq_file *seq = ctx->meta->seq;
+        struct task_struct *task = ctx->task;
+        struct file *file = ctx->file;
+        __u32 fd = ctx->fd;
+        if (task == NULL || file == NULL)
+                return 0;
+        if (ctx->meta->seq_num == 0) {
+                BPF_SEQ_PRINTF(seq, "    tgid      pid       fd      file\n");
+        }
+        BPF_SEQ_PRINTF(seq, "%8d %8d %8d %lx\n", task->tgid, task->pid, fd,
+                        (long)file->f_op);
+        return 0;
+  }
+
+----------------------------------------
+Creating a File Iterator with Parameters
+----------------------------------------
+
+Now, let us look at how to create an iterator that includes only files of a
+process.
+
+First, fill the ``bpf_iter_attach_opts`` struct as shown below:
+
+::
+
+  LIBBPF_OPTS(bpf_iter_attach_opts, opts);
+  union bpf_iter_link_info linfo;
+  memset(&linfo, 0, sizeof(linfo));
+  linfo.task.pid = getpid();
+  opts.link_info = &linfo;
+  opts.link_info_len = sizeof(linfo);
+
+``linfo.task.pid``, if it is non-zero, directs the kernel to create an iterator
+that only includes opened files for the process with the specified ``pid``. In
+this example, we will only be iterating files for our process. If
+``linfo.task.pid`` is zero, the iterator will visit every opened file of every
+process. Similarly, ``linfo.task.tid`` directs the kernel to create an iterator
+that visits opened files of a specific thread, not a process. In this example,
+``linfo.task.tid`` is different from ``linfo.task.pid`` only if the thread has a
+separate file descriptor table. In most circumstances, all process threads share
+a single file descriptor table.
+
+Now, in the userspace program, pass a pointer to the struct to
+``bpf_program__attach_iter()``.
+
+::
+
+  link = bpf_program__attach_iter(prog, &opts);
+  iter_fd = bpf_iter_create(bpf_link__fd(link));
+
+If both *tid* and *pid* are zero, an iterator created from this struct
+``bpf_iter_attach_opts`` will include every opened file of every task in the
+system (in the namespace, actually.) It is the same as passing a NULL as the
+second argument to ``bpf_program__attach_iter()``.
+
+The whole program looks like the following code:
+
+::
+
+  #include <stdio.h>
+  #include <string.h>
+  #include <unistd.h>
+  #include <bpf/bpf.h>
+  #include <bpf/libbpf.h>
+  #include "bpf_iter_task_ex.skel.h"
+
+  static int do_read_opts(struct bpf_program *prog, struct bpf_iter_attach_opts *opts)
+  {
+        struct bpf_link *link;
+        char buf[16] = {};
+        int iter_fd = -1, len;
+        int ret = 0;
+
+        link = bpf_program__attach_iter(prog, opts);
+        if (!link) {
+                fprintf(stderr, "bpf_program__attach_iter() fails\n");
+                return -1;
+        }
+        iter_fd = bpf_iter_create(bpf_link__fd(link));
+        if (iter_fd < 0) {
+                fprintf(stderr, "bpf_iter_create() fails\n");
+                ret = -1;
+                goto free_link;
+        }
+        /* not check contents, but ensure read() ends without error */
+        while ((len = read(iter_fd, buf, sizeof(buf) - 1)) > 0) {
+                buf[len] = 0;
+                printf("%s", buf);
+        }
+        printf("\n");
+  free_link:
+        if (iter_fd >= 0)
+                close(iter_fd);
+        bpf_link__destroy(link);
+        return ret;
+  }
+
+  static void test_task_file(void)
+  {
+        LIBBPF_OPTS(bpf_iter_attach_opts, opts);
+        struct bpf_iter_task_ex *skel;
+        union bpf_iter_link_info linfo;
+        skel = bpf_iter_task_ex__open_and_load();
+        if (skel == NULL)
+                return;
+        memset(&linfo, 0, sizeof(linfo));
+        linfo.task.pid = getpid();
+        opts.link_info = &linfo;
+        opts.link_info_len = sizeof(linfo);
+        printf("PID %d\n", getpid());
+        do_read_opts(skel->progs.dump_task_file, &opts);
+        bpf_iter_task_ex__destroy(skel);
+  }
+
+  int main(int argc, const char * const * argv)
+  {
+        test_task_file();
+        return 0;
+  }
+
+The following lines are the output of the program.
+::
+
+  PID 1859
+
+     tgid      pid       fd      file
+     1859     1859        0 ffffffff82270aa0
+     1859     1859        1 ffffffff82270aa0
+     1859     1859        2 ffffffff82270aa0
+     1859     1859        3 ffffffff82272980
+     1859     1859        4 ffffffff8225e120
+     1859     1859        5 ffffffff82255120
+     1859     1859        6 ffffffff82254f00
+     1859     1859        7 ffffffff82254d80
+     1859     1859        8 ffffffff8225abe0
+
+------------------
+Without Parameters
+------------------
+
+Let us look at how a BPF iterator without parameters skips files of other
+processes in the system. In this case, the BPF program has to check the pid or
+the tid of tasks, or it will receive every opened file in the system (in the
+current *pid* namespace, actually). So, we usually add a global variable in the
+BPF program to pass a *pid* to the BPF program.
+
+The BPF program would look like the following block.
+
+  ::
+
+    ......
+    int target_pid = 0;
+
+    SEC("iter/task_file")
+    int dump_task_file(struct bpf_iter__task_file *ctx)
+    {
+          ......
+          if (task->tgid != target_pid) /* Check task->pid instead to check thread IDs */
+                  return 0;
+          BPF_SEQ_PRINTF(seq, "%8d %8d %8d %lx\n", task->tgid, task->pid, fd,
+                          (long)file->f_op);
+          return 0;
+    }
+
+The user space program would look like the following block:
+
+  ::
+
+    ......
+    static void test_task_file(void)
+    {
+          ......
+          skel = bpf_iter_task_ex__open_and_load();
+          if (skel == NULL)
+                  return;
+          skel->bss->target_pid = getpid(); /* process ID.  For thread id, use gettid() */
+          memset(&linfo, 0, sizeof(linfo));
+          linfo.task.pid = getpid();
+          opts.link_info = &linfo;
+          opts.link_info_len = sizeof(linfo);
+          ......
+    }
+
+``target_pid`` is a global variable in the BPF program. The user space program
+should initialize the variable with a process ID to skip opened files of other
+processes in the BPF program. When you parametrize a BPF iterator, the iterator
+calls the BPF program fewer times which can save significant resources.
+
+---------------------------
+Parametrizing VMA Iterators
+---------------------------
+
+By default, a BPF VMA iterator includes every VMA in every process.  However,
+you can still specify a process or a thread to include only its VMAs. Unlike
+files, a thread cannot have a separate address space (since Linux 2.6.0-test6).
+Here, using *tid* makes no difference from using *pid*.
+
+----------------------------
+Parametrizing Task Iterators
+----------------------------
+
+A BPF task iterator with *pid* includes all tasks (threads) of a process. The
+BPF program receives these tasks one after another. You can specify a BPF task
+iterator with *tid* parameter to include only the tasks that match the given
+*tid*.
--- a/Documentation/bpf/btf.rst
+++ b/Documentation/bpf/btf.rst
@ -1062,4 +1062,9 @@ format.::
 7. Testing
 ==========

-Kernel bpf selftest `test_btf.c` provides extensive set of BTF-related tests.
+The kernel BPF selftest `tools/testing/selftests/bpf/prog_tests/btf.c`_
+provides an extensive set of BTF-related tests.
+
+.. Links
+.. _tools/testing/selftests/bpf/prog_tests/btf.c:
+   https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/tree/tools/testing/selftests/bpf/prog_tests/btf.c
--- a/Documentation/bpf/index.rst
+++ b/Documentation/bpf/index.rst
@ -24,11 +24,13 @@ that goes into great technical depth about the BPF Architecture.
   maps
   bpf_prog_run
   classic_vs_extended.rst
+   bpf_iterators
   bpf_licensing
   test_debug
   clang-notes
   linux-notes
   other
+   redirect

 .. only::  subproject and html

--- a/Documentation/bpf/instruction-set.rst
+++ b/Documentation/bpf/instruction-set.rst
@ -122,11 +122,11 @@ BPF_END   0xd0   byte swap operations (see `Byte swap instructions`_ below)

 ``BPF_XOR | BPF_K | BPF_ALU`` means::

-  src_reg = (u32) src_reg ^ (u32) imm32
+  dst_reg = (u32) dst_reg ^ (u32) imm32

 ``BPF_XOR | BPF_K | BPF_ALU64`` means::

-  src_reg = src_reg ^ imm32
+  dst_reg = dst_reg ^ imm32


 Byte swap instructions
--- a/Documentation/bpf/kfuncs.rst
+++ b/Documentation/bpf/kfuncs.rst
@ -72,6 +72,30 @@ argument as its size. By default, without __sz annotation, the size of the type
 of the pointer is used. Without __sz annotation, a kfunc cannot accept a void
 pointer.

+2.2.2 __k Annotation
+--------------------
+
+This annotation is only understood for scalar arguments, where it indicates that
+the verifier must check the scalar argument to be a known constant, which does
+not indicate a size parameter, and the value of the constant is relevant to the
+safety of the program.
+
+An example is given below::
+
+        void *bpf_obj_new(u32 local_type_id__k, ...)
+        {
+        ...
+        }
+
+Here, bpf_obj_new uses local_type_id argument to find out the size of that type
+ID in program's BTF and return a sized pointer to it. Each type ID will have a
+distinct size, hence it is crucial to treat each such call as distinct when
+values don't match during verifier state pruning checks.
+
+Hence, whenever a constant scalar argument is accepted by a kfunc which is not a
+size parameter, and the value of the constant matters for program safety, __k
+suffix should be used.
+
 .. _BPF_kfunc_nodef:

 2.3 Using an existing kernel function
@ -137,22 +161,20 @@ KF_ACQUIRE and KF_RET_NULL flags.
 --------------------------

 The KF_TRUSTED_ARGS flag is used for kfuncs taking pointer arguments. It
-indicates that the all pointer arguments will always have a guaranteed lifetime,
-and pointers to kernel objects are always passed to helpers in their unmodified
-form (as obtained from acquire kfuncs).
+indicates that all pointer arguments are valid, and that all pointers to
+BTF objects have been passed in their unmodified form (that is, at a zero
+offset, and without having been obtained from walking another pointer).

-It can be used to enforce that a pointer to a refcounted object acquired from a
-kfunc or BPF helper is passed as an argument to this kfunc without any
-modifications (e.g. pointer arithmetic) such that it is trusted and points to
-the original object.
+There are two types of pointers to kernel objects which are considered "valid":

-Meanwhile, it is also allowed pass pointers to normal memory to such kfuncs,
-but those can have a non-zero offset.
+1. Pointers which are passed as tracepoint or struct_ops callback arguments.
+2. Pointers which were returned from a KF_ACQUIRE or KF_KPTR_GET kfunc.

-This flag is often used for kfuncs that operate (change some property, perform
-some operation) on an object that was obtained using an acquire kfunc. Such
-kfuncs need an unchanged pointer to ensure the integrity of the operation being
-performed on the expected object.
+Pointers to non-BTF objects (e.g. scalar pointers) may also be passed to
+KF_TRUSTED_ARGS kfuncs, and may have a non-zero offset.
+
+The definition of "valid" pointers is subject to change at any time, and has
+absolutely no ABI stability guarantees.

 2.4.6 KF_SLEEPABLE flag
 -----------------------
@ -169,6 +191,15 @@ rebooting or panicking. Due to this additional restrictions apply to these
 calls. At the moment they only require CAP_SYS_BOOT capability, but more can be
 added later.

+2.4.8 KF_RCU flag
+-----------------
+
+The KF_RCU flag is used for kfuncs which have an RCU pointer as their argument.
+When used together with KF_ACQUIRE, it indicates the kfunc should have a
+single argument which must be a trusted argument or a MEM_RCU pointer.
+The argument may have a reference count of 0 and the kfunc must take this
+into consideration.
+
 2.5 Registering the kfuncs
 --------------------------

@ -191,3 +222,201 @@ type. An example is shown below::
                return register_btf_kfunc_id_set(BPF_PROG_TYPE_TRACING, &bpf_task_kfunc_set);
        }
        late_initcall(init_subsystem);
+
+3. Core kfuncs
+==============
+
+The BPF subsystem provides a number of "core" kfuncs that are potentially
+applicable to a wide variety of different possible use cases and programs.
+Those kfuncs are documented here.
+
+3.1 struct task_struct * kfuncs
+-------------------------------
+
+There are a number of kfuncs that allow ``struct task_struct *`` objects to be
+used as kptrs:
+
+.. kernel-doc:: kernel/bpf/helpers.c
+   :identifiers: bpf_task_acquire bpf_task_release
+
+These kfuncs are useful when you want to acquire or release a reference to a
+``struct task_struct *`` that was passed as e.g. a tracepoint arg, or a
+struct_ops callback arg. For example:
+
+.. code-block:: c
+
+	/**
+	 * A trivial example tracepoint program that shows how to
+	 * acquire and release a struct task_struct * pointer.
+	 */
+	SEC("tp_btf/task_newtask")
+	int BPF_PROG(task_acquire_release_example, struct task_struct *task, u64 clone_flags)
+	{
+		struct task_struct *acquired;
+
+		acquired = bpf_task_acquire(task);
+
+		/*
+		 * In a typical program you'd do something like store
+		 * the task in a map, and the map will automatically
+		 * release it later. Here, we release it manually.
+		 */
+		bpf_task_release(acquired);
+		return 0;
+	}
+
+----
+
+A BPF program can also look up a task from a pid. This can be useful if the
+caller doesn't have a trusted pointer to a ``struct task_struct *`` object that
+it can acquire a reference on with bpf_task_acquire().
+
+.. kernel-doc:: kernel/bpf/helpers.c
+   :identifiers: bpf_task_from_pid
+
+Here is an example of it being used:
+
+.. code-block:: c
+
+	SEC("tp_btf/task_newtask")
+	int BPF_PROG(task_get_pid_example, struct task_struct *task, u64 clone_flags)
+	{
+		struct task_struct *lookup;
+
+		lookup = bpf_task_from_pid(task->pid);
+		if (!lookup)
+			/* A task should always be found, as %task is a tracepoint arg. */
+			return -ENOENT;
+
+		if (lookup->pid != task->pid) {
+			/* bpf_task_from_pid() looks up the task via its
+			 * globally-unique pid from the init_pid_ns. Thus,
+			 * the pid of the lookup task should always be the
+			 * same as the input task.
+			 */
+			bpf_task_release(lookup);
+			return -EINVAL;
+		}
+
+		/* bpf_task_from_pid() returns an acquired reference,
+		 * so it must be dropped before returning from the
+		 * tracepoint handler.
+		 */
+		bpf_task_release(lookup);
+		return 0;
+	}
+
+3.2 struct cgroup * kfuncs
+--------------------------
+
+``struct cgroup *`` objects also have acquire and release functions:
+
+.. kernel-doc:: kernel/bpf/helpers.c
+   :identifiers: bpf_cgroup_acquire bpf_cgroup_release
+
+These kfuncs are used in exactly the same manner as bpf_task_acquire() and
+bpf_task_release() respectively, so we won't provide examples for them.
+
+----
+
+You may also acquire a reference to a ``struct cgroup`` kptr that's already
+stored in a map using bpf_cgroup_kptr_get():
+
+.. kernel-doc:: kernel/bpf/helpers.c
+   :identifiers: bpf_cgroup_kptr_get
+
+Here's an example of how it can be used:
+
+.. code-block:: c
+
+	/* struct containing the struct cgroup kptr which is actually stored in the map. */
+	struct __cgroups_kfunc_map_value {
+		struct cgroup __kptr_ref * cgroup;
+	};
+
+	/* The map containing struct __cgroups_kfunc_map_value entries. */
+	struct {
+		__uint(type, BPF_MAP_TYPE_HASH);
+		__type(key, int);
+		__type(value, struct __cgroups_kfunc_map_value);
+		__uint(max_entries, 1);
+	} __cgroups_kfunc_map SEC(".maps");
+
+	/* ... */
+
+	/**
+	 * A simple example tracepoint program showing how a
+	 * struct cgroup kptr that is stored in a map can
+	 * be acquired using the bpf_cgroup_kptr_get() kfunc.
+	 */
+	SEC("tp_btf/cgroup_mkdir")
+	int BPF_PROG(cgroup_kptr_get_example, struct cgroup *cgrp, const char *path)
+	{
+		struct cgroup *kptr;
+		struct __cgroups_kfunc_map_value *v;
+		s32 id = cgrp->self.id;
+
+		/* Assume a cgroup kptr was previously stored in the map. */
+		v = bpf_map_lookup_elem(&__cgroups_kfunc_map, &id);
+		if (!v)
+			return -ENOENT;
+
+		/* Acquire a reference to the cgroup kptr that's already stored in the map. */
+		kptr = bpf_cgroup_kptr_get(&v->cgroup);
+		if (!kptr)
+			/* If no cgroup was present in the map, it's because
+			 * we're racing with another CPU that removed it with
+			 * bpf_kptr_xchg() between the bpf_map_lookup_elem()
+			 * above, and our call to bpf_cgroup_kptr_get().
+			 * bpf_cgroup_kptr_get() internally safely handles this
+			 * race, and will return NULL if the cgroup is no longer
+			 * present in the map by the time we invoke the kfunc.
+			 */
+			return -EBUSY;
+
+		/* Free the reference we just took above. Note that the
+		 * original struct cgroup kptr is still in the map. It will
+		 * be freed either at a later time if another context deletes
+		 * it from the map, or automatically by the BPF subsystem if
+		 * it's still present when the map is destroyed.
+		 */
+		bpf_cgroup_release(kptr);
+
+		return 0;
+	}
+
+----
+
+Another kfunc available for interacting with ``struct cgroup *`` objects is
+bpf_cgroup_ancestor(). This allows callers to access the ancestor of a cgroup,
+and return it as a cgroup kptr.
+
+.. kernel-doc:: kernel/bpf/helpers.c
+   :identifiers: bpf_cgroup_ancestor
+
+Eventually, BPF should be updated to allow this to happen with a normal memory
+load in the program itself. This is currently not possible without more work in
+the verifier. bpf_cgroup_ancestor() can be used as follows:
+
+.. code-block:: c
+
+	/**
+	 * Simple tracepoint example that illustrates how a cgroup's
+	 * ancestor can be accessed using bpf_cgroup_ancestor().
+	 */
+	SEC("tp_btf/cgroup_mkdir")
+	int BPF_PROG(cgrp_ancestor_example, struct cgroup *cgrp, const char *path)
+	{
+		struct cgroup *parent;
+
+		/* The parent cgroup resides at the level before the current cgroup's level. */
+		parent = bpf_cgroup_ancestor(cgrp, cgrp->level - 1);
+		if (!parent)
+			return -ENOENT;
+
+		bpf_printk("Parent id is %d", parent->self.id);
+
+		/* Release the parent cgroup reference that was acquired above. */
+		bpf_cgroup_release(parent);
+		return 0;
+	}
--- a/Documentation/bpf/libbpf/index.rst
+++ b/Documentation/bpf/libbpf/index.rst
@ -1,5 +1,7 @@
 .. SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)

+.. _libbpf:
+
 libbpf
 ======

@ -7,6 +9,7 @@ libbpf
   :maxdepth: 1

   API Documentation <https://libbpf.readthedocs.io/en/latest/api.html>
+   program_types
   libbpf_naming_convention
   libbpf_build

--- a/Documentation/bpf/libbpf/program_types.rst
+++ b/Documentation/bpf/libbpf/program_types.rst
@ -0,0 +1,203 @@
+.. SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+
+.. _program_types_and_elf:
+
+Program Types and ELF Sections
+==============================
+
+The table below lists the program types, their attach types where relevant and the ELF section
+names supported by libbpf for them. The ELF section names follow these rules:
+
+- ``type`` is an exact match, e.g. ``SEC("socket")``
+- ``type+`` means it can be either exact ``SEC("type")`` or well-formed ``SEC("type/extras")``
+  with a '``/``' separator between ``type`` and ``extras``.
+
+When ``extras`` are specified, they provide details of how to auto-attach the BPF program.  The
+format of ``extras`` depends on the program type, e.g. ``SEC("tracepoint/<category>/<name>")``
+for tracepoints or ``SEC("usdt/<path>:<provider>:<name>")`` for USDT probes. The extras are
+described in more detail in the footnotes.
+
+
+-------------------------------------------+----------------------------------------+----------------------------------+-----------+
+| Program Type                              | Attach Type                            | ELF Section Name                 | Sleepable |
+===========================================+========================================+==================================+===========+
+| ``BPF_PROG_TYPE_CGROUP_DEVICE``           | ``BPF_CGROUP_DEVICE``                  | ``cgroup/dev``                   |           |
+-------------------------------------------+----------------------------------------+----------------------------------+-----------+
+| ``BPF_PROG_TYPE_CGROUP_SKB``              |                                        | ``cgroup/skb``                   |           |
+                                           +----------------------------------------+----------------------------------+-----------+
+|                                           | ``BPF_CGROUP_INET_EGRESS``             | ``cgroup_skb/egress``            |           |
+                                           +----------------------------------------+----------------------------------+-----------+
+|                                           | ``BPF_CGROUP_INET_INGRESS``            | ``cgroup_skb/ingress``           |           |
+-------------------------------------------+----------------------------------------+----------------------------------+-----------+
+| ``BPF_PROG_TYPE_CGROUP_SOCKOPT``          | ``BPF_CGROUP_GETSOCKOPT``              | ``cgroup/getsockopt``            |           |
+                                           +----------------------------------------+----------------------------------+-----------+
+|                                           | ``BPF_CGROUP_SETSOCKOPT``              | ``cgroup/setsockopt``            |           |
+-------------------------------------------+----------------------------------------+----------------------------------+-----------+
+| ``BPF_PROG_TYPE_CGROUP_SOCK_ADDR``        | ``BPF_CGROUP_INET4_BIND``              | ``cgroup/bind4``                 |           |
+                                           +----------------------------------------+----------------------------------+-----------+
+|                                           | ``BPF_CGROUP_INET4_CONNECT``           | ``cgroup/connect4``              |           |
+                                           +----------------------------------------+----------------------------------+-----------+
+|                                           | ``BPF_CGROUP_INET4_GETPEERNAME``       | ``cgroup/getpeername4``          |           |
+                                           +----------------------------------------+----------------------------------+-----------+
+|                                           | ``BPF_CGROUP_INET4_GETSOCKNAME``       | ``cgroup/getsockname4``          |           |
+                                           +----------------------------------------+----------------------------------+-----------+
+|                                           | ``BPF_CGROUP_INET6_BIND``              | ``cgroup/bind6``                 |           |
+                                           +----------------------------------------+----------------------------------+-----------+
+|                                           | ``BPF_CGROUP_INET6_CONNECT``           | ``cgroup/connect6``              |           |
+                                           +----------------------------------------+----------------------------------+-----------+
+|                                           | ``BPF_CGROUP_INET6_GETPEERNAME``       | ``cgroup/getpeername6``          |           |
+                                           +----------------------------------------+----------------------------------+-----------+
+|                                           | ``BPF_CGROUP_INET6_GETSOCKNAME``       | ``cgroup/getsockname6``          |           |
+                                           +----------------------------------------+----------------------------------+-----------+
+|                                           | ``BPF_CGROUP_UDP4_RECVMSG``            | ``cgroup/recvmsg4``              |           |
+                                           +----------------------------------------+----------------------------------+-----------+
+|                                           | ``BPF_CGROUP_UDP4_SENDMSG``            | ``cgroup/sendmsg4``              |           |
+                                           +----------------------------------------+----------------------------------+-----------+
+|                                           | ``BPF_CGROUP_UDP6_RECVMSG``            | ``cgroup/recvmsg6``              |           |
+                                           +----------------------------------------+----------------------------------+-----------+
+|                                           | ``BPF_CGROUP_UDP6_SENDMSG``            | ``cgroup/sendmsg6``              |           |
+-------------------------------------------+----------------------------------------+----------------------------------+-----------+
+| ``BPF_PROG_TYPE_CGROUP_SOCK``             | ``BPF_CGROUP_INET4_POST_BIND``         | ``cgroup/post_bind4``            |           |
+                                           +----------------------------------------+----------------------------------+-----------+
+|                                           | ``BPF_CGROUP_INET6_POST_BIND``         | ``cgroup/post_bind6``            |           |
+                                           +----------------------------------------+----------------------------------+-----------+
+|                                           | ``BPF_CGROUP_INET_SOCK_CREATE``        | ``cgroup/sock_create``           |           |
+                                           +                                        +----------------------------------+-----------+
+|                                           |                                        | ``cgroup/sock``                  |           |
+                                           +----------------------------------------+----------------------------------+-----------+
+|                                           | ``BPF_CGROUP_INET_SOCK_RELEASE``       | ``cgroup/sock_release``          |           |
+-------------------------------------------+----------------------------------------+----------------------------------+-----------+
+| ``BPF_PROG_TYPE_CGROUP_SYSCTL``           | ``BPF_CGROUP_SYSCTL``                  | ``cgroup/sysctl``                |           |
+-------------------------------------------+----------------------------------------+----------------------------------+-----------+
+| ``BPF_PROG_TYPE_EXT``                     |                                        | ``freplace+`` [#fentry]_         |           |
+-------------------------------------------+----------------------------------------+----------------------------------+-----------+
+| ``BPF_PROG_TYPE_FLOW_DISSECTOR``          | ``BPF_FLOW_DISSECTOR``                 | ``flow_dissector``               |           |
+-------------------------------------------+----------------------------------------+----------------------------------+-----------+
+| ``BPF_PROG_TYPE_KPROBE``                  |                                        | ``kprobe+`` [#kprobe]_           |           |
+                                           +                                        +----------------------------------+-----------+
+|                                           |                                        | ``kretprobe+`` [#kprobe]_        |           |
+                                           +                                        +----------------------------------+-----------+
+|                                           |                                        | ``ksyscall+`` [#ksyscall]_       |           |
+                                           +                                        +----------------------------------+-----------+
+|                                           |                                        |  ``kretsyscall+`` [#ksyscall]_   |           |
+                                           +                                        +----------------------------------+-----------+
+|                                           |                                        | ``uprobe+`` [#uprobe]_           |           |
+                                           +                                        +----------------------------------+-----------+
+|                                           |                                        | ``uprobe.s+`` [#uprobe]_         | Yes       |
+                                           +                                        +----------------------------------+-----------+
+|                                           |                                        | ``uretprobe+`` [#uprobe]_        |           |
+                                           +                                        +----------------------------------+-----------+
+|                                           |                                        | ``uretprobe.s+`` [#uprobe]_      | Yes       |
+                                           +                                        +----------------------------------+-----------+
+|                                           |                                        | ``usdt+`` [#usdt]_               |           |
+                                           +----------------------------------------+----------------------------------+-----------+
+|                                           | ``BPF_TRACE_KPROBE_MULTI``             | ``kprobe.multi+`` [#kpmulti]_    |           |
+                                           +                                        +----------------------------------+-----------+
+|                                           |                                        | ``kretprobe.multi+`` [#kpmulti]_ |           |
+-------------------------------------------+----------------------------------------+----------------------------------+-----------+
+| ``BPF_PROG_TYPE_LIRC_MODE2``              | ``BPF_LIRC_MODE2``                     | ``lirc_mode2``                   |           |
+-------------------------------------------+----------------------------------------+----------------------------------+-----------+
+| ``BPF_PROG_TYPE_LSM``                     | ``BPF_LSM_CGROUP``                     | ``lsm_cgroup+``                  |           |
+                                           +----------------------------------------+----------------------------------+-----------+
+|                                           | ``BPF_LSM_MAC``                        | ``lsm+`` [#lsm]_                 |           |
+                                           +                                        +----------------------------------+-----------+
+|                                           |                                        | ``lsm.s+`` [#lsm]_               | Yes       |
+-------------------------------------------+----------------------------------------+----------------------------------+-----------+
+| ``BPF_PROG_TYPE_LWT_IN``                  |                                        | ``lwt_in``                       |           |
+-------------------------------------------+----------------------------------------+----------------------------------+-----------+
+| ``BPF_PROG_TYPE_LWT_OUT``                 |                                        | ``lwt_out``                      |           |
+-------------------------------------------+----------------------------------------+----------------------------------+-----------+
+| ``BPF_PROG_TYPE_LWT_SEG6LOCAL``           |                                        | ``lwt_seg6local``                |           |
+-------------------------------------------+----------------------------------------+----------------------------------+-----------+
+| ``BPF_PROG_TYPE_LWT_XMIT``                |                                        | ``lwt_xmit``                     |           |
+-------------------------------------------+----------------------------------------+----------------------------------+-----------+
+| ``BPF_PROG_TYPE_PERF_EVENT``              |                                        | ``perf_event``                   |           |
+-------------------------------------------+----------------------------------------+----------------------------------+-----------+
+| ``BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE`` |                                        | ``raw_tp.w+`` [#rawtp]_          |           |
+                                           +                                        +----------------------------------+-----------+
+|                                           |                                        | ``raw_tracepoint.w+``            |           |
+-------------------------------------------+----------------------------------------+----------------------------------+-----------+
+| ``BPF_PROG_TYPE_RAW_TRACEPOINT``          |                                        | ``raw_tp+`` [#rawtp]_            |           |
+                                           +                                        +----------------------------------+-----------+
+|                                           |                                        | ``raw_tracepoint+``              |           |
+-------------------------------------------+----------------------------------------+----------------------------------+-----------+
+| ``BPF_PROG_TYPE_SCHED_ACT``               |                                        | ``action``                       |           |
+-------------------------------------------+----------------------------------------+----------------------------------+-----------+
+| ``BPF_PROG_TYPE_SCHED_CLS``               |                                        | ``classifier``                   |           |
+                                           +                                        +----------------------------------+-----------+
+|                                           |                                        | ``tc``                           |           |
+-------------------------------------------+----------------------------------------+----------------------------------+-----------+
+| ``BPF_PROG_TYPE_SK_LOOKUP``               | ``BPF_SK_LOOKUP``                      | ``sk_lookup``                    |           |
+-------------------------------------------+----------------------------------------+----------------------------------+-----------+
+| ``BPF_PROG_TYPE_SK_MSG``                  | ``BPF_SK_MSG_VERDICT``                 | ``sk_msg``                       |           |
+-------------------------------------------+----------------------------------------+----------------------------------+-----------+
+| ``BPF_PROG_TYPE_SK_REUSEPORT``            | ``BPF_SK_REUSEPORT_SELECT_OR_MIGRATE`` | ``sk_reuseport/migrate``         |           |
+                                           +----------------------------------------+----------------------------------+-----------+
+|                                           | ``BPF_SK_REUSEPORT_SELECT``            | ``sk_reuseport``                 |           |
+-------------------------------------------+----------------------------------------+----------------------------------+-----------+
+| ``BPF_PROG_TYPE_SK_SKB``                  |                                        | ``sk_skb``                       |           |
+                                           +----------------------------------------+----------------------------------+-----------+
+|                                           | ``BPF_SK_SKB_STREAM_PARSER``           | ``sk_skb/stream_parser``         |           |
+                                           +----------------------------------------+----------------------------------+-----------+
+|                                           | ``BPF_SK_SKB_STREAM_VERDICT``          | ``sk_skb/stream_verdict``        |           |
+-------------------------------------------+----------------------------------------+----------------------------------+-----------+
+| ``BPF_PROG_TYPE_SOCKET_FILTER``           |                                        | ``socket``                       |           |
+-------------------------------------------+----------------------------------------+----------------------------------+-----------+
+| ``BPF_PROG_TYPE_SOCK_OPS``                | ``BPF_CGROUP_SOCK_OPS``                | ``sockops``                      |           |
+-------------------------------------------+----------------------------------------+----------------------------------+-----------+
+| ``BPF_PROG_TYPE_STRUCT_OPS``              |                                        | ``struct_ops+``                  |           |
+-------------------------------------------+----------------------------------------+----------------------------------+-----------+
+| ``BPF_PROG_TYPE_SYSCALL``                 |                                        | ``syscall``                      | Yes       |
+-------------------------------------------+----------------------------------------+----------------------------------+-----------+
+| ``BPF_PROG_TYPE_TRACEPOINT``              |                                        | ``tp+`` [#tp]_                   |           |
+                                           +                                        +----------------------------------+-----------+
+|                                           |                                        | ``tracepoint+`` [#tp]_           |           |
+-------------------------------------------+----------------------------------------+----------------------------------+-----------+
+| ``BPF_PROG_TYPE_TRACING``                 | ``BPF_MODIFY_RETURN``                  | ``fmod_ret+`` [#fentry]_         |           |
+                                           +                                        +----------------------------------+-----------+
+|                                           |                                        | ``fmod_ret.s+`` [#fentry]_       | Yes       |
+                                           +----------------------------------------+----------------------------------+-----------+
+|                                           | ``BPF_TRACE_FENTRY``                   | ``fentry+`` [#fentry]_           |           |
+                                           +                                        +----------------------------------+-----------+
+|                                           |                                        | ``fentry.s+`` [#fentry]_         | Yes       |
+                                           +----------------------------------------+----------------------------------+-----------+
+|                                           | ``BPF_TRACE_FEXIT``                    | ``fexit+`` [#fentry]_            |           |
+                                           +                                        +----------------------------------+-----------+
+|                                           |                                        | ``fexit.s+`` [#fentry]_          | Yes       |
+                                           +----------------------------------------+----------------------------------+-----------+
+|                                           | ``BPF_TRACE_ITER``                     | ``iter+`` [#iter]_               |           |
+                                           +                                        +----------------------------------+-----------+
+|                                           |                                        | ``iter.s+`` [#iter]_             | Yes       |
+                                           +----------------------------------------+----------------------------------+-----------+
+|                                           | ``BPF_TRACE_RAW_TP``                   | ``tp_btf+`` [#fentry]_           |           |
+-------------------------------------------+----------------------------------------+----------------------------------+-----------+
+| ``BPF_PROG_TYPE_XDP``                     | ``BPF_XDP_CPUMAP``                     | ``xdp.frags/cpumap``             |           |
+                                           +                                        +----------------------------------+-----------+
+|                                           |                                        | ``xdp/cpumap``                   |           |
+                                           +----------------------------------------+----------------------------------+-----------+
+|                                           | ``BPF_XDP_DEVMAP``                     | ``xdp.frags/devmap``             |           |
+                                           +                                        +----------------------------------+-----------+
+|                                           |                                        | ``xdp/devmap``                   |           |
+                                           +----------------------------------------+----------------------------------+-----------+
+|                                           | ``BPF_XDP``                            | ``xdp.frags``                    |           |
+                                           +                                        +----------------------------------+-----------+
+|                                           |                                        | ``xdp``                          |           |
+-------------------------------------------+----------------------------------------+----------------------------------+-----------+
+
+
+.. rubric:: Footnotes
+
+.. [#fentry] The ``fentry`` attach format is ``fentry[.s]/<function>``.
+.. [#kprobe] The ``kprobe`` attach format is ``kprobe/<function>[+<offset>]``. Valid
+             characters for ``function`` are ``a-zA-Z0-9_.`` and ``offset`` must be a valid
+             non-negative integer.
+.. [#ksyscall] The ``ksyscall`` attach format is ``ksyscall/<syscall>``.
+.. [#uprobe] The ``uprobe`` attach format is ``uprobe[.s]/<path>:<function>[+<offset>]``.
+.. [#usdt] The ``usdt`` attach format is ``usdt/<path>:<provider>:<name>``.
+.. [#kpmulti] The ``kprobe.multi`` attach format is ``kprobe.multi/<pattern>`` where ``pattern``
+              supports ``*`` and ``?`` wildcards. Valid characters for pattern are
+              ``a-zA-Z0-9_.*?``.
+.. [#lsm] The ``lsm`` attach format is ``lsm[.s]/<hook>``.
+.. [#rawtp] The ``raw_tp`` attach format is ``raw_tracepoint[.w]/<tracepoint>``.
+.. [#tp] The ``tracepoint`` attach format is ``tracepoint/<category>/<name>``.
+.. [#iter] The ``iter`` attach format is ``iter[.s]/<struct-name>``.
--- a/Documentation/bpf/map_array.rst
+++ b/Documentation/bpf/map_array.rst
@ -0,0 +1,262 @@
+.. SPDX-License-Identifier: GPL-2.0-only
+.. Copyright (C) 2022 Red Hat, Inc.
+
+================================================
+BPF_MAP_TYPE_ARRAY and BPF_MAP_TYPE_PERCPU_ARRAY
+================================================
+
+.. note::
+   - ``BPF_MAP_TYPE_ARRAY`` was introduced in kernel version 3.19
+   - ``BPF_MAP_TYPE_PERCPU_ARRAY`` was introduced in version 4.6
+
+``BPF_MAP_TYPE_ARRAY`` and ``BPF_MAP_TYPE_PERCPU_ARRAY`` provide generic array
+storage. The key type is an unsigned 32-bit integer (4 bytes) and the map is
+of constant size. The size of the array is defined in ``max_entries`` at
+creation time. All array elements are pre-allocated and zero initialized when
+created. ``BPF_MAP_TYPE_PERCPU_ARRAY`` uses a different memory region for each
+CPU whereas ``BPF_MAP_TYPE_ARRAY`` uses the same memory region. The value
+stored can be of any size, however, all array elements are aligned to 8
+bytes.
+
+Since kernel 5.5, memory mapping may be enabled for ``BPF_MAP_TYPE_ARRAY`` by
+setting the flag ``BPF_F_MMAPABLE``. The map definition is page-aligned and
+starts on the first page. Sufficient page-sized and page-aligned blocks of
+memory are allocated to store all array values, starting on the second page,
+which in some cases will result in over-allocation of memory. The benefit of
+using this is increased performance and ease of use since userspace programs
+would not be required to use helper functions to access and mutate data.
+
+Usage
+=====
+
+Kernel BPF
+----------
+
+bpf_map_lookup_elem()
+~~~~~~~~~~~~~~~~~~~~~
+
+.. code-block:: c
+
+   void *bpf_map_lookup_elem(struct bpf_map *map, const void *key)
+
+Array elements can be retrieved using the ``bpf_map_lookup_elem()`` helper.
+This helper returns a pointer into the array element, so to avoid data races
+with userspace reading the value, the user must use primitives like
+``__sync_fetch_and_add()`` when updating the value in-place.
+
+bpf_map_update_elem()
+~~~~~~~~~~~~~~~~~~~~~
+
+.. code-block:: c
+
+   long bpf_map_update_elem(struct bpf_map *map, const void *key, const void *value, u64 flags)
+
+Array elements can be updated using the ``bpf_map_update_elem()`` helper.
+
+``bpf_map_update_elem()`` returns 0 on success, or negative error in case of
+failure.
+
+Since the array is of constant size, ``bpf_map_delete_elem()`` is not supported.
+To clear an array element, you may use ``bpf_map_update_elem()`` to insert a
+zero value to that index.
+
+Per CPU Array
+-------------
+
+Values stored in ``BPF_MAP_TYPE_ARRAY`` can be accessed by multiple programs
+across different CPUs. To restrict storage to a single CPU, you may use a
+``BPF_MAP_TYPE_PERCPU_ARRAY``.
+
+When using a ``BPF_MAP_TYPE_PERCPU_ARRAY`` the ``bpf_map_update_elem()`` and
+``bpf_map_lookup_elem()`` helpers automatically access the slot for the current
+CPU.
+
+bpf_map_lookup_percpu_elem()
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. code-block:: c
+
+   void *bpf_map_lookup_percpu_elem(struct bpf_map *map, const void *key, u32 cpu)
+
+The ``bpf_map_lookup_percpu_elem()`` helper can be used to look up the array
+value for a specific CPU. Returns a pointer to the value on success, or ``NULL``
+if no entry was found or ``cpu`` is invalid.
+
+Concurrency
+-----------
+
+Since kernel version 5.1, the BPF infrastructure provides ``struct bpf_spin_lock``
+to synchronize access.
+
+Userspace
+---------
+
+Access from userspace uses libbpf APIs with the same names as above, with
+the map identified by its ``fd``.
+
+Examples
+========
+
+Please see the ``tools/testing/selftests/bpf`` directory for functional
+examples. The code samples below demonstrate API usage.
+
+Kernel BPF
+----------
+
+This snippet shows how to declare an array in a BPF program.
+
+.. code-block:: c
+
+    struct {
+            __uint(type, BPF_MAP_TYPE_ARRAY);
+            __type(key, u32);
+            __type(value, long);
+            __uint(max_entries, 256);
+    } my_map SEC(".maps");
+
+
+This example BPF program shows how to access an array element.
+
+.. code-block:: c
+
+    int bpf_prog(struct __sk_buff *skb)
+    {
+            struct iphdr ip;
+            int index;
+            long *value;
+
+            if (bpf_skb_load_bytes(skb, ETH_HLEN, &ip, sizeof(ip)) < 0)
+                    return 0;
+
+            index = ip.protocol;
+            value = bpf_map_lookup_elem(&my_map, &index);
+            if (value)
+                    __sync_fetch_and_add(value, skb->len);
+
+            return 0;
+    }
+
+Userspace
+---------
+
+BPF_MAP_TYPE_ARRAY
+~~~~~~~~~~~~~~~~~~
+
+This snippet shows how to create an array, using ``bpf_map_create_opts`` to
+set flags.
+
+.. code-block:: c
+
+    #include <bpf/libbpf.h>
+    #include <bpf/bpf.h>
+
+    int create_array()
+    {
+            int fd;
+            LIBBPF_OPTS(bpf_map_create_opts, opts, .map_flags = BPF_F_MMAPABLE);
+
+            fd = bpf_map_create(BPF_MAP_TYPE_ARRAY,
+                                "example_array",       /* name */
+                                sizeof(__u32),         /* key size */
+                                sizeof(long),          /* value size */
+                                256,                   /* max entries */
+                                &opts);                /* create opts */
+            return fd;
+    }
+
+This snippet shows how to initialize the elements of an array.
+
+.. code-block:: c
+
+    int initialize_array(int fd)
+    {
+            __u32 i;
+            long value;
+            int ret;
+
+            for (i = 0; i < 256; i++) {
+                    value = i;
+                    ret = bpf_map_update_elem(fd, &i, &value, BPF_ANY);
+                    if (ret < 0)
+                            return ret;
+            }
+
+            return ret;
+    }
+
+This snippet shows how to retrieve an element value from an array.
+
+.. code-block:: c
+
+    int lookup(int fd)
+    {
+            __u32 index = 42;
+            long value;
+            int ret;
+
+            ret = bpf_map_lookup_elem(fd, &index, &value);
+            if (ret < 0)
+                    return ret;
+
+            /* use value here */
+            assert(value == 42);
+
+            return ret;
+    }
+
+BPF_MAP_TYPE_PERCPU_ARRAY
+~~~~~~~~~~~~~~~~~~~~~~~~~
+
+This snippet shows how to initialize the elements of a per CPU array.
+
+.. code-block:: c
+
+    int initialize_array(int fd)
+    {
+            int ncpus = libbpf_num_possible_cpus();
+            long values[ncpus];
+            __u32 i, j;
+            int ret;
+
+            for (i = 0; i < 256 ; i++) {
+                    for (j = 0; j < ncpus; j++)
+                            values[j] = i;
+                    ret = bpf_map_update_elem(fd, &i, &values, BPF_ANY);
+                    if (ret < 0)
+                            return ret;
+            }
+
+            return ret;
+    }
+
+This snippet shows how to access the per CPU elements of an array value.
+
+.. code-block:: c
+
+    int lookup(int fd)
+    {
+            int ncpus = libbpf_num_possible_cpus();
+            __u32 index = 42, j;
+            long values[ncpus];
+            int ret;
+
+            ret = bpf_map_lookup_elem(fd, &index, &values);
+            if (ret < 0)
+                    return ret;
+
+            for (j = 0; j < ncpus; j++) {
+                    /* Use per CPU value here */
+                    assert(values[j] == 42);
+            }
+
+            return ret;
+    }
+
+Semantics
+=========
+
+As shown in the example above, when accessing a ``BPF_MAP_TYPE_PERCPU_ARRAY``
+in userspace, each value is an array with ``ncpus`` elements.
+
+When calling ``bpf_map_update_elem()`` the flag ``BPF_NOEXIST`` can not be used
+for these maps.
--- a/Documentation/bpf/map_bloom_filter.rst
+++ b/Documentation/bpf/map_bloom_filter.rst
@ -0,0 +1,174 @@
+.. SPDX-License-Identifier: GPL-2.0-only
+.. Copyright (C) 2022 Red Hat, Inc.
+
+=========================
+BPF_MAP_TYPE_BLOOM_FILTER
+=========================
+
+.. note::
+   - ``BPF_MAP_TYPE_BLOOM_FILTER`` was introduced in kernel version 5.16
+
+``BPF_MAP_TYPE_BLOOM_FILTER`` provides a BPF bloom filter map. Bloom
+filters are a space-efficient probabilistic data structure used to
+quickly test whether an element exists in a set. In a bloom filter,
+false positives are possible whereas false negatives are not.
+
+The bloom filter map does not have keys, only values. When the bloom
+filter map is created, it must be created with a ``key_size`` of 0.  The
+bloom filter map supports two operations:
+
+- push: adding an element to the map
+- peek: determining whether an element is present in the map
+
+BPF programs must use ``bpf_map_push_elem`` to add an element to the
+bloom filter map and ``bpf_map_peek_elem`` to query the map. These
+operations are exposed to userspace applications using the existing
+``bpf`` syscall in the following way:
+
+- ``BPF_MAP_UPDATE_ELEM`` -> push
+- ``BPF_MAP_LOOKUP_ELEM`` -> peek
+
+The ``max_entries`` size that is specified at map creation time is used
+to approximate a reasonable bitmap size for the bloom filter, and is not
+otherwise strictly enforced. If the user wishes to insert more entries
+into the bloom filter than ``max_entries``, this may lead to a higher
+false positive rate.
+
+The number of hashes to use for the bloom filter is configurable using
+the lower 4 bits of ``map_extra`` in ``union bpf_attr`` at map creation
+time. If no number is specified, the default used will be 5 hash
+functions. In general, using more hashes decreases both the false
+positive rate and the speed of a lookup.
+
+It is not possible to delete elements from a bloom filter map. A bloom
+filter map may be used as an inner map. The user is responsible for
+synchronising concurrent updates and lookups to ensure no false negative
+lookups occur.
+
+Usage
+=====
+
+Kernel BPF
+----------
+
+bpf_map_push_elem()
+~~~~~~~~~~~~~~~~~~~
+
+.. code-block:: c
+
+   long bpf_map_push_elem(struct bpf_map *map, const void *value, u64 flags)
+
+A ``value`` can be added to a bloom filter using the
+``bpf_map_push_elem()`` helper. The ``flags`` parameter must be set to
+``BPF_ANY`` when adding an entry to the bloom filter. This helper
+returns ``0`` on success, or negative error in case of failure.
+
+bpf_map_peek_elem()
+~~~~~~~~~~~~~~~~~~~
+
+.. code-block:: c
+
+   long bpf_map_peek_elem(struct bpf_map *map, void *value)
+
+The ``bpf_map_peek_elem()`` helper is used to determine whether
+``value`` is present in the bloom filter map. This helper returns ``0``
+if ``value`` is probably present in the map, or ``-ENOENT`` if ``value``
+is definitely not present in the map.
+
+Userspace
+---------
+
+bpf_map_update_elem()
+~~~~~~~~~~~~~~~~~~~~~
+
+.. code-block:: c
+
+   int bpf_map_update_elem (int fd, const void *key, const void *value, __u64 flags)
+
+A userspace program can add a ``value`` to a bloom filter using libbpf's
+``bpf_map_update_elem`` function. The ``key`` parameter must be set to
+``NULL`` and ``flags`` must be set to ``BPF_ANY``. Returns ``0`` on
+success, or negative error in case of failure.
+
+bpf_map_lookup_elem()
+~~~~~~~~~~~~~~~~~~~~~
+
+.. code-block:: c
+
+   int bpf_map_lookup_elem (int fd, const void *key, void *value)
+
+A userspace program can determine the presence of ``value`` in a bloom
+filter using libbpf's ``bpf_map_lookup_elem`` function. The ``key``
+parameter must be set to ``NULL``. Returns ``0`` if ``value`` is
+probably present in the map, or ``-ENOENT`` if ``value`` is definitely
+not present in the map.
+
+Examples
+========
+
+Kernel BPF
+----------
+
+This snippet shows how to declare a bloom filter in a BPF program:
+
+.. code-block:: c
+
+    struct {
+            __uint(type, BPF_MAP_TYPE_BLOOM_FILTER);
+            __type(value, __u32);
+            __uint(max_entries, 1000);
+            __uint(map_extra, 3);
+    } bloom_filter SEC(".maps");
+
+This snippet shows how to determine presence of a value in a bloom
+filter in a BPF program:
+
+.. code-block:: c
+
+    void *lookup(__u32 key)
+    {
+            if (bpf_map_peek_elem(&bloom_filter, &key) == 0) {
+                    /* Verify not a false positive and fetch an associated
+                     * value using a secondary lookup, e.g. in a hash table
+                     */
+                    return bpf_map_lookup_elem(&hash_table, &key);
+            }
+            return 0;
+    }
+
+Userspace
+---------
+
+This snippet shows how to use libbpf to create a bloom filter map from
+userspace:
+
+.. code-block:: c
+
+    int create_bloom()
+    {
+            LIBBPF_OPTS(bpf_map_create_opts, opts,
+                        .map_extra = 3);             /* number of hashes */
+
+            return bpf_map_create(BPF_MAP_TYPE_BLOOM_FILTER,
+                                  "ipv6_bloom",      /* name */
+                                  0,                 /* key size, must be zero */
+                                  sizeof(ipv6_addr), /* value size */
+                                  10000,             /* max entries */
+                                  &opts);            /* create options */
+    }
+
+This snippet shows how to add an element to a bloom filter from
+userspace:
+
+.. code-block:: c
+
+    int add_element(struct bpf_map *bloom_map, __u32 value)
+    {
+            int bloom_fd = bpf_map__fd(bloom_map);
+            return bpf_map_update_elem(bloom_fd, NULL, &value, BPF_ANY);
+    }
+
+References
+==========
+
+https://lwn.net/ml/bpf/20210831225005.2762202-1-joannekoong@fb.com/
--- a/Documentation/bpf/map_cgrp_storage.rst
+++ b/Documentation/bpf/map_cgrp_storage.rst
@ -0,0 +1,109 @@
+.. SPDX-License-Identifier: GPL-2.0-only
+.. Copyright (C) 2022 Meta Platforms, Inc. and affiliates.
+
+=========================
+BPF_MAP_TYPE_CGRP_STORAGE
+=========================
+
+The ``BPF_MAP_TYPE_CGRP_STORAGE`` map type represents a local fixed-size
+storage for cgroups. It is only available with ``CONFIG_CGROUPS``.
+The programs are made available by the same Kconfig. The
+data for a particular cgroup can be retrieved by looking up the map
+with that cgroup.
+
+This document describes the usage and semantics of the
+``BPF_MAP_TYPE_CGRP_STORAGE`` map type.
+
+Usage
+=====
+
+The map key must be ``sizeof(int)`` representing a cgroup fd.
+To access the storage in a program, use ``bpf_cgrp_storage_get``::
+
+    void *bpf_cgrp_storage_get(struct bpf_map *map, struct cgroup *cgroup, void *value, u64 flags)
+
+``flags`` could be 0 or ``BPF_LOCAL_STORAGE_GET_F_CREATE`` which indicates that
+a new local storage will be created if one does not exist.
+
+The local storage can be removed with ``bpf_cgrp_storage_delete``::
+
+    long bpf_cgrp_storage_delete(struct bpf_map *map, struct cgroup *cgroup)
+
+The map is available to all program types.
+
+Examples
+========
+
+A BPF program example with BPF_MAP_TYPE_CGRP_STORAGE::
+
+    #include <vmlinux.h>
+    #include <bpf/bpf_helpers.h>
+    #include <bpf/bpf_tracing.h>
+
+    struct {
+            __uint(type, BPF_MAP_TYPE_CGRP_STORAGE);
+            __uint(map_flags, BPF_F_NO_PREALLOC);
+            __type(key, int);
+            __type(value, long);
+    } cgrp_storage SEC(".maps");
+
+    SEC("tp_btf/sys_enter")
+    int BPF_PROG(on_enter, struct pt_regs *regs, long id)
+    {
+            struct task_struct *task = bpf_get_current_task_btf();
+            long *ptr;
+
+            ptr = bpf_cgrp_storage_get(&cgrp_storage, task->cgroups->dfl_cgrp, 0,
+                                       BPF_LOCAL_STORAGE_GET_F_CREATE);
+            if (ptr)
+                __sync_fetch_and_add(ptr, 1);
+
+            return 0;
+    }
+
+Userspace accessing map declared above::
+
+    #include <bpf/bpf.h>
+    #include <bpf/libbpf.h>
+
+    __u32 map_lookup(struct bpf_map *map, int cgrp_fd)
+    {
+            long value;
+
+            if (bpf_map_lookup_elem(bpf_map__fd(map), &cgrp_fd, &value) < 0)
+                    return 0;
+            return value;
+    }
+
+Difference Between BPF_MAP_TYPE_CGRP_STORAGE and BPF_MAP_TYPE_CGROUP_STORAGE
+============================================================================
+
+The old cgroup storage map ``BPF_MAP_TYPE_CGROUP_STORAGE`` has been marked as
+deprecated (renamed to ``BPF_MAP_TYPE_CGROUP_STORAGE_DEPRECATED``). The new
+``BPF_MAP_TYPE_CGRP_STORAGE`` map should be used instead. The following
+illustrates the main differences between ``BPF_MAP_TYPE_CGRP_STORAGE`` and
+``BPF_MAP_TYPE_CGROUP_STORAGE_DEPRECATED``.
+
+(1). ``BPF_MAP_TYPE_CGRP_STORAGE`` can be used by all program types while
+     ``BPF_MAP_TYPE_CGROUP_STORAGE_DEPRECATED`` is available only to cgroup program types
+     like BPF_CGROUP_INET_INGRESS or BPF_CGROUP_SOCK_OPS, etc.
+
+(2). ``BPF_MAP_TYPE_CGRP_STORAGE`` supports local storage for more than one
+     cgroup while ``BPF_MAP_TYPE_CGROUP_STORAGE_DEPRECATED`` only supports one cgroup
+     which is attached by a BPF program.
+
+(3). ``BPF_MAP_TYPE_CGROUP_STORAGE_DEPRECATED`` allocates local storage at attach time so
+     ``bpf_get_local_storage()`` always returns non-NULL local storage.
+     ``BPF_MAP_TYPE_CGRP_STORAGE`` allocates local storage at runtime so
+     it is possible that ``bpf_cgrp_storage_get()`` may return null local storage.
+     To avoid such null local storage issue, user space can do
+     ``bpf_map_update_elem()`` to pre-allocate local storage before a BPF program
+     is attached.
+
+(4). ``BPF_MAP_TYPE_CGRP_STORAGE`` supports deleting local storage by a BPF program
+     while ``BPF_MAP_TYPE_CGROUP_STORAGE_DEPRECATED`` only deletes storage during
+     prog detach time.
+
+So overall, ``BPF_MAP_TYPE_CGRP_STORAGE`` supports all ``BPF_MAP_TYPE_CGROUP_STORAGE_DEPRECATED``
+functionality and beyond. It is recommended to use ``BPF_MAP_TYPE_CGRP_STORAGE``
+instead of ``BPF_MAP_TYPE_CGROUP_STORAGE_DEPRECATED``.
--- a/Documentation/bpf/map_cpumap.rst
+++ b/Documentation/bpf/map_cpumap.rst
@ -0,0 +1,177 @@
+.. SPDX-License-Identifier: GPL-2.0-only
+.. Copyright (C) 2022 Red Hat, Inc.
+
+===================
+BPF_MAP_TYPE_CPUMAP
+===================
+
+.. note::
+   - ``BPF_MAP_TYPE_CPUMAP`` was introduced in kernel version 4.15
+
+.. kernel-doc:: kernel/bpf/cpumap.c
+ :doc: cpu map
+
+An example use-case for this map type is software based Receive Side Scaling (RSS).
+
+The CPUMAP represents the CPUs in the system indexed as the map-key, and the
+map-value is the config setting (per CPUMAP entry). Each CPUMAP entry has a dedicated
+kernel thread bound to the given CPU to represent the remote CPU execution unit.
+
+Starting from Linux kernel version 5.9 the CPUMAP can run a second XDP program
+on the remote CPU. This allows an XDP program to split its processing across
+multiple CPUs. For example, a scenario where the initial CPU (that sees/receives
+the packets) needs to do minimal packet processing and the remote CPU (to which
+the packet is directed) can afford to spend more cycles processing the frame. The
+initial CPU is where the XDP redirect program is executed. The remote CPU
+receives raw ``xdp_frame`` objects.
+
+Usage
+=====
+
+Kernel BPF
+----------
+bpf_redirect_map()
+^^^^^^^^^^^^^^^^^^
+.. code-block:: c
+
+     long bpf_redirect_map(struct bpf_map *map, u32 key, u64 flags)
+
+Redirect the packet to the endpoint referenced by ``map`` at index ``key``.
+For ``BPF_MAP_TYPE_CPUMAP`` this map contains references to CPUs.
+
+The lower two bits of ``flags`` are used as the return code if the map lookup
+fails. This is so that the return value can be one of the XDP program return
+codes up to ``XDP_TX``, as chosen by the caller.
+
+User space
+----------
+.. note::
+    CPUMAP entries can only be updated/looked up/deleted from user space and not
+    from an eBPF program. Trying to call these functions from a kernel eBPF
+    program will result in the program failing to load and a verifier warning.
+
+bpf_map_update_elem()
+^^^^^^^^^^^^^^^^^^^^^
+.. code-block:: c
+
+    int bpf_map_update_elem(int fd, const void *key, const void *value, __u64 flags);
+
+CPU entries can be added or updated using the ``bpf_map_update_elem()``
+helper. This helper replaces existing elements atomically. The ``value`` parameter
+can be ``struct bpf_cpumap_val``.
+
+.. code-block:: c
+
+    struct bpf_cpumap_val {
+        __u32 qsize;  /* queue size to remote target CPU */
+        union {
+            int   fd; /* prog fd on map write */
+            __u32 id; /* prog id on map read */
+        } bpf_prog;
+    };
+
+The flags argument can be one of the following:
+  - BPF_ANY: Create a new element or update an existing element.
+  - BPF_NOEXIST: Create a new element only if it did not exist.
+  - BPF_EXIST: Update an existing element.
+
+bpf_map_lookup_elem()
+^^^^^^^^^^^^^^^^^^^^^
+.. code-block:: c
+
+    int bpf_map_lookup_elem(int fd, const void *key, void *value);
+
+CPU entries can be retrieved using the ``bpf_map_lookup_elem()``
+helper.
+
+bpf_map_delete_elem()
+^^^^^^^^^^^^^^^^^^^^^
+.. code-block:: c
+
+    int bpf_map_delete_elem(int fd, const void *key);
+
+CPU entries can be deleted using the ``bpf_map_delete_elem()``
+helper. This helper will return 0 on success, or negative error in case of
+failure.
+
+Examples
+========
+Kernel
+------
+
+The following code snippet shows how to declare a ``BPF_MAP_TYPE_CPUMAP`` called
+``cpu_map`` and how to redirect packets to a remote CPU using a round robin scheme.
+
+.. code-block:: c
+
+    struct {
+        __uint(type, BPF_MAP_TYPE_CPUMAP);
+        __type(key, __u32);
+        __type(value, struct bpf_cpumap_val);
+        __uint(max_entries, 12);
+    } cpu_map SEC(".maps");
+
+    struct {
+        __uint(type, BPF_MAP_TYPE_ARRAY);
+        __type(key, __u32);
+        __type(value, __u32);
+        __uint(max_entries, 12);
+    } cpus_available SEC(".maps");
+
+    struct {
+        __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+        __type(key, __u32);
+        __type(value, __u32);
+        __uint(max_entries, 1);
+    } cpus_iterator SEC(".maps");
+
+    SEC("xdp")
+    int  xdp_redir_cpu_round_robin(struct xdp_md *ctx)
+    {
+        __u32 key = 0;
+        __u32 cpu_dest = 0;
+        __u32 *cpu_selected, *cpu_iterator;
+        __u32 cpu_idx;
+
+        cpu_iterator = bpf_map_lookup_elem(&cpus_iterator, &key);
+        if (!cpu_iterator)
+            return XDP_ABORTED;
+        cpu_idx = *cpu_iterator;
+
+        *cpu_iterator += 1;
+        if (*cpu_iterator == bpf_num_possible_cpus())
+            *cpu_iterator = 0;
+
+        cpu_selected = bpf_map_lookup_elem(&cpus_available, &cpu_idx);
+        if (!cpu_selected)
+            return XDP_ABORTED;
+        cpu_dest = *cpu_selected;
+
+        if (cpu_dest >= bpf_num_possible_cpus())
+            return XDP_ABORTED;
+
+        return bpf_redirect_map(&cpu_map, cpu_dest, 0);
+    }
+
+User space
+----------
+
+The following code snippet shows how to dynamically set the max_entries for a
+CPUMAP to the max number of cpus available on the system.
+
+.. code-block:: c
+
+    int set_max_cpu_entries(struct bpf_map *cpu_map)
+    {
+        if (bpf_map__set_max_entries(cpu_map, libbpf_num_possible_cpus()) < 0) {
+            fprintf(stderr, "Failed to set max entries for cpu_map map: %s",
+                strerror(errno));
+            return -1;
+        }
+        return 0;
+    }
+
+References
+===========
+
+- https://developers.redhat.com/blog/2021/05/13/receive-side-scaling-rss-with-ebpf-and-cpumap#redirecting_into_a_cpumap
--- a/Documentation/bpf/map_devmap.rst
+++ b/Documentation/bpf/map_devmap.rst
@ -0,0 +1,238 @@
+.. SPDX-License-Identifier: GPL-2.0-only
+.. Copyright (C) 2022 Red Hat, Inc.
+
+=================================================
+BPF_MAP_TYPE_DEVMAP and BPF_MAP_TYPE_DEVMAP_HASH
+=================================================
+
+.. note::
+   - ``BPF_MAP_TYPE_DEVMAP`` was introduced in kernel version 4.14
+   - ``BPF_MAP_TYPE_DEVMAP_HASH`` was introduced in kernel version 5.4
+
+``BPF_MAP_TYPE_DEVMAP`` and ``BPF_MAP_TYPE_DEVMAP_HASH`` are BPF maps primarily
+used as backend maps for the XDP BPF helper call ``bpf_redirect_map()``.
+``BPF_MAP_TYPE_DEVMAP`` is backed by an array that uses the key as
+the index to look up a reference to a net device, while ``BPF_MAP_TYPE_DEVMAP_HASH``
+is backed by a hash table that uses a key to look up a reference to a net device.
+The user provides either <``key``/ ``ifindex``> or <``key``/ ``struct bpf_devmap_val``>
+pairs to update the maps with new net devices.
+
+.. note::
+    - The key to a hash map doesn't have to be an ``ifindex``.
+    - While ``BPF_MAP_TYPE_DEVMAP_HASH`` allows for densely packing the net devices
+      it comes at the cost of a hash of the key when performing a look up.
+
+The setup and packet enqueue/send code is shared between the two types of
+devmap; only the lookup and insertion is different.
+
+Usage
+=====
+Kernel BPF
+----------
+bpf_redirect_map()
+^^^^^^^^^^^^^^^^^^
+.. code-block:: c
+
+    long bpf_redirect_map(struct bpf_map *map, u32 key, u64 flags)
+
+Redirect the packet to the endpoint referenced by ``map`` at index ``key``.
+For ``BPF_MAP_TYPE_DEVMAP`` and ``BPF_MAP_TYPE_DEVMAP_HASH`` this map contains
+references to net devices (for forwarding packets through other ports).
+
+The lower two bits of ``flags`` are used as the return code if the map lookup
+fails. This is so that the return value can be one of the XDP program return
+codes up to ``XDP_TX``, as chosen by the caller. The higher bits of ``flags``
+can be set to ``BPF_F_BROADCAST`` or ``BPF_F_EXCLUDE_INGRESS`` as defined
+below.
+
+With ``BPF_F_BROADCAST`` the packet will be broadcast to all the interfaces
+in the map; with ``BPF_F_EXCLUDE_INGRESS`` the ingress interface will be excluded
+from the broadcast.
+
+.. note::
+    - The key is ignored if BPF_F_BROADCAST is set.
+    - The broadcast feature can also be used to implement multicast forwarding:
+      simply create multiple DEVMAPs, each one corresponding to a single multicast group.
+
+This helper will return ``XDP_REDIRECT`` on success, or the value of the two
+lower bits of the ``flags`` argument if the map lookup fails.
+
+More information about redirection can be found in :doc:`redirect`.
+
+bpf_map_lookup_elem()
+^^^^^^^^^^^^^^^^^^^^^
+.. code-block:: c
+
+   void *bpf_map_lookup_elem(struct bpf_map *map, const void *key)
+
+Net device entries can be retrieved using the ``bpf_map_lookup_elem()``
+helper.
+
+User space
+----------
+.. note::
+    DEVMAP entries can only be updated/deleted from user space and not
+    from an eBPF program. Trying to call these functions from a kernel eBPF
+    program will result in the program failing to load and a verifier warning.
+
+bpf_map_update_elem()
+^^^^^^^^^^^^^^^^^^^^^
+.. code-block:: c
+
+   int bpf_map_update_elem(int fd, const void *key, const void *value, __u64 flags);
+
+Net device entries can be added or updated using the ``bpf_map_update_elem()``
+helper. This helper replaces existing elements atomically. The ``value`` parameter
+can be ``struct bpf_devmap_val`` or a simple ``int ifindex`` for backwards
+compatibility.
+
+.. code-block:: c
+
+    struct bpf_devmap_val {
+        __u32 ifindex;   /* device index */
+        union {
+            int   fd;  /* prog fd on map write */
+            __u32 id;  /* prog id on map read */
+        } bpf_prog;
+    };
+
+The ``flags`` argument can be one of the following:
+  - ``BPF_ANY``: Create a new element or update an existing element.
+  - ``BPF_NOEXIST``: Create a new element only if it did not exist.
+  - ``BPF_EXIST``: Update an existing element.
+
+DEVMAPs can associate a program with a device entry by adding a ``bpf_prog.fd``
+to ``struct bpf_devmap_val``. Programs are run after ``XDP_REDIRECT`` and have
+access to both Rx device and Tx device. The program associated with the ``fd``
+must have type XDP with expected attach type ``xdp_devmap``.
+When a program is associated with a device index, the program is run on an
+``XDP_REDIRECT`` and before the buffer is added to the per-cpu queue. Examples
+of how to attach/use xdp_devmap progs can be found in the kernel selftests:
+
+- ``tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c``
+- ``tools/testing/selftests/bpf/progs/test_xdp_with_devmap_helpers.c``
+
+bpf_map_lookup_elem()
+^^^^^^^^^^^^^^^^^^^^^
+.. code-block:: c
+
+   int bpf_map_lookup_elem(int fd, const void *key, void *value);
+
+Net device entries can be retrieved using the ``bpf_map_lookup_elem()``
+helper.
+
+bpf_map_delete_elem()
+^^^^^^^^^^^^^^^^^^^^^
+.. code-block:: c
+
+   int bpf_map_delete_elem(int fd, const void *key);
+
+Net device entries can be deleted using the ``bpf_map_delete_elem()``
+helper. This helper will return 0 on success, or negative error in case of
+failure.
+
+Examples
+========
+
+Kernel BPF
+----------
+
+The following code snippet shows how to declare a ``BPF_MAP_TYPE_DEVMAP``
+called tx_port.
+
+.. code-block:: c
+
+    struct {
+        __uint(type, BPF_MAP_TYPE_DEVMAP);
+        __type(key, __u32);
+        __type(value, __u32);
+        __uint(max_entries, 256);
+    } tx_port SEC(".maps");
+
+The following code snippet shows how to declare a ``BPF_MAP_TYPE_DEVMAP_HASH``
+called forward_map.
+
+.. code-block:: c
+
+    struct {
+        __uint(type, BPF_MAP_TYPE_DEVMAP_HASH);
+        __type(key, __u32);
+        __type(value, struct bpf_devmap_val);
+        __uint(max_entries, 32);
+    } forward_map SEC(".maps");
+
+.. note::
+
+    The value type in the DEVMAP above is a ``struct bpf_devmap_val``
+
+The following code snippet shows a simple xdp_redirect_map program. This program
+would work with a user space program that populates the devmap ``forward_map`` based
+on ingress ifindexes. The BPF program (below) is redirecting packets using the
+ingress ``ifindex`` as the ``key``.
+
+.. code-block:: c
+
+    SEC("xdp")
+    int xdp_redirect_map_func(struct xdp_md *ctx)
+    {
+        int index = ctx->ingress_ifindex;
+
+        return bpf_redirect_map(&forward_map, index, 0);
+    }
+
+The following code snippet shows a BPF program that is broadcasting packets to
+all the interfaces in the ``tx_port`` devmap.
+
+.. code-block:: c
+
+    SEC("xdp")
+    int xdp_redirect_map_func(struct xdp_md *ctx)
+    {
+        return bpf_redirect_map(&tx_port, 0, BPF_F_BROADCAST | BPF_F_EXCLUDE_INGRESS);
+    }
+
+User space
+----------
+
+The following code snippet shows how to update a devmap called ``tx_port``.
+
+.. code-block:: c
+
+    int update_devmap(int ifindex, int redirect_ifindex)
+    {
+        int ret;
+
+        ret = bpf_map_update_elem(bpf_map__fd(tx_port), &ifindex, &redirect_ifindex, 0);
+        if (ret < 0) {
+            fprintf(stderr, "Failed to update devmap_ value: %s\n",
+                strerror(errno));
+        }
+
+        return ret;
+    }
+
+The following code snippet shows how to update a hash_devmap called ``forward_map``.
+
+.. code-block:: c
+
+    int update_devmap(int ifindex, int redirect_ifindex)
+    {
+        struct bpf_devmap_val devmap_val = { .ifindex = redirect_ifindex };
+        int ret;
+
+        ret = bpf_map_update_elem(bpf_map__fd(forward_map), &ifindex, &devmap_val, 0);
+        if (ret < 0) {
+            fprintf(stderr, "Failed to update devmap_ value: %s\n",
+                strerror(errno));
+        }
+        return ret;
+    }
+
+References
+===========
+
+- https://lwn.net/Articles/728146/
+- https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git/commit/?id=6f9d451ab1a33728adb72d7ff66a7b374d665176
+- https://elixir.bootlin.com/linux/latest/source/net/core/filter.c#L4106
--- a/Documentation/bpf/map_hash.rst
+++ b/Documentation/bpf/map_hash.rst
@ -34,7 +34,14 @@ the ``BPF_F_NO_COMMON_LRU`` flag when calling ``bpf_map_create``.
 Usage
 =====

-.. c:function::
+Kernel BPF
+----------
+
+bpf_map_update_elem()
+~~~~~~~~~~~~~~~~~~~~~
+
+.. code-block:: c
+
   long bpf_map_update_elem(struct bpf_map *map, const void *key, const void *value, u64 flags)

 Hash entries can be added or updated using the ``bpf_map_update_elem()``
@ -49,14 +56,22 @@ parameter can be used to control the update behaviour:
 ``bpf_map_update_elem()`` returns 0 on success, or negative error in
 case of failure.

-.. c:function::
+bpf_map_lookup_elem()
+~~~~~~~~~~~~~~~~~~~~~
+
+.. code-block:: c
+
   void *bpf_map_lookup_elem(struct bpf_map *map, const void *key)

 Hash entries can be retrieved using the ``bpf_map_lookup_elem()``
 helper. This helper returns a pointer to the value associated with
 ``key``, or ``NULL`` if no entry was found.

-.. c:function::
+bpf_map_delete_elem()
+~~~~~~~~~~~~~~~~~~~~~
+
+.. code-block:: c
+
   long bpf_map_delete_elem(struct bpf_map *map, const void *key)

 Hash entries can be deleted using the ``bpf_map_delete_elem()``
@ -70,7 +85,11 @@ For ``BPF_MAP_TYPE_PERCPU_HASH`` and ``BPF_MAP_TYPE_LRU_PERCPU_HASH``
 the ``bpf_map_update_elem()`` and ``bpf_map_lookup_elem()`` helpers
 automatically access the hash slot for the current CPU.

-.. c:function::
+bpf_map_lookup_percpu_elem()
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. code-block:: c
+
   void *bpf_map_lookup_percpu_elem(struct bpf_map *map, const void *key, u32 cpu)

 The ``bpf_map_lookup_percpu_elem()`` helper can be used to lookup the
@ -89,7 +108,11 @@ See ``tools/testing/selftests/bpf/progs/test_spin_lock.c``.
 Userspace
 ---------

-.. c:function::
+bpf_map_get_next_key()
+~~~~~~~~~~~~~~~~~~~~~~
+
+.. code-block:: c
+
   int bpf_map_get_next_key(int fd, const void *cur_key, void *next_key)

 In userspace, it is possible to iterate through the keys of a hash using
--- a/Documentation/bpf/map_lpm_trie.rst
+++ b/Documentation/bpf/map_lpm_trie.rst
@ -0,0 +1,197 @@
+.. SPDX-License-Identifier: GPL-2.0-only
+.. Copyright (C) 2022 Red Hat, Inc.
+
+=====================
+BPF_MAP_TYPE_LPM_TRIE
+=====================
+
+.. note::
+   - ``BPF_MAP_TYPE_LPM_TRIE`` was introduced in kernel version 4.11
+
+``BPF_MAP_TYPE_LPM_TRIE`` provides a longest prefix match algorithm that
+can be used to match IP addresses to a stored set of prefixes.
+Internally, data is stored in an unbalanced trie of nodes that uses
+``prefixlen,data`` pairs as its keys. The ``data`` is interpreted in
+network byte order, i.e. big endian, so ``data[0]`` stores the most
+significant byte.
+
+LPM tries may be created with a maximum prefix length that is a multiple
+of 8, in the range from 8 to 2048. The key used for lookup and update
+operations is a ``struct bpf_lpm_trie_key``, extended by
+``max_prefixlen/8`` bytes.
+
+- For IPv4 addresses the data length is 4 bytes
+- For IPv6 addresses the data length is 16 bytes
+
+The value type stored in the LPM trie can be any user defined type.
+
+.. note::
+   When creating a map of type ``BPF_MAP_TYPE_LPM_TRIE`` you must set the
+   ``BPF_F_NO_PREALLOC`` flag.
+
+Usage
+=====
+
+Kernel BPF
+----------
+
+bpf_map_lookup_elem()
+~~~~~~~~~~~~~~~~~~~~~
+
+.. code-block:: c
+
+   void *bpf_map_lookup_elem(struct bpf_map *map, const void *key)
+
+The longest prefix entry for a given data value can be found using the
+``bpf_map_lookup_elem()`` helper. This helper returns a pointer to the
+value associated with the longest matching ``key``, or ``NULL`` if no
+entry was found.
+
+The ``key`` should have ``prefixlen`` set to ``max_prefixlen`` when
+performing longest prefix lookups. For example, when searching for the
+longest prefix match for an IPv4 address, ``prefixlen`` should be set to
+``32``.
+
+bpf_map_update_elem()
+~~~~~~~~~~~~~~~~~~~~~
+
+.. code-block:: c
+
+   long bpf_map_update_elem(struct bpf_map *map, const void *key, const void *value, u64 flags)
+
+Prefix entries can be added or updated using the ``bpf_map_update_elem()``
+helper. This helper replaces existing elements atomically.
+
+``bpf_map_update_elem()`` returns ``0`` on success, or negative error in
+case of failure.
+
+.. note::
+    The flags parameter must be one of BPF_ANY, BPF_NOEXIST or BPF_EXIST,
+    but the value is ignored, giving BPF_ANY semantics.
+
+bpf_map_delete_elem()
+~~~~~~~~~~~~~~~~~~~~~
+
+.. code-block:: c
+
+   long bpf_map_delete_elem(struct bpf_map *map, const void *key)
+
+Prefix entries can be deleted using the ``bpf_map_delete_elem()``
+helper. This helper will return 0 on success, or negative error in case
+of failure.
+
+Userspace
+---------
+
+Access from userspace uses libbpf APIs with the same names as above, with
+the map identified by ``fd``.
+
+bpf_map_get_next_key()
+~~~~~~~~~~~~~~~~~~~~~~
+
+.. code-block:: c
+
+   int bpf_map_get_next_key (int fd, const void *cur_key, void *next_key)
+
+A userspace program can iterate through the entries in an LPM trie using
+libbpf's ``bpf_map_get_next_key()`` function. The first key can be
+fetched by calling ``bpf_map_get_next_key()`` with ``cur_key`` set to
+``NULL``. Subsequent calls will fetch the next key that follows the
+current key. ``bpf_map_get_next_key()`` returns ``0`` on success,
+``-ENOENT`` if ``cur_key`` is the last key in the trie, or negative
+error in case of failure.
+
+``bpf_map_get_next_key()`` will iterate through the LPM trie elements
+from leftmost leaf first. This means that iteration will return more
+specific keys before less specific ones.
+
+Examples
+========
+
+Please see ``tools/testing/selftests/bpf/test_lpm_map.c`` for examples
+of LPM trie usage from userspace. The code snippets below demonstrate
+API usage.
+
+Kernel BPF
+----------
+
+The following BPF code snippet shows how to declare a new LPM trie for IPv4
+address prefixes:
+
+.. code-block:: c
+
+    #include <linux/bpf.h>
+    #include <bpf/bpf_helpers.h>
+
+    struct ipv4_lpm_key {
+            __u32 prefixlen;
+            __u32 data;
+    };
+
+    struct {
+            __uint(type, BPF_MAP_TYPE_LPM_TRIE);
+            __type(key, struct ipv4_lpm_key);
+            __type(value, __u32);
+            __uint(map_flags, BPF_F_NO_PREALLOC);
+            __uint(max_entries, 255);
+    } ipv4_lpm_map SEC(".maps");
+
+The following BPF code snippet shows how to lookup by IPv4 address:
+
+.. code-block:: c
+
+    void *lookup(__u32 ipaddr)
+    {
+            struct ipv4_lpm_key key = {
+                    .prefixlen = 32,
+                    .data = ipaddr
+            };
+
+            return bpf_map_lookup_elem(&ipv4_lpm_map, &key);
+    }
+
+Userspace
+---------
+
+The following snippet shows how to insert an IPv4 prefix entry into an
+LPM trie:
+
+.. code-block:: c
+
+    int add_prefix_entry(int lpm_fd, __u32 addr, __u32 prefixlen, struct value *value)
+    {
+            struct ipv4_lpm_key ipv4_key = {
+                    .prefixlen = prefixlen,
+                    .data = addr
+            };
+            return bpf_map_update_elem(lpm_fd, &ipv4_key, value, BPF_ANY);
+    }
+
+The following snippet shows a userspace program walking through the entries
+of an LPM trie:
+
+
+.. code-block:: c
+
+    #include <bpf/libbpf.h>
+    #include <bpf/bpf.h>
+
+    void iterate_lpm_trie(int map_fd)
+    {
+            struct ipv4_lpm_key *cur_key = NULL;
+            struct ipv4_lpm_key next_key;
+            struct value value;
+            int err;
+
+            for (;;) {
+                    err = bpf_map_get_next_key(map_fd, cur_key, &next_key);
+                    if (err)
+                            break;
+
+                    bpf_map_lookup_elem(map_fd, &next_key, &value);
+
+                    /* Use key and value here */
+
+                    cur_key = &next_key;
+            }
+    }
--- a/Documentation/bpf/map_of_maps.rst
+++ b/Documentation/bpf/map_of_maps.rst
@ -0,0 +1,130 @@
+.. SPDX-License-Identifier: GPL-2.0-only
+.. Copyright (C) 2022 Red Hat, Inc.
+
+========================================================
+BPF_MAP_TYPE_ARRAY_OF_MAPS and BPF_MAP_TYPE_HASH_OF_MAPS
+========================================================
+
+.. note::
+   - ``BPF_MAP_TYPE_ARRAY_OF_MAPS`` and ``BPF_MAP_TYPE_HASH_OF_MAPS`` were
+     introduced in kernel version 4.12
+
+``BPF_MAP_TYPE_ARRAY_OF_MAPS`` and ``BPF_MAP_TYPE_HASH_OF_MAPS`` provide general
+purpose support for map in map storage. One level of nesting is supported, where
+an outer map contains instances of a single type of inner map, for example
+``array_of_maps->sock_map``.
+
+When creating an outer map, an inner map instance is used to initialize the
+metadata that the outer map holds about its inner maps. This inner map has a
+separate lifetime from the outer map and can be deleted after the outer map has
+been created.
+
+The outer map supports element lookup, update and delete from user space using
+the syscall API. A BPF program is only allowed to do element lookup in the outer
+map.
+
+.. note::
+   - Multi-level nesting is not supported.
+   - Any BPF map type can be used as an inner map, except for
+     ``BPF_MAP_TYPE_PROG_ARRAY``.
+   - A BPF program cannot update or delete outer map entries.
+
+For ``BPF_MAP_TYPE_ARRAY_OF_MAPS`` the key is an unsigned 32-bit integer index
+into the array. The array is a fixed size with ``max_entries`` elements that are
+zero initialized when created.
+
+For ``BPF_MAP_TYPE_HASH_OF_MAPS`` the key type can be chosen when defining the
+map. The kernel is responsible for allocating and freeing key/value pairs, up to
+the max_entries limit that you specify. Hash maps use pre-allocation of hash
+table elements by default. The ``BPF_F_NO_PREALLOC`` flag can be used to disable
+pre-allocation when it is too memory expensive.
+
+Usage
+=====
+
+Kernel BPF Helper
+-----------------
+
+bpf_map_lookup_elem()
+~~~~~~~~~~~~~~~~~~~~~
+
+.. code-block:: c
+
+   void *bpf_map_lookup_elem(struct bpf_map *map, const void *key)
+
+Inner maps can be retrieved using the ``bpf_map_lookup_elem()`` helper. This
+helper returns a pointer to the inner map, or ``NULL`` if no entry was found.
+
+Examples
+========
+
+Kernel BPF Example
+------------------
+
+This snippet shows how to create and initialise an array of devmaps in a BPF
+program. Note that the outer array can only be modified from user space using
+the syscall API.
+
+.. code-block:: c
+
+    struct inner_map {
+            __uint(type, BPF_MAP_TYPE_DEVMAP);
+            __uint(max_entries, 10);
+            __type(key, __u32);
+            __type(value, __u32);
+    } inner_map1 SEC(".maps"), inner_map2 SEC(".maps");
+
+    struct {
+            __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
+            __uint(max_entries, 2);
+            __type(key, __u32);
+            __array(values, struct inner_map);
+    } outer_map SEC(".maps") = {
+            .values = { &inner_map1,
+                        &inner_map2 }
+    };
+
+See ``progs/test_btf_map_in_map.c`` in ``tools/testing/selftests/bpf`` for more
+examples of declarative initialisation of outer maps.
+
+User Space
+----------
+
+This snippet shows how to create an array based outer map:
+
+.. code-block:: c
+
+    int create_outer_array(int inner_fd) {
+            LIBBPF_OPTS(bpf_map_create_opts, opts, .inner_map_fd = inner_fd);
+            int fd;
+
+            fd = bpf_map_create(BPF_MAP_TYPE_ARRAY_OF_MAPS,
+                                "example_array",       /* name */
+                                sizeof(__u32),         /* key size */
+                                sizeof(__u32),         /* value size */
+                                256,                   /* max entries */
+                                &opts);                /* create opts */
+            return fd;
+    }
+
+
+This snippet shows how to add an inner map to an outer map:
+
+.. code-block:: c
+
+    int add_devmap(int outer_fd, int index, const char *name) {
+            int fd;
+
+            fd = bpf_map_create(BPF_MAP_TYPE_DEVMAP, name,
+                                sizeof(__u32), sizeof(__u32), 256, NULL);
+            if (fd < 0)
+                    return fd;
+
+            return bpf_map_update_elem(outer_fd, &index, &fd, BPF_ANY);
+    }
+
+References
+==========
+
+- https://lore.kernel.org/netdev/20170322170035.923581-3-kafai@fb.com/
+- https://lore.kernel.org/netdev/20170322170035.923581-4-kafai@fb.com/
--- a/Documentation/bpf/map_queue_stack.rst
+++ b/Documentation/bpf/map_queue_stack.rst
@ -0,0 +1,146 @@
+.. SPDX-License-Identifier: GPL-2.0-only
+.. Copyright (C) 2022 Red Hat, Inc.
+
+=========================================
+BPF_MAP_TYPE_QUEUE and BPF_MAP_TYPE_STACK
+=========================================
+
+.. note::
+   - ``BPF_MAP_TYPE_QUEUE`` and ``BPF_MAP_TYPE_STACK`` were introduced
+     in kernel version 4.20
+
+``BPF_MAP_TYPE_QUEUE`` provides FIFO storage and ``BPF_MAP_TYPE_STACK``
+provides LIFO storage for BPF programs. These maps support peek, pop and
+push operations that are exposed to BPF programs through the respective
+helpers. These operations are exposed to userspace applications using
+the existing ``bpf`` syscall in the following way:
+
+- ``BPF_MAP_LOOKUP_ELEM`` -> peek
+- ``BPF_MAP_LOOKUP_AND_DELETE_ELEM`` -> pop
+- ``BPF_MAP_UPDATE_ELEM`` -> push
+
+``BPF_MAP_TYPE_QUEUE`` and ``BPF_MAP_TYPE_STACK`` do not support
+``BPF_F_NO_PREALLOC``.
+
+Usage
+=====
+
+Kernel BPF
+----------
+
+bpf_map_push_elem()
+~~~~~~~~~~~~~~~~~~~
+
+.. code-block:: c
+
+   long bpf_map_push_elem(struct bpf_map *map, const void *value, u64 flags)
+
+An element ``value`` can be added to a queue or stack using the
+``bpf_map_push_elem`` helper. The ``flags`` parameter must be set to
+``BPF_ANY`` or ``BPF_EXIST``. If ``flags`` is set to ``BPF_EXIST`` then,
+when the queue or stack is full, the oldest element will be removed to
+make room for ``value`` to be added. Returns ``0`` on success, or
+negative error in case of failure.
+
+bpf_map_peek_elem()
+~~~~~~~~~~~~~~~~~~~
+
+.. code-block:: c
+
+   long bpf_map_peek_elem(struct bpf_map *map, void *value)
+
+This helper fetches an element ``value`` from a queue or stack without
+removing it. Returns ``0`` on success, or negative error in case of
+failure.
+
+bpf_map_pop_elem()
+~~~~~~~~~~~~~~~~~~
+
+.. code-block:: c
+
+   long bpf_map_pop_elem(struct bpf_map *map, void *value)
+
+This helper removes an element from a queue or stack and stores it in
+``value``. Returns ``0`` on success, or negative error in case of failure.
+
+
+Userspace
+---------
+
+bpf_map_update_elem()
+~~~~~~~~~~~~~~~~~~~~~
+
+.. code-block:: c
+
+   int bpf_map_update_elem (int fd, const void *key, const void *value, __u64 flags)
+
+A userspace program can push ``value`` onto a queue or stack using libbpf's
+``bpf_map_update_elem`` function. The ``key`` parameter must be set to
+``NULL`` and ``flags`` must be set to ``BPF_ANY`` or ``BPF_EXIST``, with the
+same semantics as the ``bpf_map_push_elem`` kernel helper. Returns ``0`` on
+success, or negative error in case of failure.
+
+bpf_map_lookup_elem()
+~~~~~~~~~~~~~~~~~~~~~
+
+.. code-block:: c
+
+   int bpf_map_lookup_elem (int fd, const void *key, void *value)
+
+A userspace program can peek at the ``value`` at the head of a queue or stack
+using the libbpf ``bpf_map_lookup_elem`` function. The ``key`` parameter must be
+set to ``NULL``.  Returns ``0`` on success, or negative error in case of
+failure.
+
+bpf_map_lookup_and_delete_elem()
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. code-block:: c
+
+   int bpf_map_lookup_and_delete_elem (int fd, const void *key, void *value)
+
+A userspace program can pop a ``value`` from the head of a queue or stack using
+the libbpf ``bpf_map_lookup_and_delete_elem`` function. The ``key`` parameter
+must be set to ``NULL``. Returns ``0`` on success, or negative error in case of
+failure.
+
+Examples
+========
+
+Kernel BPF
+----------
+
+This snippet shows how to declare a queue in a BPF program:
+
+.. code-block:: c
+
+    struct {
+            __uint(type, BPF_MAP_TYPE_QUEUE);
+            __type(value, __u32);
+            __uint(max_entries, 10);
+    } queue SEC(".maps");
+
+
+Userspace
+---------
+
+This snippet shows how to use libbpf's low-level API to create a queue from
+userspace:
+
+.. code-block:: c
+
+    int create_queue()
+    {
+            return bpf_map_create(BPF_MAP_TYPE_QUEUE,
+                                  "sample_queue", /* name */
+                                  0,              /* key size, must be zero */
+                                  sizeof(__u32),  /* value size */
+                                  10,             /* max entries */
+                                  NULL);          /* create options */
+    }
+
+
+References
+==========
+
+- https://lwn.net/ml/netdev/153986858555.9127.14517764371945179514.stgit@kernel/
--- a/Documentation/bpf/map_sk_storage.rst
+++ b/Documentation/bpf/map_sk_storage.rst
@ -0,0 +1,155 @@
+.. SPDX-License-Identifier: GPL-2.0-only
+.. Copyright (C) 2022 Red Hat, Inc.
+
+=======================
+BPF_MAP_TYPE_SK_STORAGE
+=======================
+
+.. note::
+   - ``BPF_MAP_TYPE_SK_STORAGE`` was introduced in kernel version 5.2
+
+``BPF_MAP_TYPE_SK_STORAGE`` is used to provide socket-local storage for BPF
+programs. A map of type ``BPF_MAP_TYPE_SK_STORAGE`` declares the type of storage
+to be provided and acts as the handle for accessing the socket-local
+storage. The values for maps of type ``BPF_MAP_TYPE_SK_STORAGE`` are stored
+locally with each socket instead of with the map. The kernel is responsible for
+allocating storage for a socket when requested and for freeing the storage when
+either the map or the socket is deleted.
+
+.. note::
+  - The key type must be ``int`` and ``max_entries`` must be set to ``0``.
+  - The ``BPF_F_NO_PREALLOC`` flag must be used when creating a map for
+    socket-local storage.
+
+Usage
+=====
+
+Kernel BPF
+----------
+
+bpf_sk_storage_get()
+~~~~~~~~~~~~~~~~~~~~
+
+.. code-block:: c
+
+   void *bpf_sk_storage_get(struct bpf_map *map, void *sk, void *value, u64 flags)
+
+Socket-local storage can be retrieved using the ``bpf_sk_storage_get()``
+helper. The helper gets the storage from ``sk`` that is associated with ``map``.
+If the ``BPF_LOCAL_STORAGE_GET_F_CREATE`` flag is used then
+``bpf_sk_storage_get()`` will create the storage for ``sk`` if it does not
+already exist. ``value`` can be used together with
+``BPF_LOCAL_STORAGE_GET_F_CREATE`` to initialize the storage value, otherwise it
+will be zero initialized. Returns a pointer to the storage on success, or
+``NULL`` in case of failure.
+
+.. note::
+   - ``sk`` is a kernel ``struct sock`` pointer for LSM or tracing programs.
+   - ``sk`` is a ``struct bpf_sock`` pointer for other program types.
+
+bpf_sk_storage_delete()
+~~~~~~~~~~~~~~~~~~~~~~~
+
+.. code-block:: c
+
+   long bpf_sk_storage_delete(struct bpf_map *map, void *sk)
+
+Socket-local storage can be deleted using the ``bpf_sk_storage_delete()``
+helper. The helper deletes the storage from ``sk`` that is identified by
+``map``. Returns ``0`` on success, or negative error in case of failure.
+
+User space
+----------
+
+bpf_map_update_elem()
+~~~~~~~~~~~~~~~~~~~~~
+
+.. code-block:: c
+
+   int bpf_map_update_elem(int map_fd, const void *key, const void *value, __u64 flags)
+
+Socket-local storage for the socket identified by ``key`` belonging to
+``map_fd`` can be added or updated using the ``bpf_map_update_elem()`` libbpf
+function. ``key`` must be a pointer to a valid ``fd`` in the user space
+program. The ``flags`` parameter can be used to control the update behaviour:
+
+- ``BPF_ANY`` will create storage for ``fd`` or update existing storage.
+- ``BPF_NOEXIST`` will create storage for ``fd`` only if it did not already
+  exist, otherwise the call will fail with ``-EEXIST``.
+- ``BPF_EXIST`` will update existing storage for ``fd`` if it already exists,
+  otherwise the call will fail with ``-ENOENT``.
+
+Returns ``0`` on success, or negative error in case of failure.
+
+bpf_map_lookup_elem()
+~~~~~~~~~~~~~~~~~~~~~
+
+.. code-block:: c
+
+   int bpf_map_lookup_elem(int map_fd, const void *key, void *value)
+
+Socket-local storage for the socket identified by ``key`` belonging to
+``map_fd`` can be retrieved using the ``bpf_map_lookup_elem()`` libbpf
+function. ``key`` must be a pointer to a valid ``fd`` in the user space
+program. Returns ``0`` on success, or negative error in case of failure.
+
+bpf_map_delete_elem()
+~~~~~~~~~~~~~~~~~~~~~
+
+.. code-block:: c
+
+   int bpf_map_delete_elem(int map_fd, const void *key)
+
+Socket-local storage for the socket identified by ``key`` belonging to
+``map_fd`` can be deleted using the ``bpf_map_delete_elem()`` libbpf
+function. Returns ``0`` on success, or negative error in case of failure.
+
+Examples
+========
+
+Kernel BPF
+----------
+
+This snippet shows how to declare socket-local storage in a BPF program:
+
+.. code-block:: c
+
+    struct {
+            __uint(type, BPF_MAP_TYPE_SK_STORAGE);
+            __uint(map_flags, BPF_F_NO_PREALLOC);
+            __type(key, int);
+            __type(value, struct my_storage);
+    } socket_storage SEC(".maps");
+
+This snippet shows how to retrieve socket-local storage in a BPF program:
+
+.. code-block:: c
+
+    SEC("sockops")
+    int _sockops(struct bpf_sock_ops *ctx)
+    {
+            struct my_storage *storage;
+            struct bpf_sock *sk;
+
+            sk = ctx->sk;
+            if (!sk)
+                    return 1;
+
+            storage = bpf_sk_storage_get(&socket_storage, sk, 0,
+                                         BPF_LOCAL_STORAGE_GET_F_CREATE);
+            if (!storage)
+                    return 1;
+
+            /* Use 'storage' here */
+
+            return 1;
+    }
+
+
+Please see the ``tools/testing/selftests/bpf`` directory for functional
+examples.
+
+References
+==========
+
+https://lwn.net/ml/netdev/20190426171103.61892-1-kafai@fb.com/
--- a/Documentation/bpf/map_xskmap.rst
+++ b/Documentation/bpf/map_xskmap.rst
@ -0,0 +1,192 @@
+.. SPDX-License-Identifier: GPL-2.0-only
+.. Copyright (C) 2022 Red Hat, Inc.
+
+===================
+BPF_MAP_TYPE_XSKMAP
+===================
+
+.. note::
+   - ``BPF_MAP_TYPE_XSKMAP`` was introduced in kernel version 4.18
+
+The ``BPF_MAP_TYPE_XSKMAP`` is used as a backend map for XDP BPF helper
+call ``bpf_redirect_map()`` and ``XDP_REDIRECT`` action, like 'devmap' and 'cpumap'.
+This map type redirects raw XDP frames to `AF_XDP`_ sockets (XSKs), a new type of
+address family in the kernel that allows redirection of frames from a driver to
+user space without having to traverse the full network stack. An AF_XDP socket
+binds to a single netdev queue. A mapping of XSKs to queues is shown below:
+
+.. code-block:: none
+
+    +---------------------------------------------------+
+    |     xsk A      |     xsk B       |      xsk C     |<---+ User space
+    =========================================================|==========
+    |    Queue 0     |     Queue 1     |     Queue 2    |    |  Kernel
+    +---------------------------------------------------+    |
+    |                  Netdev eth0                      |    |
+    +---------------------------------------------------+    |
+    |                            +=============+        |    |
+    |                            | key |  xsk  |        |    |
+    |  +---------+               +=============+        |    |
+    |  |         |               |  0  | xsk A |        |    |
+    |  |         |               +-------------+        |    |
+    |  |         |               |  1  | xsk B |        |    |
+    |  | BPF     |-- redirect -->+-------------+-------------+
+    |  | prog    |               |  2  | xsk C |        |
+    |  |         |               +-------------+        |
+    |  |         |                                      |
+    |  |         |                                      |
+    |  +---------+                                      |
+    |                                                   |
+    +---------------------------------------------------+
+
+.. note::
+    An AF_XDP socket that is bound to a certain <netdev/queue_id> will *only*
+    accept XDP frames from that <netdev/queue_id>. If an XDP program tries to redirect
+    from a <netdev/queue_id> other than what the socket is bound to, the frame will
+    not be received on the socket.
+
+Typically an XSKMAP is created per netdev. This map contains an array of XSK File
+Descriptors (FDs). The number of array elements is typically set or adjusted using
+the ``max_entries`` map parameter. For AF_XDP ``max_entries`` is equal to the number
+of queues supported by the netdev.
+
+.. note::
+    Both the map key and map value size must be 4 bytes.
+
+Usage
+=====
+
+Kernel BPF
+----------
+bpf_redirect_map()
+^^^^^^^^^^^^^^^^^^
+.. code-block:: c
+
+    long bpf_redirect_map(struct bpf_map *map, u32 key, u64 flags)
+
+Redirect the packet to the endpoint referenced by ``map`` at index ``key``.
+For ``BPF_MAP_TYPE_XSKMAP`` this map contains references to XSK FDs
+for sockets attached to a netdev's queues.
+
+.. note::
+    If the map is empty at an index, the packet is dropped. This means that it is
+    necessary to have an XDP program loaded with at least one XSK in the
+    XSKMAP to be able to get any traffic to user space through the socket.
+
+bpf_map_lookup_elem()
+^^^^^^^^^^^^^^^^^^^^^
+.. code-block:: c
+
+    void *bpf_map_lookup_elem(struct bpf_map *map, const void *key)
+
+XSK entry references of type ``struct xdp_sock *`` can be retrieved using the
+``bpf_map_lookup_elem()`` helper.
+
+User space
+----------
+.. note::
+    XSK entries can only be updated/deleted from user space and not from
+    a BPF program. Trying to call these functions from a kernel BPF program will
+    result in the program failing to load and a verifier warning.
+
+bpf_map_update_elem()
+^^^^^^^^^^^^^^^^^^^^^
+.. code-block:: c
+
+	int bpf_map_update_elem(int fd, const void *key, const void *value, __u64 flags)
+
+XSK entries can be added or updated using the ``bpf_map_update_elem()`` libbpf
+function. The ``key`` parameter is equal to the queue_id of the queue the XSK
+is attaching to, and the ``value`` parameter is the FD value of that socket.
+
+Under the hood, the XSKMAP update function uses the XSK FD value to retrieve the
+associated ``struct xdp_sock`` instance.
+
+The flags argument can be one of the following:
+
+- BPF_ANY: Create a new element or update an existing element.
+- BPF_NOEXIST: Create a new element only if it did not exist.
+- BPF_EXIST: Update an existing element.
+
+bpf_map_lookup_elem()
+^^^^^^^^^^^^^^^^^^^^^
+.. code-block:: c
+
+    int bpf_map_lookup_elem(int fd, const void *key, void *value)
+
+Returns ``struct xdp_sock *`` or negative error in case of failure.
+
+bpf_map_delete_elem()
+^^^^^^^^^^^^^^^^^^^^^
+.. code-block:: c
+
+    int bpf_map_delete_elem(int fd, const void *key)
+
+XSK entries can be deleted using the ``bpf_map_delete_elem()`` libbpf
+function. This function will return 0 on success, or negative error in case of
+failure.
+
+.. note::
+    When `libxdp`_ deletes an XSK it also removes the associated socket
+    entry from the XSKMAP.
+
+Examples
+========
+Kernel
+------
+
+The following code snippet shows how to declare a ``BPF_MAP_TYPE_XSKMAP`` called
+``xsks_map`` and how to redirect packets to an XSK.
+
+.. code-block:: c
+
+	struct {
+		__uint(type, BPF_MAP_TYPE_XSKMAP);
+		__type(key, __u32);
+		__type(value, __u32);
+		__uint(max_entries, 64);
+	} xsks_map SEC(".maps");
+
+
+	SEC("xdp")
+	int xsk_redir_prog(struct xdp_md *ctx)
+	{
+		__u32 index = ctx->rx_queue_index;
+
+		if (bpf_map_lookup_elem(&xsks_map, &index))
+			return bpf_redirect_map(&xsks_map, index, 0);
+		return XDP_PASS;
+	}
+
+User space
+----------
+
+The following code snippet shows how to update an XSKMAP with an XSK entry.
+
+.. code-block:: c
+
+	int update_xsks_map(struct bpf_map *xsks_map, int queue_id, int xsk_fd)
+	{
+		int ret;
+
+		ret = bpf_map_update_elem(bpf_map__fd(xsks_map), &queue_id, &xsk_fd, 0);
+		if (ret < 0)
+			fprintf(stderr, "Failed to update xsks_map: %s\n", strerror(errno));
+
+		return ret;
+	}
+
+For an example on how to create AF_XDP sockets, please see the AF_XDP-example and
+AF_XDP-forwarding programs in the `bpf-examples`_ directory in the `libxdp`_ repository.
+For a detailed explanation of the AF_XDP interface please see:
+
+- `libxdp-readme`_.
+- `AF_XDP`_ kernel documentation.
+
+.. note::
+    The most comprehensive resource for using XSKMAPs and AF_XDP is `libxdp`_.
+
+.. _libxdp: https://github.com/xdp-project/xdp-tools/tree/master/lib/libxdp
+.. _AF_XDP: https://www.kernel.org/doc/html/latest/networking/af_xdp.html
+.. _bpf-examples: https://github.com/xdp-project/bpf-examples
+.. _libxdp-readme: https://github.com/xdp-project/xdp-tools/tree/master/lib/libxdp#using-af_xdp-sockets
--- a/Documentation/bpf/maps.rst
+++ b/Documentation/bpf/maps.rst
@ -1,46 +1,19 @@

-=========
-eBPF maps
-=========
+========
+BPF maps
+========

-'maps' is a generic storage of different types for sharing data between kernel
-and userspace.
+BPF 'maps' provide generic storage of different types for sharing data between
+kernel and user space. There are several storage types available, including
+hash, array, bloom filter and radix-tree. Several of the map types exist to
+support specific BPF helpers that perform actions based on the map contents. The
+maps are accessed from BPF programs via BPF helpers which are documented in the
+`man-pages`_ for `bpf-helpers(7)`_.

-The maps are accessed from user space via BPF syscall, which has commands:
-
- create a map with given type and attributes
-  ``map_fd = bpf(BPF_MAP_CREATE, union bpf_attr *attr, u32 size)``
-  using attr->map_type, attr->key_size, attr->value_size, attr->max_entries
-  returns process-local file descriptor or negative error
-
- lookup key in a given map
-  ``err = bpf(BPF_MAP_LOOKUP_ELEM, union bpf_attr *attr, u32 size)``
-  using attr->map_fd, attr->key, attr->value
-  returns zero and stores found elem into value or negative error
-
- create or update key/value pair in a given map
-  ``err = bpf(BPF_MAP_UPDATE_ELEM, union bpf_attr *attr, u32 size)``
-  using attr->map_fd, attr->key, attr->value
-  returns zero or negative error
-
- find and delete element by key in a given map
-  ``err = bpf(BPF_MAP_DELETE_ELEM, union bpf_attr *attr, u32 size)``
-  using attr->map_fd, attr->key
-
- to delete map: close(fd)
-  Exiting process will delete maps automatically
-
-userspace programs use this syscall to create/access maps that eBPF programs
-are concurrently updating.
-
-maps can have different types: hash, array, bloom filter, radix-tree, etc.
-
-The map is defined by:
-
-  - type
-  - max number of elements
-  - key size in bytes
-  - value size in bytes
+BPF maps are accessed from user space via the ``bpf`` syscall, which provides
+commands to create maps, look up elements, update elements and delete
+elements. More details of the BPF syscall are available in
+:doc:`/userspace-api/ebpf/syscall` and in the `man-pages`_ for `bpf(2)`_.

 Map Types
 =========
@ -49,4 +22,60 @@ Map Types
   :maxdepth: 1
   :glob:

-   map_*
+   map_*
+
+Usage Notes
+===========
+
+.. c:function::
+   int bpf(int command, union bpf_attr *attr, u32 size)
+
+Use the ``bpf()`` system call to perform the operation specified by
+``command``. The operation takes parameters provided in ``attr``. The ``size``
+argument is the size of the ``union bpf_attr`` in ``attr``.
+
+**BPF_MAP_CREATE**
+
+Create a map with the desired type and attributes in ``attr``:
+
+.. code-block:: c
+
+    int fd;
+    union bpf_attr attr = {
+            .map_type = BPF_MAP_TYPE_ARRAY,  /* mandatory */
+            .key_size = sizeof(__u32),       /* mandatory */
+            .value_size = sizeof(__u32),     /* mandatory */
+            .max_entries = 256,              /* mandatory */
+            .map_flags = BPF_F_MMAPABLE,
+            .map_name = "example_array",
+    };
+
+    fd = bpf(BPF_MAP_CREATE, &attr, sizeof(attr));
+
+Returns a process-local file descriptor on success, or negative error in case of
+failure. The map can be deleted by calling ``close(fd)``. Maps held by open
+file descriptors will be deleted automatically when a process exits.
+
+.. note:: Valid characters for ``map_name`` are ``A-Z``, ``a-z``, ``0-9``,
+   ``'_'`` and ``'.'``.
+
+**BPF_MAP_LOOKUP_ELEM**
+
+Look up a key in a given map using ``attr->map_fd``, ``attr->key``,
+``attr->value``. Returns zero and stores the found element into ``attr->value``
+on success, or negative error on failure.
+
+**BPF_MAP_UPDATE_ELEM**
+
+Create or update key/value pair in a given map using ``attr->map_fd``, ``attr->key``,
+``attr->value``. Returns zero on success or negative error on failure.
+
+**BPF_MAP_DELETE_ELEM**
+
+Find and delete element by key in a given map using ``attr->map_fd``,
+``attr->key``. Returns zero on success or negative error on failure.
+
+.. Links:
+.. _man-pages: https://www.kernel.org/doc/man-pages/
+.. _bpf(2): https://man7.org/linux/man-pages/man2/bpf.2.html
+.. _bpf-helpers(7): https://man7.org/linux/man-pages/man7/bpf-helpers.7.html
--- a/Documentation/bpf/programs.rst
+++ b/Documentation/bpf/programs.rst
@ -7,3 +7,6 @@ Program Types
   :glob:

   prog_*
+
+For a list of all program types, see :ref:`program_types_and_elf` in
+the :ref:`libbpf` documentation.
--- a/Documentation/bpf/redirect.rst
+++ b/Documentation/bpf/redirect.rst
@ -0,0 +1,81 @@
+.. SPDX-License-Identifier: GPL-2.0-only
+.. Copyright (C) 2022 Red Hat, Inc.
+
+========
+Redirect
+========
+XDP_REDIRECT
+############
+Supported maps
+--------------
+
+XDP_REDIRECT works with the following map types:
+
+- ``BPF_MAP_TYPE_DEVMAP``
+- ``BPF_MAP_TYPE_DEVMAP_HASH``
+- ``BPF_MAP_TYPE_CPUMAP``
+- ``BPF_MAP_TYPE_XSKMAP``
+
+For more information on these maps, please see the specific map documentation.
+
+Process
+-------
+
+.. kernel-doc:: net/core/filter.c
+   :doc: xdp redirect
+
+.. note::
+    Not all drivers support transmitting frames after a redirect, and for
+    those that do, not all of them support non-linear frames. Non-linear xdp
+    bufs/frames are bufs/frames that contain more than one fragment.
+
+Debugging packet drops
+----------------------
+Silent packet drops for XDP_REDIRECT can be debugged using:
+
+- bpftrace
+- perf record
+
+bpftrace
+^^^^^^^^
+The following bpftrace command can be used to capture and count all XDP tracepoints:
+
+.. code-block:: none
+
+    sudo bpftrace -e 'tracepoint:xdp:* { @cnt[probe] = count(); }'
+    Attaching 12 probes...
+    ^C
+
+    @cnt[tracepoint:xdp:mem_connect]: 18
+    @cnt[tracepoint:xdp:mem_disconnect]: 18
+    @cnt[tracepoint:xdp:xdp_exception]: 19605
+    @cnt[tracepoint:xdp:xdp_devmap_xmit]: 1393604
+    @cnt[tracepoint:xdp:xdp_redirect]: 22292200
+
+.. note::
+    The various xdp tracepoints can be found in ``source/include/trace/events/xdp.h``
+
+The following bpftrace command can be used to extract the ``ERRNO`` being returned as
+part of the err parameter:
+
+.. code-block:: none
+
+    sudo bpftrace -e \
+    'tracepoint:xdp:xdp_redirect*_err {@redir_errno[-args->err] = count();}
+    tracepoint:xdp:xdp_devmap_xmit {@devmap_errno[-args->err] = count();}'
+
+perf record
+^^^^^^^^^^^
+The perf tool also supports recording tracepoints:
+
+.. code-block:: none
+
+    perf record -a -e xdp:xdp_redirect_err \
+        -e xdp:xdp_redirect_map_err \
+        -e xdp:xdp_exception \
+        -e xdp:xdp_devmap_xmit
+
+References
+===========
+
+- https://github.com/xdp-project/xdp-tutorial/tree/master/tracing02-xdp-monitor
--- a/Documentation/conf.py
+++ b/Documentation/conf.py
@ -194,6 +194,24 @@ finally:
    else:
        version = release = "unknown version"

+#
+# HACK: there seems to be no easy way for us to get at the version and
+# release information passed in from the makefile...so go pawing through the
+# command-line options and find it for ourselves.
+#
+def get_cline_version():
+    c_version = c_release = ''
+    for arg in sys.argv:
+        if arg.startswith('version='):
+            c_version = arg[8:]
+        elif arg.startswith('release='):
+            c_release = arg[8:]
+    if c_version:
+        if c_release:
+            return c_version + '-' + c_release
+        return c_version
+    return version # Whatever we came up with before
+
 # The language for content autogenerated by Sphinx. Refer to documentation
 # for a list of supported languages.
 #
@ -247,7 +265,7 @@ highlight_language = 'none'
 # a list of builtin themes.

 # Default theme
-html_theme = 'sphinx_rtd_theme'
+html_theme = 'alabaster'
 html_css_files = []

 if "DOCS_THEME" in os.environ:
@ -278,8 +296,12 @@ if html_theme == 'sphinx_rtd_theme' or html_theme == 'sphinx_rtd_dark_mode':
                # Add color-specific RTD normal mode
                html_css_files.append('theme_rtd_colors.css')

+        html_theme_options = {
+            'navigation_depth': -1,
+        }
+
    except ImportError:
-        html_theme = 'classic'
+        html_theme = 'alabaster'

 if "DOCS_CSS" in os.environ:
    css = os.environ["DOCS_CSS"].split(" ")
@ -295,127 +317,29 @@ if major <= 1 and minor < 8:
    for l in html_css_files:
        html_context['css_files'].append('_static/' + l)

-if  html_theme == 'classic':
+if  html_theme == 'alabaster':
    html_theme_options = {
-        'rightsidebar':        False,
-        'stickysidebar':       True,
-        'collapsiblesidebar':  True,
-        'externalrefs':        False,
-
-        'footerbgcolor':       "white",
-        'footertextcolor':     "white",
-        'sidebarbgcolor':      "white",
-        'sidebarbtncolor':     "black",
-        'sidebartextcolor':    "black",
-        'sidebarlinkcolor':    "#686bff",
-        'relbarbgcolor':       "#133f52",
-        'relbartextcolor':     "white",
-        'relbarlinkcolor':     "white",
-        'bgcolor':             "white",
-        'textcolor':           "black",
-        'headbgcolor':         "#f2f2f2",
-        'headtextcolor':       "#20435c",
-        'headlinkcolor':       "#c60f0f",
-        'linkcolor':           "#355f7c",
-        'visitedlinkcolor':    "#355f7c",
-        'codebgcolor':         "#3f3f3f",
-        'codetextcolor':       "white",
-
-        'bodyfont':            "serif",
-        'headfont':            "sans-serif",
+        'description': get_cline_version(),
+        'page_width': '65em',
+        'sidebar_width': '15em',
+        'font_size': 'inherit',
+        'font_family': 'serif',
    }

 sys.stderr.write("Using %s theme\n" % html_theme)

-# Theme options are theme-specific and customize the look and feel of a theme
-# further.  For a list of options available for each theme, see the
-# documentation.
-#html_theme_options = {}
-
-# Add any paths that contain custom themes here, relative to this directory.
-#html_theme_path = []
-
-# The name for this set of Sphinx documents.  If None, it defaults to
-# "<project> v<release> documentation".
-#html_title = None
-
-# A shorter title for the navigation bar.  Default is the same as html_title.
-#html_short_title = None
-
-# The name of an image file (relative to this directory) to place at the top
-# of the sidebar.
-#html_logo = None
-
-# The name of an image file (within the static path) to use as favicon of the
-# docs.  This file should be a Windows icon file (.ico) being 16x16 or 32x32
-# pixels large.
-#html_favicon = None
-
 # Add any paths that contain custom static files (such as style sheets) here,
 # relative to this directory. They are copied after the builtin static files,
 # so a file named "default.css" will overwrite the builtin "default.css".
 html_static_path = ['sphinx-static']

-# Add any extra paths that contain custom files (such as robots.txt or
-# .htaccess) here, relative to this directory. These files are copied
-# directly to the root of the documentation.
-#html_extra_path = []
-
-# If not '', a 'Last updated on:' timestamp is inserted at every page bottom,
-# using the given strftime format.
-#html_last_updated_fmt = '%b %d, %Y'
-
 # If true, SmartyPants will be used to convert quotes and dashes to
 # typographically correct entities.
 html_use_smartypants = False

 # Custom sidebar templates, maps document names to template names.
-# Note that the RTD theme ignores this.
-html_sidebars = { '**': ['searchbox.html', 'localtoc.html', 'sourcelink.html']}
-
-# Additional templates that should be rendered to pages, maps page names to
-# template names.
-#html_additional_pages = {}
-
-# If false, no module index is generated.
-#html_domain_indices = True
-
-# If false, no index is generated.
-#html_use_index = True
-
-# If true, the index is split into individual pages for each letter.
-#html_split_index = False
-
-# If true, links to the reST sources are added to the pages.
-#html_show_sourcelink = True
-
-# If true, "Created using Sphinx" is shown in the HTML footer. Default is True.
-#html_show_sphinx = True
-
-# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True.
-#html_show_copyright = True
-
-# If true, an OpenSearch description file will be output, and all pages will
-# contain a <link> tag referring to it.  The value of this option must be the
-# base URL from which the finished HTML is served.
-#html_use_opensearch = ''
-
-# This is the file name suffix for HTML files (e.g. ".xhtml").
-#html_file_suffix = None
-
-# Language to be used for generating the HTML full-text search index.
-# Sphinx supports the following languages:
-#   'da', 'de', 'en', 'es', 'fi', 'fr', 'h', 'it', 'ja'
-#   'nl', 'no', 'pt', 'ro', 'r', 'sv', 'tr'
-#html_search_language = 'en'
-
-# A dictionary with options for the search language support, empty by default.
-# Now only 'ja' uses this config value
-#html_search_options = {'type': 'default'}
-
-# The name of a javascript file (relative to the configuration directory) that
-# implements a search results scorer. If empty, the default will be used.
-#html_search_scorer = 'scorer.js'
+# Note that the RTD theme ignores this
+html_sidebars = { '**': ["about.html", 'searchbox.html', 'localtoc.html', 'sourcelink.html']}

 # Output file base name for HTML help builder.
 htmlhelp_basename = 'TheLinuxKerneldoc'
@ -558,19 +482,6 @@ texinfo_documents = [
     'Miscellaneous'),
 ]

-# Documents to append as an appendix to all manuals.
-#texinfo_appendices = []
-
-# If false, no module index is generated.
-#texinfo_domain_indices = True
-
-# How to display URL addresses: 'footnote', 'no', or 'inline'.
-#texinfo_show_urls = 'footnote'
-
-# If true, do not generate a @detailmenu in the "Top" node's menu.
-#texinfo_no_detailmenu = False
-
-
 # -- Options for Epub output ----------------------------------------------

 # Bibliographic Dublin Core info.
@ -579,67 +490,9 @@ epub_author = author
 epub_publisher = author
 epub_copyright = copyright

-# The basename for the epub file. It defaults to the project name.
-#epub_basename = project
-
-# The HTML theme for the epub output. Since the default themes are not
-# optimized for small screen space, using the same theme for HTML and epub
-# output is usually not wise. This defaults to 'epub', a theme designed to save
-# visual space.
-#epub_theme = 'epub'
-
-# The language of the text. It defaults to the language option
-# or 'en' if the language is not set.
-#epub_language = ''
-
-# The scheme of the identifier. Typical schemes are ISBN or URL.
-#epub_scheme = ''
-
-# The unique identifier of the text. This can be a ISBN number
-# or the project homepage.
-#epub_identifier = ''
-
-# A unique identification for the text.
-#epub_uid = ''
-
-# A tuple containing the cover image and cover page html template filenames.
-#epub_cover = ()
-
-# A sequence of (type, uri, title) tuples for the guide element of content.opf.
-#epub_guide = ()
-
-# HTML files that should be inserted before the pages created by sphinx.
-# The format is a list of tuples containing the path and title.
-#epub_pre_files = []
-
-# HTML files that should be inserted after the pages created by sphinx.
-# The format is a list of tuples containing the path and title.
-#epub_post_files = []
-
 # A list of files that should not be packed into the epub file.
 epub_exclude_files = ['search.html']

-# The depth of the table of contents in toc.ncx.
-#epub_tocdepth = 3
-
-# Allow duplicate toc entries.
-#epub_tocdup = True
-
-# Choose between 'default' and 'includehidden'.
-#epub_tocscope = 'default'
-
-# Fix unsupported image types using the Pillow.
-#epub_fix_images = False
-
-# Scale large images.
-#epub_max_image_width = 0
-
-# How to display URL addresses: 'footnote', 'no', or 'inline'.
-#epub_show_urls = 'inline'
-
-# If false, no index is generated.
-#epub_use_index = True
-
 #=======
 # rst2pdf
 #
--- a/Documentation/core-api/kernel-api.rst
+++ b/Documentation/core-api/kernel-api.rst
@ -36,6 +36,9 @@ String Conversions
 String Manipulation
 -------------------

+.. kernel-doc:: include/linux/fortify-string.h
+   :internal:
+
 .. kernel-doc:: lib/string.c
   :export:

@ -171,9 +174,6 @@ Division Functions
 .. kernel-doc:: include/linux/math64.h
   :internal:

-.. kernel-doc:: lib/math/div64.c
-   :functions: div_s64_rem div64_u64_rem div64_u64 div64_s64
-
 .. kernel-doc:: lib/math/gcd.c
   :export:

--- a/Documentation/core-api/local_ops.rst
+++ b/Documentation/core-api/local_ops.rst
@ -191,7 +191,7 @@ Here is a sample module which implements a basic per cpu counter using

    static void __exit test_exit(void)
    {
-            del_timer_sync(&test_timer);
+            timer_shutdown_sync(&test_timer);
    }

    module_init(test_init);
--- a/Documentation/cpu-freq/index.rst
+++ b/Documentation/cpu-freq/index.rst
@ -20,18 +20,15 @@ Author: Dominik Brodowski  <linux@brodo.de>

 Mailing List
 ------------
-There is a CPU frequency changing CVS commit and general list where
-you can report bugs, problems or submit patches. To post a message,
-send an email to linux-pm@vger.kernel.org.
+There is a CPU frequency general list where you can report bugs,
+problems or submit patches. To post a message, send an email to
+linux-pm@vger.kernel.org.

 Links
 -----
 the FTP archives:
 * ftp://ftp.linux.org.uk/pub/linux/cpufreq/

-how to access the CVS repository:
-* http://cvs.arm.linux.org.uk/
-
 the CPUFreq Mailing list:
 * http://vger.kernel.org/vger-lists.html#linux-pm

--- a/Documentation/crypto/devel-algos.rst
+++ b/Documentation/crypto/devel-algos.rst
@ -172,7 +172,7 @@ Here are schematics of how these functions are called when operated from
 other part of the kernel. Note that the .setkey() call might happen
 before or after any of these schematics happen, but must not happen
 during any of these are in-flight. Please note that calling .init()
-followed immediately by .finish() is also a perfectly valid
+followed immediately by .final() is also a perfectly valid
 transformation.

 ::
--- a/Documentation/crypto/userspace-if.rst
+++ b/Documentation/crypto/userspace-if.rst
@ -131,9 +131,9 @@ from the kernel crypto API. If the buffer is too small for the message
 digest, the flag MSG_TRUNC is set by the kernel.

 In order to set a message digest key, the calling application must use
-the setsockopt() option of ALG_SET_KEY. If the key is not set the HMAC
-operation is performed without the initial HMAC state change caused by
-the key.
+the setsockopt() option of ALG_SET_KEY or ALG_SET_KEY_BY_KEY_SERIAL. If the
+key is not set the HMAC operation is performed without the initial HMAC state
+change caused by the key.

 Symmetric Cipher API
 --------------------
@ -382,6 +382,15 @@ mentioned optname:

   -  the RNG cipher type to provide the seed

+-  ALG_SET_KEY_BY_KEY_SERIAL -- Setting the key via keyring key_serial_t.
+   This operation behaves the same as ALG_SET_KEY. The decrypted
+   data is copied from a keyring key and is used as the key for
+   symmetric encryption.
+
+   The passed in key_serial_t must have the KEY_(POS|USR|GRP|OTH)_SEARCH
+   permission set, otherwise -EPERM is returned. Supports key types: user,
+   logon, encrypted, and trusted.
+
 -  ALG_SET_AEAD_AUTHSIZE -- Setting the authentication tag size for
   AEAD ciphers. For a encryption operation, the authentication tag of
   the given size will be generated. For a decryption operation, the
--- a/Documentation/dev-tools/ktap.rst
+++ b/Documentation/dev-tools/ktap.rst
@ -80,8 +80,8 @@ have the number 1 and the number then must increase by 1 for each additional
 subtest within the same test at the same nesting level.

 The description is a description of the test, generally the name of
-the test, and can be any string of words (can't include #). The
-description is optional, but recommended.
+the test, and can be any string of characters other than # or a
+newline.  The description is optional, but recommended.

 The directive and any diagnostic data is optional. If either are present, they
 must follow a hash sign, "#".
--- a/Documentation/dev-tools/kunit/architecture.rst
+++ b/Documentation/dev-tools/kunit/architecture.rst
@ -4,16 +4,17 @@
 KUnit Architecture
 ==================

-The KUnit architecture can be divided into two parts:
+The KUnit architecture is divided into two parts:

 - `In-Kernel Testing Framework`_
- `kunit_tool (Command Line Test Harness)`_
+- `kunit_tool (Command-line Test Harness)`_

 In-Kernel Testing Framework
 ===========================

 The kernel testing library supports KUnit tests written in C using
-KUnit. KUnit tests are kernel code. KUnit does several things:
+KUnit. These KUnit tests are kernel code. KUnit performs the following
+tasks:

 - Organizes tests
 - Reports test results
@ -22,19 +23,17 @@ KUnit. KUnit tests are kernel code. KUnit does several things:
 Test Cases
 ----------

-The fundamental unit in KUnit is the test case. The KUnit test cases are
-grouped into KUnit suites. A KUnit test case is a function with type
-signature ``void (*)(struct kunit *test)``.
-These test case functions are wrapped in a struct called
-struct kunit_case.
+The test case is the fundamental unit in KUnit. KUnit test cases are organised
+into suites. A KUnit test case is a function with type signature
+``void (*)(struct kunit *test)``. These test case functions are wrapped in a
+struct called struct kunit_case.

 .. note::
 	``generate_params`` is optional for non-parameterized tests.

-Each KUnit test case gets a ``struct kunit`` context
-object passed to it that tracks a running test. The KUnit assertion
-macros and other KUnit utilities use the ``struct kunit`` context
-object. As an exception, there are two fields:
+Each KUnit test case receives a ``struct kunit`` context object that tracks a
+running test. The KUnit assertion macros and other KUnit utilities use the
+``struct kunit`` context object. As an exception, there are two fields:

 - ``->priv``: The setup functions can use it to store arbitrary test
  user data.
@ -77,12 +76,13 @@ Executor

 The KUnit executor can list and run built-in KUnit tests on boot.
 The Test suites are stored in a linker section
-called ``.kunit_test_suites``. For code, see:
-https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/include/asm-generic/vmlinux.lds.h?h=v5.15#n945.
+called ``.kunit_test_suites``. For the code, see ``KUNIT_TABLE()`` macro
+definition in
+`include/asm-generic/vmlinux.lds.h <https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/include/asm-generic/vmlinux.lds.h?h=v6.0#n950>`_.
 The linker section consists of an array of pointers to
 ``struct kunit_suite``, and is populated by the ``kunit_test_suites()``
-macro. To run all tests compiled into the kernel, the KUnit executor
-iterates over the linker section array.
+macro. The KUnit executor iterates over the linker section array in order to
+run all the tests that are compiled into the kernel.

 .. kernel-figure:: kunit_suitememorydiagram.svg
 	:alt:	KUnit Suite Memory
@ -90,17 +90,17 @@ iterates over the linker section array.
 	KUnit Suite Memory Diagram

 On the kernel boot, the KUnit executor uses the start and end addresses
-of this section to iterate over and run all tests. For code, see:
-https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/lib/kunit/executor.c
-
+of this section to iterate over and run all tests. For the implementation of the
+executor, see
+`lib/kunit/executor.c <https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/lib/kunit/executor.c>`_.
 When built as a module, the ``kunit_test_suites()`` macro defines a
 ``module_init()`` function, which runs all the tests in the compilation
 unit instead of utilizing the executor.

 In KUnit tests, some error classes do not affect other tests
 or parts of the kernel, each KUnit case executes in a separate thread
-context. For code, see:
-https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/lib/kunit/try-catch.c?h=v5.15#n58
+context. See the ``kunit_try_catch_run()`` function in
+`lib/kunit/try-catch.c <https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/lib/kunit/try-catch.c?h=v5.15#n58>`_.

 Assertion Macros
 ----------------
@ -111,37 +111,36 @@ All expectations/assertions are formatted as:

 - ``{EXPECT|ASSERT}`` determines whether the check is an assertion or an
  expectation.
+  In the event of a failure, the testing flow differs as follows:

-	- For an expectation, if the check fails, marks the test as failed
-	  and logs the failure.
+	- For expectations, the test is marked as failed and the failure is logged.

-	- An assertion, on failure, causes the test case to terminate
-	  immediately.
+	- Failing assertions, on the other hand, result in the test case being
+	  terminated immediately.

-		- Assertions call function:
+		- Assertions call the function:
 		  ``void __noreturn kunit_abort(struct kunit *)``.

-		- ``kunit_abort`` calls function:
+		- ``kunit_abort`` calls the function:
 		  ``void __noreturn kunit_try_catch_throw(struct kunit_try_catch *try_catch)``.

-		- ``kunit_try_catch_throw`` calls function:
+		- ``kunit_try_catch_throw`` calls the function:
 		  ``void kthread_complete_and_exit(struct completion *, long) __noreturn;``
 		  and terminates the special thread context.

 - ``<op>`` denotes a check with options: ``TRUE`` (supplied property
-  has the boolean value “true”), ``EQ`` (two supplied properties are
+  has the boolean value "true"), ``EQ`` (two supplied properties are
  equal), ``NOT_ERR_OR_NULL`` (supplied pointer is not null and does not
-  contain an “err” value).
+  contain an "err" value).

 - ``[_MSG]`` prints a custom message on failure.

 Test Result Reporting
 ---------------------
-KUnit prints test results in KTAP format. KTAP is based on TAP14, see:
-https://github.com/isaacs/testanything.github.io/blob/tap14/tap-version-14-specification.md.
-KTAP (yet to be standardized format) works with KUnit and Kselftest.
-The KUnit executor prints KTAP results to dmesg, and debugfs
-(if configured).
+KUnit prints the test results in KTAP format. KTAP is based on TAP14, see
+Documentation/dev-tools/ktap.rst.
+KTAP works with KUnit and Kselftest. The KUnit executor prints KTAP results to
+dmesg, and debugfs (if configured).

 Parameterized Tests
 -------------------
@ -150,33 +149,35 @@ Each KUnit parameterized test is associated with a collection of
 parameters. The test is invoked multiple times, once for each parameter
 value and the parameter is stored in the ``param_value`` field.
 The test case includes a KUNIT_CASE_PARAM() macro that accepts a
-generator function.
-The generator function is passed the previous parameter and returns the next
-parameter. It also provides a macro to generate common-case generators based on
-arrays.
+generator function. The generator function is passed the previous parameter
+and returns the next parameter. It also includes a macro for generating
+array-based common-case generators.

-kunit_tool (Command Line Test Harness)
+kunit_tool (Command-line Test Harness)
 ======================================

-kunit_tool is a Python script ``(tools/testing/kunit/kunit.py)``
-that can be used to configure, build, exec, parse and run (runs other
-commands in order) test results. You can either run KUnit tests using
-kunit_tool or can include KUnit in kernel and parse manually.
+``kunit_tool`` is a Python script, found in ``tools/testing/kunit/kunit.py``. It
+is used to configure, build, execute, parse test results and run all of the
+previous commands in the correct order (i.e., configure, build, execute and parse).
+You have two options for running KUnit tests: either build the kernel with KUnit
+enabled and manually parse the results (see
+Documentation/dev-tools/kunit/run_manual.rst) or use ``kunit_tool``
+(see Documentation/dev-tools/kunit/run_wrapper.rst).

 - ``configure`` command generates the kernel ``.config`` from a
  ``.kunitconfig`` file (and any architecture-specific options).
-  For some architectures, additional config options are specified in the
-  ``qemu_config`` Python script
-  (For example: ``tools/testing/kunit/qemu_configs/powerpc.py``).
+  The Python scripts available in the ``qemu_configs`` folder
+  (for example, ``tools/testing/kunit/qemu_configs/powerpc.py``) contain
+  additional configuration options for specific architectures.
  It parses both the existing ``.config`` and the ``.kunitconfig`` files
-  and ensures that ``.config`` is a superset of ``.kunitconfig``.
-  If this is not the case, it will combine the two and run
-  ``make olddefconfig`` to regenerate the ``.config`` file. It then
-  verifies that ``.config`` is now a superset. This checks if all
-  Kconfig dependencies are correctly specified in ``.kunitconfig``.
-  ``kunit_config.py`` includes the parsing Kconfigs code. The code which
-  runs ``make olddefconfig`` is a part of ``kunit_kernel.py``. You can
-  invoke this command via: ``./tools/testing/kunit/kunit.py config`` and
+  to ensure that ``.config`` is a superset of ``.kunitconfig``.
+  If not, it will combine the two and run ``make olddefconfig`` to regenerate
+  the ``.config`` file. It then checks to see if ``.config`` has become a superset.
+  This verifies that all the Kconfig dependencies are correctly specified in the
+  file ``.kunitconfig``. The ``kunit_config.py`` script contains the code for parsing
+  Kconfigs. The code which runs ``make olddefconfig`` is part of the
+  ``kunit_kernel.py`` script. You can invoke this command through:
+  ``./tools/testing/kunit/kunit.py config`` and
  generate a ``.config`` file.
 - ``build`` runs ``make`` on the kernel tree with required options
  (depends on the architecture and some options, for example: build_dir)
@ -184,8 +185,8 @@ kunit_tool or can include KUnit in kernel and parse manually.
  To build a KUnit kernel from the current ``.config``, you can use the
  ``build`` argument: ``./tools/testing/kunit/kunit.py build``.
 - ``exec`` command executes kernel results either directly (using
-  User-mode Linux configuration), or via an emulator such
-  as QEMU. It reads results from the log via standard
+  User-mode Linux configuration), or through an emulator such
+  as QEMU. It reads results from the log using standard
  output (stdout), and passes them to ``parse`` to be parsed.
  If you already have built a kernel with built-in KUnit tests,
  you can run the kernel and display the test results with the ``exec``
--- a/Documentation/dev-tools/kunit/index.rst
+++ b/Documentation/dev-tools/kunit/index.rst
@ -16,7 +16,6 @@ KUnit - Linux Kernel Unit Testing
 	api/index
 	style
 	faq
-	tips
 	running_tips

 This section details the kernel unit testing framework.
@ -100,14 +99,11 @@ Read also :ref:`kinds-of-tests`.
 How do I use it?
 ================

-*   Documentation/dev-tools/kunit/start.rst - for KUnit new users.
-*   Documentation/dev-tools/kunit/architecture.rst - KUnit architecture.
-*   Documentation/dev-tools/kunit/run_wrapper.rst - run kunit_tool.
-*   Documentation/dev-tools/kunit/run_manual.rst - run tests without kunit_tool.
-*   Documentation/dev-tools/kunit/usage.rst - write tests.
-*   Documentation/dev-tools/kunit/tips.rst - best practices with
-    examples.
-*   Documentation/dev-tools/kunit/api/index.rst - KUnit APIs
-    used for testing.
-*   Documentation/dev-tools/kunit/faq.rst - KUnit common questions and
-    answers.
+You can find a step-by-step guide to writing and running KUnit tests in
+Documentation/dev-tools/kunit/start.rst.
+
+Alternatively, feel free to look through the rest of the KUnit documentation,
+or to experiment with tools/testing/kunit/kunit.py and the example test under
+lib/kunit/kunit-example-test.c.
+
+Happy testing!
--- a/Documentation/dev-tools/kunit/start.rst
+++ b/Documentation/dev-tools/kunit/start.rst
@ -294,13 +294,11 @@ Congrats! You just wrote your first KUnit test.
 Next Steps
 ==========

-*   Documentation/dev-tools/kunit/architecture.rst - KUnit architecture.
-*   Documentation/dev-tools/kunit/run_wrapper.rst - run kunit_tool.
-*   Documentation/dev-tools/kunit/run_manual.rst - run tests without kunit_tool.
-*   Documentation/dev-tools/kunit/usage.rst - write tests.
-*   Documentation/dev-tools/kunit/tips.rst - best practices with
-    examples.
-*   Documentation/dev-tools/kunit/api/index.rst - KUnit APIs
-    used for testing.
-*   Documentation/dev-tools/kunit/faq.rst - KUnit common questions and
-    answers.
+If you're interested in using some of the more advanced features of kunit.py,
+take a look at Documentation/dev-tools/kunit/run_wrapper.rst.
+
+If you'd like to run tests without using kunit.py, check out
+Documentation/dev-tools/kunit/run_manual.rst.
+
+For more information on writing KUnit tests (including some common techniques
+for testing different things), see Documentation/dev-tools/kunit/usage.rst.
--- a/Documentation/dev-tools/kunit/tips.rst
+++ b/Documentation/dev-tools/kunit/tips.rst
@ -1,190 +0,0 @@
-.. SPDX-License-Identifier: GPL-2.0
-
-============================
-Tips For Writing KUnit Tests
-============================
-
-Exiting early on failed expectations
------------------------------------
-
-``KUNIT_EXPECT_EQ`` and friends will mark the test as failed and continue
-execution.  In some cases, it's unsafe to continue and you can use the
-``KUNIT_ASSERT`` variant to exit on failure.
-
-.. code-block:: c
-
-	void example_test_user_alloc_function(struct kunit *test)
-	{
-		void *object = alloc_some_object_for_me();
-
-		/* Make sure we got a valid pointer back. */
-		KUNIT_ASSERT_NOT_ERR_OR_NULL(test, object);
-		do_something_with_object(object);
-	}
-
-Allocating memory
-----------------
-
-Where you would use ``kzalloc``, you should prefer ``kunit_kzalloc`` instead.
-KUnit will ensure the memory is freed once the test completes.
-
-This is particularly useful since it lets you use the ``KUNIT_ASSERT_EQ``
-macros to exit early from a test without having to worry about remembering to
-call ``kfree``.
-
-Example:
-
-.. code-block:: c
-
-	void example_test_allocation(struct kunit *test)
-	{
-		char *buffer = kunit_kzalloc(test, 16, GFP_KERNEL);
-		/* Ensure allocation succeeded. */
-		KUNIT_ASSERT_NOT_ERR_OR_NULL(test, buffer);
-
-		KUNIT_ASSERT_STREQ(test, buffer, "");
-	}
-
-
-Testing static functions
------------------------
-
-If you don't want to expose functions or variables just for testing, one option
-is to conditionally ``#include`` the test file at the end of your .c file, e.g.
-
-.. code-block:: c
-
-	/* In my_file.c */
-
-	static int do_interesting_thing();
-
-	#ifdef CONFIG_MY_KUNIT_TEST
-	#include "my_kunit_test.c"
-	#endif
-
-Injecting test-only code
------------------------
-
-Similarly to the above, it can be useful to add test-specific logic.
-
-.. code-block:: c
-
-	/* In my_file.h */
-
-	#ifdef CONFIG_MY_KUNIT_TEST
-	/* Defined in my_kunit_test.c */
-	void test_only_hook(void);
-	#else
-	void test_only_hook(void) { }
-	#endif
-
-This test-only code can be made more useful by accessing the current kunit
-test, see below.
-
-Accessing the current test
--------------------------
-
-In some cases, you need to call test-only code from outside the test file, e.g.
-like in the example above or if you're providing a fake implementation of an
-ops struct.
-There is a ``kunit_test`` field in ``task_struct``, so you can access it via
-``current->kunit_test``.
-
-Here's a slightly in-depth example of how one could implement "mocking":
-
-.. code-block:: c
-
-	#include <linux/sched.h> /* for current */
-
-	struct test_data {
-		int foo_result;
-		int want_foo_called_with;
-	};
-
-	static int fake_foo(int arg)
-	{
-		struct kunit *test = current->kunit_test;
-		struct test_data *test_data = test->priv;
-
-		KUNIT_EXPECT_EQ(test, test_data->want_foo_called_with, arg);
-		return test_data->foo_result;
-	}
-
-	static void example_simple_test(struct kunit *test)
-	{
-		/* Assume priv is allocated in the suite's .init */
-		struct test_data *test_data = test->priv;
-
-		test_data->foo_result = 42;
-		test_data->want_foo_called_with = 1;
-
-		/* In a real test, we'd probably pass a pointer to fake_foo somewhere
-		 * like an ops struct, etc. instead of calling it directly. */
-		KUNIT_EXPECT_EQ(test, fake_foo(1), 42);
-	}
-
-
-Note: here we're able to get away with using ``test->priv``, but if you wanted
-something more flexible you could use a named ``kunit_resource``, see
-Documentation/dev-tools/kunit/api/test.rst.
-
-Failing the current test
------------------------
-
-But sometimes, you might just want to fail the current test. In that case, we
-have ``kunit_fail_current_test(fmt, args...)`` which is defined in ``<kunit/test-bug.h>`` and
-doesn't require pulling in ``<kunit/test.h>``.
-
-E.g. say we had an option to enable some extra debug checks on some data structure:
-
-.. code-block:: c
-
-	#include <kunit/test-bug.h>
-
-	#ifdef CONFIG_EXTRA_DEBUG_CHECKS
-	static void validate_my_data(struct data *data)
-	{
-		if (is_valid(data))
-			return;
-
-		kunit_fail_current_test("data %p is invalid", data);
-
-		/* Normal, non-KUnit, error reporting code here. */
-	}
-	#else
-	static void my_debug_function(void) { }
-	#endif
-
-
-Customizing error messages
--------------------------
-
-Each of the ``KUNIT_EXPECT`` and ``KUNIT_ASSERT`` macros have a ``_MSG`` variant.
-These take a format string and arguments to provide additional context to the automatically generated error messages.
-
-.. code-block:: c
-
-	char some_str[41];
-	generate_sha1_hex_string(some_str);
-
-	/* Before. Not easy to tell why the test failed. */
-	KUNIT_EXPECT_EQ(test, strlen(some_str), 40);
-
-	/* After. Now we see the offending string. */
-	KUNIT_EXPECT_EQ_MSG(test, strlen(some_str), 40, "some_str='%s'", some_str);
-
-Alternatively, one can take full control over the error message by using ``KUNIT_FAIL()``, e.g.
-
-.. code-block:: c
-
-	/* Before */
-	KUNIT_EXPECT_EQ(test, some_setup_function(), 0);
-
-	/* After: full control over the failure message. */
-	if (some_setup_function())
-		KUNIT_FAIL(test, "Failed to setup thing for testing");
-
-Next Steps
-==========
-*   Optional: see the Documentation/dev-tools/kunit/usage.rst page for a more
-    in-depth explanation of KUnit.
--- a/Documentation/dev-tools/kunit/usage.rst
+++ b/Documentation/dev-tools/kunit/usage.rst
@ -112,11 +112,45 @@ terminates the test case if the condition is not satisfied. For example:
 			KUNIT_EXPECT_LE(test, a[i], a[i + 1]);
 	}

-In this example, the method under test should return pointer to a value. If the
-pointer returns null or an errno, we want to stop the test since the following
-expectation could crash the test case. `ASSERT_NOT_ERR_OR_NULL(...)` allows us
-to bail out of the test case if the appropriate conditions are not satisfied to
-complete the test.
+In this example, we need to be able to allocate an array to test the ``sort()``
+function. So we use ``KUNIT_ASSERT_NOT_ERR_OR_NULL()`` to abort the test if
+there's an allocation error.
+
+.. note::
+   In other test frameworks, ``ASSERT`` macros are often implemented by calling
+   ``return`` so they only work from the test function. In KUnit, we stop the
+   current kthread on failure, so you can call them from anywhere.
+
+Customizing error messages
+--------------------------
+
+Each of the ``KUNIT_EXPECT`` and ``KUNIT_ASSERT`` macros has a ``_MSG``
+variant.  These take a format string and arguments to provide additional
+context to the automatically generated error messages.
+
+.. code-block:: c
+
+	char some_str[41];
+	generate_sha1_hex_string(some_str);
+
+	/* Before. Not easy to tell why the test failed. */
+	KUNIT_EXPECT_EQ(test, strlen(some_str), 40);
+
+	/* After. Now we see the offending string. */
+	KUNIT_EXPECT_EQ_MSG(test, strlen(some_str), 40, "some_str='%s'", some_str);
+
+Alternatively, one can take full control over the error message by using
+``KUNIT_FAIL()``, e.g.
+
+.. code-block:: c
+
+	/* Before */
+	KUNIT_EXPECT_EQ(test, some_setup_function(), 0);
+
+	/* After: full control over the failure message. */
+	if (some_setup_function())
+		KUNIT_FAIL(test, "Failed to setup thing for testing");
+

 Test Suites
 ~~~~~~~~~~~
@ -546,24 +580,6 @@ By reusing the same ``cases`` array from above, we can write the test as a
 		{}
 	};

-Exiting Early on Failed Expectations
------------------------------------
-
-We can use ``KUNIT_EXPECT_EQ`` to mark the test as failed and continue
-execution.  In some cases, it is unsafe to continue. We can use the
-``KUNIT_ASSERT`` variant to exit on failure.
-
-.. code-block:: c
-
-	void example_test_user_alloc_function(struct kunit *test)
-	{
-		void *object = alloc_some_object_for_me();
-
-		/* Make sure we got a valid pointer back. */
-		KUNIT_ASSERT_NOT_ERR_OR_NULL(test, object);
-		do_something_with_object(object);
-	}
-
 Allocating Memory
 -----------------

@ -625,17 +641,23 @@ as shown in next section: *Accessing The Current Test*.
 Accessing The Current Test
 --------------------------

-In some cases, we need to call test-only code from outside the test file.
-For example, see example in section *Injecting Test-Only Code* or if
-we are providing a fake implementation of an ops struct. Using
-``kunit_test`` field in ``task_struct``, we can access it via
-``current->kunit_test``.
+In some cases, we need to call test-only code from outside the test file.  This
+is helpful, for example, when providing a fake implementation of a function, or
+to fail any current test from within an error handler.
+We can do this via the ``kunit_test`` field in ``task_struct``, which we can
+access using the ``kunit_get_current_test()`` function in ``kunit/test-bug.h``.

-The example below includes how to implement "mocking":
+``kunit_get_current_test()`` is safe to call even if KUnit is not enabled. If
+KUnit is not enabled, was built as a module (``CONFIG_KUNIT=m``), or no test is
+running in the current task, it will return ``NULL``. This compiles down to
+either a no-op or a static key check, so will have a negligible performance
+impact when no test is running.
+
+The example below uses this to implement a "mock" implementation of a function, ``foo``:

 .. code-block:: c

-	#include <linux/sched.h> /* for current */
+	#include <kunit/test-bug.h> /* for kunit_get_current_test */

 	struct test_data {
 		int foo_result;
@ -644,7 +666,7 @@ The example below includes how to implement "mocking":

 	static int fake_foo(int arg)
 	{
-		struct kunit *test = current->kunit_test;
+		struct kunit *test = kunit_get_current_test();
 		struct test_data *test_data = test->priv;

 		KUNIT_EXPECT_EQ(test, test_data->want_foo_called_with, arg);
@ -675,7 +697,7 @@ Each test can have multiple resources which have string names providing the same
 flexibility as a ``priv`` member, but also, for example, allowing helper
 functions to create resources without conflicting with each other. It is also
 possible to define a clean up function for each resource, making it easy to
-avoid resource leaks. For more information, see Documentation/dev-tools/kunit/api/test.rst.
+avoid resource leaks. For more information, see Documentation/dev-tools/kunit/api/resource.rst.

 Failing The Current Test
 ------------------------
@ -703,3 +725,9 @@ structures as shown below:
 	static void my_debug_function(void) { }
 	#endif

+``kunit_fail_current_test()`` is safe to call even if KUnit is not enabled. If
+KUnit is not enabled, was built as a module (``CONFIG_KUNIT=m``), or no test is
+running in the current task, it will do nothing. This compiles down to either a
+no-op or a static key check, so will have a negligible performance impact when
+no test is running.
+
--- a/Show More
+++ b/Show More