EFI changes for v5.8:

- preliminary changes for RISC-V - add support for setting the resolution on the EFI framebuffer - simplify kernel image loading for arm64 - Move .bss into .data via the linker script instead of relying on symbol annotations. - Get rid of __pure getters to access global variables - Clean up the config table matching arrays -----BEGIN PGP SIGNATURE----- iQEzBAABCgAdFiEEnNKg2mrY9zMBdeK7wjcgfpV0+n0FAl6i4QUACgkQwjcgfpV0 +n3wxwf/d8+8UD2fsx0MZtGP6SguVFVjiKo/cfaIHPQh+i4uFFqatPpdF/gFDT+M SUVG1xtxqfSbwC0XlbfRf2LIsRjwy9J6WXV1hMARMd5mV9G18FRudMyIHbYPRjJS SQds6/4uTzKpRtUUHusocKfyaH9adxE3dGuFMfI0gMPBy0jpwvmMMyULFUnnxOAX XPuB9EQHAZOPjA/QnmahKyPWKpDOtxx+t5WMejig0+IWHSLsVGzjOsFPEb41Ekqz 7lGP2o/upQ7FPsbEwANcDZcieRW2u8E4y9Yri5gJpEei6TGzo2oHgwBJROjUTRvB 8j1H7xyTO9ADNnKpClrnGpOkPoSLjw== =sg+h -----END PGP SIGNATURE----- Merge tag 'efi-next' of git://git.kernel.org/pub/scm/linux/kernel/git/efi/efi into efi/core Pull EFI changes for v5.8 from Ard Biesheuvel: "- preliminary changes for RISC-V - add support for setting the resolution on the EFI framebuffer - simplify kernel image loading for arm64 - Move .bss into .data via the linker script instead of relying on symbol annotations. - Get rid of __pure getters to access global variables - Clean up the config table matching arrays" Signed-off-by: Ingo Molnar <mingo@kernel.org>
2020-04-25 10:25:02 +02:00 · 2020-04-25 10:25:02 +02:00 · 4353dd3b70
commit 4353dd3b70
parent 594e576d4b 4eb8320bd1
11124 changed files with 511245 additions and 242376 deletions
--- a/.clang-format
+++ b/.clang-format
@ -86,6 +86,8 @@ ForEachMacros:
  - 'bio_for_each_segment_all'
  - 'bio_list_for_each'
  - 'bip_for_each_vec'
+  - 'bitmap_for_each_clear_region'
+  - 'bitmap_for_each_set_region'
  - 'blkg_for_each_descendant_post'
  - 'blkg_for_each_descendant_pre'
  - 'blk_queue_for_each_rl'
@ -115,6 +117,7 @@ ForEachMacros:
  - 'drm_client_for_each_connector_iter'
  - 'drm_client_for_each_modeset'
  - 'drm_connector_for_each_possible_encoder'
+  - 'drm_for_each_bridge_in_chain'
  - 'drm_for_each_connector_iter'
  - 'drm_for_each_crtc'
  - 'drm_for_each_encoder'
@ -136,9 +139,10 @@ ForEachMacros:
  - 'for_each_bio'
  - 'for_each_board_func_rsrc'
  - 'for_each_bvec'
+  - 'for_each_card_auxs'
+  - 'for_each_card_auxs_safe'
  - 'for_each_card_components'
-  - 'for_each_card_links'
-  - 'for_each_card_links_safe'
+  - 'for_each_card_pre_auxs'
  - 'for_each_card_prelinks'
  - 'for_each_card_rtds'
  - 'for_each_card_rtds_safe'
@ -166,6 +170,7 @@ ForEachMacros:
  - 'for_each_dpcm_fe'
  - 'for_each_drhd_unit'
  - 'for_each_dss_dev'
+  - 'for_each_efi_handle'
  - 'for_each_efi_memory_desc'
  - 'for_each_efi_memory_desc_in_map'
  - 'for_each_element'
@ -190,6 +195,7 @@ ForEachMacros:
  - 'for_each_lru'
  - 'for_each_matching_node'
  - 'for_each_matching_node_and_match'
+  - 'for_each_member'
  - 'for_each_memblock'
  - 'for_each_memblock_type'
  - 'for_each_memcg_cache_index'
@ -200,9 +206,11 @@ ForEachMacros:
  - 'for_each_msi_entry'
  - 'for_each_msi_entry_safe'
  - 'for_each_net'
+  - 'for_each_net_continue_reverse'
  - 'for_each_netdev'
  - 'for_each_netdev_continue'
  - 'for_each_netdev_continue_rcu'
+  - 'for_each_netdev_continue_reverse'
  - 'for_each_netdev_feature'
  - 'for_each_netdev_in_bond_rcu'
  - 'for_each_netdev_rcu'
@ -254,10 +262,10 @@ ForEachMacros:
  - 'for_each_reserved_mem_region'
  - 'for_each_rtd_codec_dai'
  - 'for_each_rtd_codec_dai_rollback'
-  - 'for_each_rtdcom'
-  - 'for_each_rtdcom_safe'
+  - 'for_each_rtd_components'
  - 'for_each_set_bit'
  - 'for_each_set_bit_from'
+  - 'for_each_set_clump8'
  - 'for_each_sg'
  - 'for_each_sg_dma_page'
  - 'for_each_sg_page'
@ -267,6 +275,7 @@ ForEachMacros:
  - 'for_each_subelement_id'
  - '__for_each_thread'
  - 'for_each_thread'
+  - 'for_each_wakeup_source'
  - 'for_each_zone'
  - 'for_each_zone_zonelist'
  - 'for_each_zone_zonelist_nodemask'
@ -330,6 +339,7 @@ ForEachMacros:
  - 'list_for_each'
  - 'list_for_each_codec'
  - 'list_for_each_codec_safe'
+  - 'list_for_each_continue'
  - 'list_for_each_entry'
  - 'list_for_each_entry_continue'
  - 'list_for_each_entry_continue_rcu'
@ -351,6 +361,7 @@ ForEachMacros:
  - 'llist_for_each_entry'
  - 'llist_for_each_entry_safe'
  - 'llist_for_each_safe'
+  - 'mci_for_each_dimm'
  - 'media_device_for_each_entity'
  - 'media_device_for_each_intf'
  - 'media_device_for_each_link'
@ -444,10 +455,16 @@ ForEachMacros:
  - 'virtio_device_for_each_vq'
  - 'xa_for_each'
  - 'xa_for_each_marked'
+  - 'xa_for_each_range'
  - 'xa_for_each_start'
  - 'xas_for_each'
  - 'xas_for_each_conflict'
  - 'xas_for_each_marked'
+  - 'xbc_array_for_each_value'
+  - 'xbc_for_each_key_value'
+  - 'xbc_node_for_each_array_value'
+  - 'xbc_node_for_each_child'
+  - 'xbc_node_for_each_key_value'
  - 'zorro_for_each_dev'

 #IncludeBlocks: Preserve # Unknown to clang-format-5.0
--- a/.gitignore
+++ b/.gitignore
@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # NOTE! Don't add files that are generated in specific
 # subdirectories here. Add them in the ".gitignore" file
--- a/.mailmap
+++ b/.mailmap
@ -210,6 +210,7 @@ Oleksij Rempel <linux@rempel-privat.de> <external.Oleksij.Rempel@de.bosch.com>
 Oleksij Rempel <linux@rempel-privat.de> <fixed-term.Oleksij.Rempel@de.bosch.com>
 Oleksij Rempel <linux@rempel-privat.de> <o.rempel@pengutronix.de>
 Oleksij Rempel <linux@rempel-privat.de> <ore@pengutronix.de>
+Pali Rohár <pali@kernel.org> <pali.rohar@gmail.com>
 Paolo 'Blaisorblade' Giarrusso <blaisorblade@yahoo.it>
 Patrick Mochel <mochel@digitalimplant.org>
 Paul Burton <paulburton@kernel.org> <paul.burton@imgtec.com>
@ -225,6 +226,7 @@ Pratyush Anand <pratyush.anand@gmail.com> <pratyush.anand@st.com>
 Praveen BP <praveenbp@ti.com>
 Punit Agrawal <punitagrawal@gmail.com> <punit.agrawal@arm.com>
 Qais Yousef <qsyousef@gmail.com> <qais.yousef@imgtec.com>
+Quentin Monnet <quentin@isovalent.com> <quentin.monnet@netronome.com>
 Quentin Perret <qperret@qperret.net> <quentin.perret@arm.com>
 Rafael J. Wysocki <rjw@rjwysocki.net> <rjw@sisk.pl>
 Rajesh Shah <rajesh.shah@intel.com>
@ -243,9 +245,11 @@ Santosh Shilimkar <ssantosh@kernel.org>
 Santosh Shilimkar <santosh.shilimkar@oracle.org>
 Sascha Hauer <s.hauer@pengutronix.de>
 S.Çağlar Onur <caglar@pardus.org.tr>
+Sakari Ailus <sakari.ailus@linux.intel.com> <sakari.ailus@iki.fi>
 Sean Nyekjaer <sean@geanix.com> <sean.nyekjaer@prevas.dk>
 Sebastian Reichel <sre@kernel.org> <sre@debian.org>
 Sebastian Reichel <sre@kernel.org> <sebastian.reichel@collabora.co.uk>
+Sedat Dilek <sedat.dilek@gmail.com> <sedat.dilek@credativ.de>
 Shiraz Hashim <shiraz.linux.kernel@gmail.com> <shiraz.hashim@st.com>
 Shuah Khan <shuah@kernel.org> <shuahkhan@gmail.com>
 Shuah Khan <shuah@kernel.org> <shuah.khan@hp.com>
--- a/Documentation/.gitignore
+++ b/Documentation/.gitignore
@ -1,2 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0-only
 output
 *.pyc
--- a/Documentation/ABI/obsolete/sysfs-kernel-fadump_enabled
+++ b/Documentation/ABI/obsolete/sysfs-kernel-fadump_enabled
@ -0,0 +1,9 @@
+This ABI is renamed and moved to a new location /sys/kernel/fadump/enabled.
+
+What:		/sys/kernel/fadump_enabled
+Date:		Feb 2012
+Contact:	linuxppc-dev@lists.ozlabs.org
+Description:	read only
+		Primarily used to identify whether the FADump is enabled in
+		the kernel or not.
+User:		Kdump service
--- a/Documentation/ABI/obsolete/sysfs-kernel-fadump_registered
+++ b/Documentation/ABI/obsolete/sysfs-kernel-fadump_registered
@ -0,0 +1,10 @@
+This ABI is renamed and moved to a new location /sys/kernel/fadump/registered.¬
+
+What:		/sys/kernel/fadump_registered
+Date:		Feb 2012
+Contact:	linuxppc-dev@lists.ozlabs.org
+Description:	read/write
+		Helps to control the dump collect feature from userspace.
+		Setting 1 to this file enables the system to collect the
+		dump and 0 to disable it.
+User:		Kdump service
--- a/Documentation/ABI/obsolete/sysfs-kernel-fadump_release_mem
+++ b/Documentation/ABI/obsolete/sysfs-kernel-fadump_release_mem
@ -0,0 +1,10 @@
+This ABI is renamed and moved to a new location /sys/kernel/fadump/release_mem.¬
+
+What:		/sys/kernel/fadump_release_mem
+Date:		Feb 2012
+Contact:	linuxppc-dev@lists.ozlabs.org
+Description:	write only
+		This is a special sysfs file and only available when
+		the system is booted to capture the vmcore using FADump.
+		It is used to release the memory reserved by FADump to
+		save the crash dump.
--- a/Documentation/ABI/obsolete/sysfs-selinux-checkreqprot
+++ b/Documentation/ABI/obsolete/sysfs-selinux-checkreqprot
@ -0,0 +1,23 @@
+What:		/sys/fs/selinux/checkreqprot
+Date:		April 2005 (predates git)
+KernelVersion:	2.6.12-rc2 (predates git)
+Contact:	selinux@vger.kernel.org
+Description:
+
+	The selinuxfs "checkreqprot" node allows SELinux to be configured
+	to check the protection requested by userspace for mmap/mprotect
+	calls instead of the actual protection applied by the kernel.
+	This was a compatibility mechanism for legacy userspace and
+	for the READ_IMPLIES_EXEC personality flag.  However, if set to
+	1, it weakens security by allowing mappings to be made executable
+	without authorization by policy.  The default value of checkreqprot
+	at boot was changed starting in Linux v4.4 to 0 (i.e. check the
+	actual protection), and Android and Linux distributions have been
+	explicitly writing a "0" to /sys/fs/selinux/checkreqprot during
+	initialization for some time.  Support for setting checkreqprot to 1
+	will be	removed in a future kernel release, at which point the kernel
+	will always cease using checkreqprot internally and will always
+	check the actual protections being applied upon mmap/mprotect calls.
+	The checkreqprot selinuxfs node will remain for backward compatibility
+	but will discard writes of the "0" value and will reject writes of the
+	"1" value when this mechanism is removed.
--- a/Documentation/ABI/removed/sysfs-kernel-fadump_release_opalcore
+++ b/Documentation/ABI/removed/sysfs-kernel-fadump_release_opalcore
@ -0,0 +1,9 @@
+This ABI is moved to /sys/firmware/opal/mpipl/release_core.
+
+What:		/sys/kernel/fadump_release_opalcore
+Date:		Sep 2019
+Contact:	linuxppc-dev@lists.ozlabs.org
+Description:	write only
+		The sysfs file is available when the system is booted to
+		collect the dump on OPAL based machine. It used to release
+		the memory used to collect the opalcore.
--- a/Documentation/ABI/removed/sysfs-kernel-uids
+++ b/Documentation/ABI/removed/sysfs-kernel-uids
@ -1,5 +1,5 @@
 What:		/sys/kernel/uids/<uid>/cpu_shares
-Date:		December 2007
+Date:		December 2007, finally removed in kernel v2.6.34-rc1
 Contact:	Dhaval Giani <dhaval@linux.vnet.ibm.com>
 		Srivatsa Vaddagiri <vatsa@linux.vnet.ibm.com>
 Description:
--- a/drivers/staging/most/Documentation/ABI/configfs-most.txt
+++ b/drivers/staging/most/Documentation/ABI/configfs-most.txt
@ -194,11 +194,3 @@ Description:

 		destroy_link	write '1' to this attribute to destroy an
 				active link
-
-What: 		/sys/kernel/config/rdma_cm/<hca>/ports/<port-num>/default_roce_tos
-Date: 		March 8, 2019
-KernelVersion:  5.2
-Description: 	RDMA-CM QPs from HCA <hca> at port <port-num>
-		will be created with this TOS as default.
-		This can be overridden by using the rdma_set_option API.
-		The possible RoCE TOS values are 0-255.
--- a/Documentation/ABI/testing/debugfs-driver-habanalabs
+++ b/Documentation/ABI/testing/debugfs-driver-habanalabs
@ -43,6 +43,20 @@ Description:    Allows the root user to read or write directly through the
                If the IOMMU is disabled, it also allows the root user to read
                or write from the host a device VA of a host mapped memory

+What:           /sys/kernel/debug/habanalabs/hl<n>/data64
+Date:           Jan 2020
+KernelVersion:  5.6
+Contact:        oded.gabbay@gmail.com
+Description:    Allows the root user to read or write 64 bit data directly
+                through the device's PCI bar. Writing to this file generates a
+                write transaction while reading from the file generates a read
+                transaction. This custom interface is needed (instead of using
+                the generic Linux user-space PCI mapping) because the DDR bar
+                is very small compared to the DDR memory and only the driver can
+                move the bar before and after the transaction.
+                If the IOMMU is disabled, it also allows the root user to read
+                or write from the host a device VA of a host mapped memory
+
 What:           /sys/kernel/debug/habanalabs/hl<n>/device
 Date:           Jan 2019
 KernelVersion:  5.1
--- a/Documentation/ABI/testing/sysfs-bus-coresight-devices-cti
+++ b/Documentation/ABI/testing/sysfs-bus-coresight-devices-cti
@ -0,0 +1,241 @@
+What:		/sys/bus/coresight/devices/<cti-name>/enable
+Date:		March 2020
+KernelVersion	5.7
+Contact:	Mike Leach or Mathieu Poirier
+Description:	(RW) Enable/Disable the CTI hardware.
+
+What:		/sys/bus/coresight/devices/<cti-name>/powered
+Date:		March 2020
+KernelVersion	5.7
+Contact:	Mike Leach or Mathieu Poirier
+Description:	(R) Indicate if the CTI hardware is powered.
+
+What:		/sys/bus/coresight/devices/<cti-name>/ctmid
+Date:		March 2020
+KernelVersion	5.7
+Contact:	Mike Leach or Mathieu Poirier
+Description:	(R) Display the associated CTM ID
+
+What:		/sys/bus/coresight/devices/<cti-name>/nr_trigger_cons
+Date:		March 2020
+KernelVersion	5.7
+Contact:	Mike Leach or Mathieu Poirier
+Description:	(R) Number of devices connected to triggers on this CTI
+
+What:		/sys/bus/coresight/devices/<cti-name>/triggers<N>/name
+Date:		March 2020
+KernelVersion	5.7
+Contact:	Mike Leach or Mathieu Poirier
+Description:	(R) Name of connected device <N>
+
+What:		/sys/bus/coresight/devices/<cti-name>/triggers<N>/in_signals
+Date:		March 2020
+KernelVersion	5.7
+Contact:	Mike Leach or Mathieu Poirier
+Description:	(R) Input trigger signals from connected device <N>
+
+What:		/sys/bus/coresight/devices/<cti-name>/triggers<N>/in_types
+Date:		March 2020
+KernelVersion	5.7
+Contact:	Mike Leach or Mathieu Poirier
+Description:	(R) Functional types for the input trigger signals
+		from connected device <N>
+
+What:		/sys/bus/coresight/devices/<cti-name>/triggers<N>/out_signals
+Date:		March 2020
+KernelVersion	5.7
+Contact:	Mike Leach or Mathieu Poirier
+Description:	(R) Output trigger signals to connected device <N>
+
+What:		/sys/bus/coresight/devices/<cti-name>/triggers<N>/out_types
+Date:		March 2020
+KernelVersion	5.7
+Contact:	Mike Leach or Mathieu Poirier
+Description:	(R) Functional types for the output trigger signals
+		to connected device <N>
+
+What:		/sys/bus/coresight/devices/<cti-name>/regs/inout_sel
+Date:		March 2020
+KernelVersion	5.7
+Contact:	Mike Leach or Mathieu Poirier
+Description:	(RW) Select the index for inen and outen registers.
+
+What:		/sys/bus/coresight/devices/<cti-name>/regs/inen
+Date:		March 2020
+KernelVersion	5.7
+Contact:	Mike Leach or Mathieu Poirier
+Description:	(RW) Read or write the CTIINEN register selected by inout_sel.
+
+What:		/sys/bus/coresight/devices/<cti-name>/regs/outen
+Date:		March 2020
+KernelVersion	5.7
+Contact:	Mike Leach or Mathieu Poirier
+Description:	(RW) Read or write the CTIOUTEN register selected by inout_sel.
+
+What:		/sys/bus/coresight/devices/<cti-name>/regs/gate
+Date:		March 2020
+KernelVersion	5.7
+Contact:	Mike Leach or Mathieu Poirier
+Description:	(RW) Read or write CTIGATE register.
+
+What:		/sys/bus/coresight/devices/<cti-name>/regs/asicctl
+Date:		March 2020
+KernelVersion	5.7
+Contact:	Mike Leach or Mathieu Poirier
+Description:	(RW) Read or write ASICCTL register.
+
+What:		/sys/bus/coresight/devices/<cti-name>/regs/intack
+Date:		March 2020
+KernelVersion	5.7
+Contact:	Mike Leach or Mathieu Poirier
+Description:	(W) Write the INTACK register.
+
+What:		/sys/bus/coresight/devices/<cti-name>/regs/appset
+Date:		March 2020
+KernelVersion	5.7
+Contact:	Mike Leach or Mathieu Poirier
+Description:	(RW) Set CTIAPPSET register to activate channel. Read back to
+		determine current value of register.
+
+What:		/sys/bus/coresight/devices/<cti-name>/regs/appclear
+Date:		March 2020
+KernelVersion	5.7
+Contact:	Mike Leach or Mathieu Poirier
+Description:	(W) Write APPCLEAR register to deactivate channel.
+
+What:		/sys/bus/coresight/devices/<cti-name>/regs/apppulse
+Date:		March 2020
+KernelVersion	5.7
+Contact:	Mike Leach or Mathieu Poirier
+Description:	(W) Write APPPULSE to pulse a channel active for one clock
+		cycle.
+
+What:		/sys/bus/coresight/devices/<cti-name>/regs/chinstatus
+Date:		March 2020
+KernelVersion	5.7
+Contact:	Mike Leach or Mathieu Poirier
+Description:	(R) Read current status of channel inputs.
+
+What:		/sys/bus/coresight/devices/<cti-name>/regs/choutstatus
+Date:		March 2020
+KernelVersion	5.7
+Contact:	Mike Leach or Mathieu Poirier
+Description:	(R) read current status of channel outputs.
+
+What:		/sys/bus/coresight/devices/<cti-name>/regs/triginstatus
+Date:		March 2020
+KernelVersion	5.7
+Contact:	Mike Leach or Mathieu Poirier
+Description:	(R) read current status of input trigger signals
+
+What:		/sys/bus/coresight/devices/<cti-name>/regs/trigoutstatus
+Date:		March 2020
+KernelVersion	5.7
+Contact:	Mike Leach or Mathieu Poirier
+Description:	(R) read current status of output trigger signals.
+
+What:		/sys/bus/coresight/devices/<cti-name>/channels/trigin_attach
+Date:		March 2020
+KernelVersion	5.7
+Contact:	Mike Leach or Mathieu Poirier
+Description:	(W) Attach a CTI input trigger to a CTM channel.
+
+What:		/sys/bus/coresight/devices/<cti-name>/channels/trigin_detach
+Date:		March 2020
+KernelVersion	5.7
+Contact:	Mike Leach or Mathieu Poirier
+Description:	(W) Detach a CTI input trigger from a CTM channel.
+
+What:		/sys/bus/coresight/devices/<cti-name>/channels/trigout_attach
+Date:		March 2020
+KernelVersion	5.7
+Contact:	Mike Leach or Mathieu Poirier
+Description:	(W) Attach a CTI output trigger to a CTM channel.
+
+What:		/sys/bus/coresight/devices/<cti-name>/channels/trigout_detach
+Date:		March 2020
+KernelVersion	5.7
+Contact:	Mike Leach or Mathieu Poirier
+Description:	(W) Detach a CTI output trigger from a CTM channel.
+
+What:		/sys/bus/coresight/devices/<cti-name>/channels/chan_gate_enable
+Date:		March 2020
+KernelVersion	5.7
+Contact:	Mike Leach or Mathieu Poirier
+Description:	(RW) Enable CTIGATE for single channel (W) or list enabled
+		channels through the gate (R).
+
+What:		/sys/bus/coresight/devices/<cti-name>/channels/chan_gate_disable
+Date:		March 2020
+KernelVersion	5.7
+Contact:	Mike Leach or Mathieu Poirier
+Description:	(W) Disable CTIGATE for single channel.
+
+What:		/sys/bus/coresight/devices/<cti-name>/channels/chan_set
+Date:		March 2020
+KernelVersion	5.7
+Contact:	Mike Leach or Mathieu Poirier
+Description:	(W) Activate a single channel.
+
+What:		/sys/bus/coresight/devices/<cti-name>/channels/chan_clear
+Date:		March 2020
+KernelVersion	5.7
+Contact:	Mike Leach or Mathieu Poirier
+Description:	(W) Deactivate a single channel.
+
+What:		/sys/bus/coresight/devices/<cti-name>/channels/chan_pulse
+Date:		March 2020
+KernelVersion	5.7
+Contact:	Mike Leach or Mathieu Poirier
+Description:	(W) Pulse a single channel - activate for a single clock cycle.
+
+What:		/sys/bus/coresight/devices/<cti-name>/channels/trigout_filtered
+Date:		March 2020
+KernelVersion	5.7
+Contact:	Mike Leach or Mathieu Poirier
+Description:	(R) List of output triggers filtered across all connections.
+
+What:		/sys/bus/coresight/devices/<cti-name>/channels/trig_filter_enable
+Date:		March 2020
+KernelVersion	5.7
+Contact:	Mike Leach or Mathieu Poirier
+Description:	(RW) Enable or disable trigger output signal filtering.
+
+What:		/sys/bus/coresight/devices/<cti-name>/channels/chan_inuse
+Date:		March 2020
+KernelVersion	5.7
+Contact:	Mike Leach or Mathieu Poirier
+Description:	(R) show channels with at least one attached trigger signal.
+
+What:		/sys/bus/coresight/devices/<cti-name>/channels/chan_free
+Date:		March 2020
+KernelVersion	5.7
+Contact:	Mike Leach or Mathieu Poirier
+Description:	(R) show channels with no attached trigger signals.
+
+What:		/sys/bus/coresight/devices/<cti-name>/channels/chan_xtrigs_sel
+Date:		March 2020
+KernelVersion	5.7
+Contact:	Mike Leach or Mathieu Poirier
+Description:	(RW) Write channel number to select a channel to view, read to
+		see selected channel number.
+
+What:		/sys/bus/coresight/devices/<cti-name>/channels/chan_xtrigs_in
+Date:		March 2020
+KernelVersion	5.7
+Contact:	Mike Leach or Mathieu Poirier
+Description:	(R) Read to see input triggers connected to selected view
+		channel.
+
+What:		/sys/bus/coresight/devices/<cti-name>/channels/chan_xtrigs_out
+Date:		March 2020
+KernelVersion	5.7
+Contact:	Mike Leach or Mathieu Poirier
+Description:	(R) Read to see output triggers connected to selected view
+		channel.
+
+What:		/sys/bus/coresight/devices/<cti-name>/channels/chan_xtrigs_reset
+Date:		March 2020
+KernelVersion	5.7
+Contact:	Mike Leach or Mathieu Poirier
+Description:	(W) Clear all channel / trigger programming.
--- a/Documentation/ABI/testing/sysfs-bus-counter-104-quad-8
+++ b/Documentation/ABI/testing/sysfs-bus-counter-104-quad-8
@ -1,3 +1,28 @@
+What:		/sys/bus/counter/devices/counterX/signalY/cable_fault
+KernelVersion:	5.7
+Contact:	linux-iio@vger.kernel.org
+Description:
+		Read-only attribute that indicates whether a differential
+		encoder cable fault (not connected or loose wires) is detected
+		for the respective channel of Signal Y. Valid attribute values
+		are boolean. Detection must first be enabled via the
+		corresponding cable_fault_enable attribute.
+
+What:		/sys/bus/counter/devices/counterX/signalY/cable_fault_enable
+KernelVersion:	5.7
+Contact:	linux-iio@vger.kernel.org
+Description:
+		Whether detection of differential encoder cable faults for the
+		respective channel of Signal Y is enabled. Valid attribute
+		values are boolean.
+
+What:		/sys/bus/counter/devices/counterX/signalY/filter_clock_prescaler
+KernelVersion:	5.7
+Contact:	linux-iio@vger.kernel.org
+Description:
+		Filter clock factor for input Signal Y. This prescaler value
+		affects the inputs of both quadrature pair signals.
+
 What:		/sys/bus/counter/devices/counterX/signalY/index_polarity
 KernelVersion:	5.2
 Contact:	linux-iio@vger.kernel.org
--- a/Documentation/ABI/testing/sysfs-bus-iio-adc-ad7192
+++ b/Documentation/ABI/testing/sysfs-bus-iio-adc-ad7192
@ -2,17 +2,22 @@ What:		/sys/bus/iio/devices/iio:deviceX/ac_excitation_en
 KernelVersion:
 Contact:	linux-iio@vger.kernel.org
 Description:
-		Reading gives the state of AC excitation.
-		Writing '1' enables AC excitation.
+		This attribute, if available, is used to enable the AC
+		excitation mode found on some converters. In ac excitation mode,
+		the polarity of the excitation voltage is reversed on
+		alternate cycles, to eliminate DC errors.

 What:		/sys/bus/iio/devices/iio:deviceX/bridge_switch_en
 KernelVersion:
 Contact:	linux-iio@vger.kernel.org
 Description:
-		This bridge switch is used to disconnect it when there is a
-		need to minimize the system current consumption.
-		Reading gives the state of the bridge switch.
-		Writing '1' enables the bridge switch.
+		This attribute, if available, is used to close or open the
+		bridge power down switch found on some converters.
+		In bridge applications, such as strain gauges and load cells,
+		the bridge itself consumes the majority of the current in the
+		system. To minimize the current consumption of the system,
+		the bridge can be disconnected (when it is not being used
+		using the bridge_switch_en attribute.

 What:		/sys/bus/iio/devices/iio:deviceX/in_voltagex_sys_calibration
 KernelVersion:
@ -21,6 +26,13 @@ Description:
 		Initiates the system calibration procedure. This is done on a
 		single channel at a time. Write '1' to start the calibration.

+What:		/sys/bus/iio/devices/iio:deviceX/in_voltage2-voltage2_shorted_raw
+KernelVersion:
+Contact:	linux-iio@vger.kernel.org
+Description:
+		Measure voltage from AIN2 pin connected to AIN(+)
+		and AIN(-) shorted.
+
 What:		/sys/bus/iio/devices/iio:deviceX/in_voltagex_sys_calibration_mode_available
 KernelVersion:
 Contact:	linux-iio@vger.kernel.org
--- a/Documentation/ABI/testing/sysfs-bus-intel_th-devices-msc
+++ b/Documentation/ABI/testing/sysfs-bus-intel_th-devices-msc
@ -40,3 +40,11 @@ Description:	(RW) Trigger window switch for the MSC's buffer, in
 		triggering a window switch for the buffer. Returns an error in any
 		other operating mode or attempts to write something other than "1".

+What:		/sys/bus/intel_th/devices/<intel_th_id>-msc<msc-id>/stop_on_full
+Date:		March 2020
+KernelVersion:	5.7
+Contact:	Alexander Shishkin <alexander.shishkin@linux.intel.com>
+Description:	(RW) Configure whether trace stops when the last available window
+		becomes full (1/y/Y) or wraps around and continues until the next
+		window becomes available again (0/n/N).
+
--- a/drivers/staging/most/Documentation/ABI/sysfs-bus-most.txt
+++ b/drivers/staging/most/Documentation/ABI/sysfs-bus-most.txt
@ -5,7 +5,7 @@ Contact:	Christian Gromm <christian.gromm@microchip.com>
 Description:
 		Provides information about the interface type and the physical
 		location of the device. Hardware attached via USB, for instance,
-		might return <usb_device 1-1.1:1.0>
+		might return <1-1.1:1.0>
 Users:

 What:		/sys/bus/most/devices/.../interface
@ -278,25 +278,7 @@ Description:
 		Indicates whether current channel ran out of buffers.
 Users:

-What:		/sys/bus/most/drivers/mostcore/add_link
-Date:		March 2017
-KernelVersion:	4.15
-Contact:	Christian Gromm <christian.gromm@microchip.com>
-Description:
-		This is used to link a channel to a component of the
-		mostcore. A link created by writing to this file is
-		referred to as pipe.
-Users:
-
-What:		/sys/bus/most/drivers/mostcore/remove_link
-Date:		March 2017
-KernelVersion:	4.15
-Contact:	Christian Gromm <christian.gromm@microchip.com>
-Description:
-		This is used to unlink a channel from a component.
-Users:
-
-What:		/sys/bus/most/drivers/mostcore/components
+What:		/sys/bus/most/drivers/most_core/components
 Date:		March 2017
 KernelVersion:	4.15
 Contact:	Christian Gromm <christian.gromm@microchip.com>
@ -304,7 +286,7 @@ Description:
 		This is used to retrieve a list of registered components.
 Users:

-What:		/sys/bus/most/drivers/mostcore/links
+What:		/sys/bus/most/drivers/most_core/links
 Date:		March 2017
 KernelVersion:	4.15
 Contact:	Christian Gromm <christian.gromm@microchip.com>
--- a/Documentation/ABI/testing/sysfs-class-typec
+++ b/Documentation/ABI/testing/sysfs-class-typec
@ -20,13 +20,13 @@ Date:		April 2017
 Contact:	Heikki Krogerus <heikki.krogerus@linux.intel.com>
 Description:
 		The supported power roles. This attribute can be used to request
-		power role swap on the port when the port supports USB Power
-		Delivery. Swapping is supported as synchronous operation, so
-		write(2) to the attribute will not return until the operation
-		has finished. The attribute is notified about role changes so
-		that poll(2) on the attribute wakes up. Change on the role will
-		also generate uevent KOBJ_CHANGE. The current role is show in
-		brackets, for example "[source] sink" when in source mode.
+		power role swap on the port. Swapping is supported as
+		synchronous operation, so write(2) to the attribute will not
+		return until the operation has finished. The attribute is
+		notified about role changes so that poll(2) on the attribute
+		wakes up. Change on the role will also generate uevent
+		KOBJ_CHANGE. The current role is show in brackets, for example
+		"[source] sink" when in source mode.

 		Valid values: source, sink

@ -108,6 +108,15 @@ Contact:	Heikki Krogerus <heikki.krogerus@linux.intel.com>
 Description:
 		Revision number of the supported USB Type-C specification.

+What:		/sys/class/typec/<port>/orientation
+Date:		February 2020
+Contact:	Badhri Jagan Sridharan <badhri@google.com>
+Description:
+		Indicates the active orientation of the Type-C connector.
+		Valid values:
+		- "normal": CC1 orientation
+		- "reverse": CC2 orientation
+		- "unknown": Orientation cannot be determined.

 USB Type-C partner devices (eg. /sys/class/typec/port0-partner/)

--- a/Documentation/ABI/testing/sysfs-driver-jz4780-efuse
+++ b/Documentation/ABI/testing/sysfs-driver-jz4780-efuse
@ -0,0 +1,16 @@
+What:		/sys/devices/*/<our-device>/nvmem
+Date:		December 2017
+Contact:	PrasannaKumar Muralidharan <prasannatsmkumar@gmail.com>
+Description:	read-only access to the efuse on the Ingenic JZ4780 SoC
+		The SoC has a one time programmable 8K efuse that is
+		split into segments. The driver supports read only.
+		The segments are
+		0x000   64 bit Random Number
+		0x008  128 bit Ingenic Chip ID
+		0x018  128 bit Customer ID
+		0x028 3520 bit Reserved
+		0x1E0    8 bit Protect Segment
+		0x1E1 2296 bit HDMI Key
+		0x300 2048 bit Security boot key
+Users:		any user space application which wants to read the Chip
+		and Customer ID
--- a/Documentation/ABI/testing/sysfs-driver-uacce
+++ b/Documentation/ABI/testing/sysfs-driver-uacce
@ -0,0 +1,39 @@
+What:           /sys/class/uacce/<dev_name>/api
+Date:           Feb 2020
+KernelVersion:  5.7
+Contact:        linux-accelerators@lists.ozlabs.org
+Description:    Api of the device
+                Can be any string and up to userspace to parse.
+                Application use the api to match the correct driver
+
+What:           /sys/class/uacce/<dev_name>/flags
+Date:           Feb 2020
+KernelVersion:  5.7
+Contact:        linux-accelerators@lists.ozlabs.org
+Description:    Attributes of the device, see UACCE_DEV_xxx flag defined in uacce.h
+
+What:           /sys/class/uacce/<dev_name>/available_instances
+Date:           Feb 2020
+KernelVersion:  5.7
+Contact:        linux-accelerators@lists.ozlabs.org
+Description:    Available instances left of the device
+                Return -ENODEV if uacce_ops get_available_instances is not provided
+
+What:           /sys/class/uacce/<dev_name>/algorithms
+Date:           Feb 2020
+KernelVersion:  5.7
+Contact:        linux-accelerators@lists.ozlabs.org
+Description:    Algorithms supported by this accelerator, separated by new line.
+                Can be any string and up to userspace to parse.
+
+What:           /sys/class/uacce/<dev_name>/region_mmio_size
+Date:           Feb 2020
+KernelVersion:  5.7
+Contact:        linux-accelerators@lists.ozlabs.org
+Description:    Size (bytes) of mmio region queue file
+
+What:           /sys/class/uacce/<dev_name>/region_dus_size
+Date:           Feb 2020
+KernelVersion:  5.7
+Contact:        linux-accelerators@lists.ozlabs.org
+Description:    Size (bytes) of dus region queue file
--- a/Documentation/ABI/testing/sysfs-firmware-opal-sensor-groups
+++ b/Documentation/ABI/testing/sysfs-firmware-opal-sensor-groups
@ -0,0 +1,21 @@
+What:		/sys/firmware/opal/sensor_groups
+Date:		August 2017
+Contact:	Linux for PowerPC mailing list <linuxppc-dev@ozlabs.org>
+Description:	Sensor groups directory for POWER9 powernv servers
+
+		Each folder in this directory contains a sensor group
+		which are classified based on type of the sensor
+		like power, temperature, frequency, current, etc. They
+		can also indicate the group of sensors belonging to
+		different owners like CSM, Profiler, Job-Scheduler
+
+What:		/sys/firmware/opal/sensor_groups/<sensor_group_name>/clear
+Date:		August 2017
+Contact:	Linux for PowerPC mailing list <linuxppc-dev@ozlabs.org>
+Description:	Sysfs file to clear the min-max of all the sensors
+		belonging to the group.
+
+		Writing 1 to this file will clear the minimum and
+		maximum values of all the sensors in the group.
+		In POWER9, the min-max of a sensor is the historical minimum
+		and maximum value of the sensor cached by OCC.
--- a/Documentation/ABI/testing/sysfs-fs-f2fs
+++ b/Documentation/ABI/testing/sysfs-fs-f2fs
@ -318,3 +318,8 @@ Date:		September 2019
 Contact:	"Hridya Valsaraju" <hridya@google.com>
 Description:	Average number of valid blocks.
 		Available when CONFIG_F2FS_STAT_FS=y.
+
+What:		/sys/fs/f2fs/<disk>/mounted_time_sec
+Date:		February 2020
+Contact:	"Jaegeuk Kim" <jaegeuk@kernel.org>
+Description:	Show the mounted time in secs of this partition.
--- a/Documentation/ABI/testing/sysfs-kernel-fadump
+++ b/Documentation/ABI/testing/sysfs-kernel-fadump
@ -0,0 +1,40 @@
+What:		/sys/kernel/fadump/*
+Date:		Dec 2019
+Contact:	linuxppc-dev@lists.ozlabs.org
+Description:
+		The /sys/kernel/fadump/* is a collection of FADump sysfs
+		file provide information about the configuration status
+		of Firmware Assisted Dump (FADump).
+
+What:		/sys/kernel/fadump/enabled
+Date:		Dec 2019
+Contact:	linuxppc-dev@lists.ozlabs.org
+Description:	read only
+		Primarily used to identify whether the FADump is enabled in
+		the kernel or not.
+User:		Kdump service
+
+What:		/sys/kernel/fadump/registered
+Date:		Dec 2019
+Contact:	linuxppc-dev@lists.ozlabs.org
+Description:	read/write
+		Helps to control the dump collect feature from userspace.
+		Setting 1 to this file enables the system to collect the
+		dump and 0 to disable it.
+User:		Kdump service
+
+What:		/sys/kernel/fadump/release_mem
+Date:		Dec 2019
+Contact:	linuxppc-dev@lists.ozlabs.org
+Description:	write only
+		This is a special sysfs file and only available when
+		the system is booted to capture the vmcore using FADump.
+		It is used to release the memory reserved by FADump to
+		save the crash dump.
+
+What:		/sys/kernel/fadump/mem_reserved
+Date:		Dec 2019
+Contact:	linuxppc-dev@lists.ozlabs.org
+Description:	read only
+		Provide information about the amount of memory reserved by
+		FADump to save the crash dump in bytes.
--- a/Documentation/ABI/testing/sysfs-platform-dell-laptop
+++ b/Documentation/ABI/testing/sysfs-platform-dell-laptop
@ -2,7 +2,7 @@ What:		/sys/class/leds/dell::kbd_backlight/als_enabled
 Date:		December 2014
 KernelVersion:	3.19
 Contact:	Gabriele Mazzotta <gabriele.mzt@gmail.com>,
-		Pali Rohár <pali.rohar@gmail.com>
+		Pali Rohár <pali@kernel.org>
 Description:
 		This file allows to control the automatic keyboard
 		illumination mode on some systems that have an ambient
@ -13,7 +13,7 @@ What:		/sys/class/leds/dell::kbd_backlight/als_setting
 Date:		December 2014
 KernelVersion:	3.19
 Contact:	Gabriele Mazzotta <gabriele.mzt@gmail.com>,
-		Pali Rohár <pali.rohar@gmail.com>
+		Pali Rohár <pali@kernel.org>
 Description:
 		This file allows to specifiy the on/off threshold value,
 		as reported by the ambient light sensor.
@ -22,7 +22,7 @@ What:		/sys/class/leds/dell::kbd_backlight/start_triggers
 Date:		December 2014
 KernelVersion:	3.19
 Contact:	Gabriele Mazzotta <gabriele.mzt@gmail.com>,
-		Pali Rohár <pali.rohar@gmail.com>
+		Pali Rohár <pali@kernel.org>
 Description:
 		This file allows to control the input triggers that
 		turn on the keyboard backlight illumination that is
@ -45,7 +45,7 @@ What:		/sys/class/leds/dell::kbd_backlight/stop_timeout
 Date:		December 2014
 KernelVersion:	3.19
 Contact:	Gabriele Mazzotta <gabriele.mzt@gmail.com>,
-		Pali Rohár <pali.rohar@gmail.com>
+		Pali Rohár <pali@kernel.org>
 Description:
 		This file allows to specify the interval after which the
 		keyboard illumination is disabled because of inactivity.
--- a/Documentation/ABI/testing/sysfs-tty
+++ b/Documentation/ABI/testing/sysfs-tty
@ -154,3 +154,10 @@ Description:
 		 device specification. For example, when user sets 7bytes on
 		 16550A, which has 1/4/8/14 bytes trigger, the RX trigger is
 		 automatically changed to 4 bytes.
+
+What:		/sys/class/tty/ttyS0/console
+Date:		February 2020
+Contact:	Andy Shevchenko <andriy.shevchenko@linux.intel.com>
+Description:
+		 Allows user to detach or attach back the given device as
+		 kernel console. It shows and accepts a boolean variable.
--- a/Documentation/Makefile
+++ b/Documentation/Makefile
@ -2,7 +2,8 @@
 # Makefile for Sphinx documentation
 #

-subdir-y := devicetree/bindings/
+# for cleaning
+subdir- := devicetree/bindings

 # Check for broken documentation file references
 ifeq ($(CONFIG_WARN_MISSING_DOCUMENTS),y)
@ -13,7 +14,7 @@ endif
 SPHINXBUILD   = sphinx-build
 SPHINXOPTS    =
 SPHINXDIRS    = .
-_SPHINXDIRS   = $(patsubst $(srctree)/Documentation/%/index.rst,%,$(wildcard $(srctree)/Documentation/*/index.rst))
+_SPHINXDIRS   = $(sort $(patsubst $(srctree)/Documentation/%/index.rst,%,$(wildcard $(srctree)/Documentation/*/index.rst)))
 SPHINX_CONF   = conf.py
 PAPER         =
 BUILDDIR      = $(obj)/output
--- a/Documentation/PCI/boot-interrupts.rst
+++ b/Documentation/PCI/boot-interrupts.rst
@ -0,0 +1,155 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===============
+Boot Interrupts
+===============
+
+:Author: - Sean V Kelley <sean.v.kelley@linux.intel.com>
+
+Overview
+========
+
+On PCI Express, interrupts are represented with either MSI or inbound
+interrupt messages (Assert_INTx/Deassert_INTx). The integrated IO-APIC in a
+given Core IO converts the legacy interrupt messages from PCI Express to
+MSI interrupts.  If the IO-APIC is disabled (via the mask bits in the
+IO-APIC table entries), the messages are routed to the legacy PCH. This
+in-band interrupt mechanism was traditionally necessary for systems that
+did not support the IO-APIC and for boot. Intel in the past has used the
+term "boot interrupts" to describe this mechanism. Further, the PCI Express
+protocol describes this in-band legacy wire-interrupt INTx mechanism for
+I/O devices to signal PCI-style level interrupts. The subsequent paragraphs
+describe problems with the Core IO handling of INTx message routing to the
+PCH and mitigation within BIOS and the OS.
+
+
+Issue
+=====
+
+When in-band legacy INTx messages are forwarded to the PCH, they in turn
+trigger a new interrupt for which the OS likely lacks a handler. When an
+interrupt goes unhandled over time, they are tracked by the Linux kernel as
+Spurious Interrupts. The IRQ will be disabled by the Linux kernel after it
+reaches a specific count with the error "nobody cared". This disabled IRQ
+now prevents valid usage by an existing interrupt which may happen to share
+the IRQ line.
+
+  irq 19: nobody cared (try booting with the "irqpoll" option)
+  CPU: 0 PID: 2988 Comm: irq/34-nipalk Tainted: 4.14.87-rt49-02410-g4a640ec-dirty #1
+  Hardware name: National Instruments NI PXIe-8880/NI PXIe-8880, BIOS 2.1.5f1 01/09/2020
+  Call Trace:
+  <IRQ>
+   ? dump_stack+0x46/0x5e
+   ? __report_bad_irq+0x2e/0xb0
+   ? note_interrupt+0x242/0x290
+   ? nNIKAL100_memoryRead16+0x8/0x10 [nikal]
+   ? handle_irq_event_percpu+0x55/0x70
+   ? handle_irq_event+0x4f/0x80
+   ? handle_fasteoi_irq+0x81/0x180
+   ? handle_irq+0x1c/0x30
+   ? do_IRQ+0x41/0xd0
+   ? common_interrupt+0x84/0x84
+  </IRQ>
+
+  handlers:
+  irq_default_primary_handler threaded usb_hcd_irq
+  Disabling IRQ #19
+
+
+Conditions
+==========
+
+The use of threaded interrupts is the most likely condition to trigger
+this problem today. Threaded interrupts may not be reenabled after the IRQ
+handler wakes. These "one shot" conditions mean that the threaded interrupt
+needs to keep the interrupt line masked until the threaded handler has run.
+Especially when dealing with high data rate interrupts, the thread needs to
+run to completion; otherwise some handlers will end up in stack overflows
+since the interrupt of the issuing device is still active.
+
+Affected Chipsets
+=================
+
+The legacy interrupt forwarding mechanism exists today in a number of
+devices including but not limited to chipsets from AMD/ATI, Broadcom, and
+Intel. Changes made through the mitigations below have been applied to
+drivers/pci/quirks.c
+
+Starting with ICX there are no longer any IO-APICs in the Core IO's
+devices.  IO-APIC is only in the PCH.  Devices connected to the Core IO's
+PCIe Root Ports will use native MSI/MSI-X mechanisms.
+
+Mitigations
+===========
+
+The mitigations take the form of PCI quirks. The preference has been to
+first identify and make use of a means to disable the routing to the PCH.
+In such a case a quirk to disable boot interrupt generation can be
+added.[1]
+
+  Intel® 6300ESB I/O Controller Hub
+  Alternate Base Address Register:
+   BIE: Boot Interrupt Enable
+	  0 = Boot interrupt is enabled.
+	  1 = Boot interrupt is disabled.
+
+  Intel® Sandy Bridge through Sky Lake based Xeon servers:
+  Coherent Interface Protocol Interrupt Control
+   dis_intx_route2pch/dis_intx_route2ich/dis_intx_route2dmi2:
+	  When this bit is set. Local INTx messages received from the
+	  Intel® Quick Data DMA/PCI Express ports are not routed to legacy
+	  PCH - they are either converted into MSI via the integrated IO-APIC
+	  (if the IO-APIC mask bit is clear in the appropriate entries)
+	  or cause no further action (when mask bit is set)
+
+In the absence of a way to directly disable the routing, another approach
+has been to make use of PCI Interrupt pin to INTx routing tables for
+purposes of redirecting the interrupt handler to the rerouted interrupt
+line by default.  Therefore, on chipsets where this INTx routing cannot be
+disabled, the Linux kernel will reroute the valid interrupt to its legacy
+interrupt. This redirection of the handler will prevent the occurrence of
+the spurious interrupt detection which would ordinarily disable the IRQ
+line due to excessive unhandled counts.[2]
+
+The config option X86_REROUTE_FOR_BROKEN_BOOT_IRQS exists to enable (or
+disable) the redirection of the interrupt handler to the PCH interrupt
+line. The option can be overridden by either pci=ioapicreroute or
+pci=noioapicreroute.[3]
+
+
+More Documentation
+==================
+
+There is an overview of the legacy interrupt handling in several datasheets
+(6300ESB and 6700PXH below). While largely the same, it provides insight
+into the evolution of its handling with chipsets.
+
+Example of disabling of the boot interrupt
+------------------------------------------
+
+Intel® 6300ESB I/O Controller Hub (Document # 300641-004US)
+	5.7.3 Boot Interrupt
+	https://www.intel.com/content/dam/doc/datasheet/6300esb-io-controller-hub-datasheet.pdf
+
+Intel® Xeon® Processor E5-1600/2400/2600/4600 v3 Product Families
+Datasheet - Volume 2: Registers (Document # 330784-003)
+	6.6.41 cipintrc Coherent Interface Protocol Interrupt Control
+	https://www.intel.com/content/dam/www/public/us/en/documents/datasheets/xeon-e5-v3-datasheet-vol-2.pdf
+
+Example of handler rerouting
+----------------------------
+
+Intel® 6700PXH 64-bit PCI Hub (Document # 302628)
+	2.15.2 PCI Express Legacy INTx Support and Boot Interrupt
+	https://www.intel.com/content/dam/doc/datasheet/6700pxh-64-bit-pci-hub-datasheet.pdf
+
+
+If you have any legacy PCI interrupt questions that aren't answered, email me.
+
+Cheers,
+    Sean V Kelley
+    sean.v.kelley@linux.intel.com
+
+[1] https://lore.kernel.org/r/12131949181903-git-send-email-sassmann@suse.de/
+[2] https://lore.kernel.org/r/12131949182094-git-send-email-sassmann@suse.de/
+[3] https://lore.kernel.org/r/487C8EA7.6020205@suse.de/
--- a/Documentation/PCI/index.rst
+++ b/Documentation/PCI/index.rst
@ -16,3 +16,4 @@ Linux PCI Bus Subsystem
   pci-error-recovery
   pcieaer-howto
   endpoint/index
+   boot-interrupts
--- a/Documentation/PCI/pci.rst
+++ b/Documentation/PCI/pci.rst
@ -239,7 +239,7 @@ from the PCI device config space. Use the values in the pci_dev structure
 as the PCI "bus address" might have been remapped to a "host physical"
 address by the arch/chip-set specific kernel support.

-See Documentation/io-mapping.txt for how to access device registers
+See Documentation/driver-api/io-mapping.rst for how to access device registers
 or device memory.

 The device driver needs to call pci_request_region() to verify
--- a/Documentation/PCI/pcieaer-howto.rst
+++ b/Documentation/PCI/pcieaer-howto.rst
@ -156,12 +156,6 @@ default reset_link function, but different upstream ports might
 have different specifications to reset pci express link, so all
 upstream ports should provide their own reset_link functions.

-In struct pcie_port_service_driver, a new pointer, reset_link, is
-added.
-::
-
-	pci_ers_result_t (*reset_link) (struct pci_dev *dev);
-
 Section 3.2.2.2 provides more detailed info on when to call
 reset_link.

@ -212,15 +206,10 @@ error_detected(dev, pci_channel_io_frozen) to all drivers within
 a hierarchy in question. Then, performing link reset at upstream is
 necessary. As different kinds of devices might use different approaches
 to reset link, AER port service driver is required to provide the
-function to reset link. Firstly, kernel looks for if the upstream
-component has an aer driver. If it has, kernel uses the reset_link
-callback of the aer driver. If the upstream component has no aer driver
-and the port is downstream port, we will perform a hot reset as the
-default by setting the Secondary Bus Reset bit of the Bridge Control
-register associated with the downstream port. As for upstream ports,
-they should provide their own aer service drivers with reset_link
-function. If error_detected returns PCI_ERS_RESULT_CAN_RECOVER and
-reset_link returns PCI_ERS_RESULT_RECOVERED, the error handling goes
+function to reset link via callback parameter of pcie_do_recovery()
+function. If reset_link is not NULL, recovery function will use it
+to reset the link. If error_detected returns PCI_ERS_RESULT_CAN_RECOVER
+and reset_link returns PCI_ERS_RESULT_RECOVERED, the error handling goes
 to mmio_enabled.

 helper functions
@ -243,9 +232,9 @@ messages to root port when an error is detected.

 ::

-  int pci_cleanup_aer_uncorrect_error_status(struct pci_dev *dev);`
+  int pci_aer_clear_nonfatal_status(struct pci_dev *dev);`

-pci_cleanup_aer_uncorrect_error_status cleanups the uncorrectable
+pci_aer_clear_nonfatal_status clears non-fatal errors in the uncorrectable
 error status register.

 Frequent Asked Questions
--- a/Documentation/RCU/Design/Memory-Ordering/Tree-RCU-Memory-Ordering.rst
+++ b/Documentation/RCU/Design/Memory-Ordering/Tree-RCU-Memory-Ordering.rst
@ -4,7 +4,7 @@ A Tour Through TREE_RCU's Grace-Period Memory Ordering

 August 8, 2017

-This article was contributed by Paul E.&nbsp;McKenney
+This article was contributed by Paul E. McKenney

 Introduction
 ============
@ -48,7 +48,7 @@ Tree RCU Grace Period Memory Ordering Building Blocks

 The workhorse for RCU's grace-period memory ordering is the
 critical section for the ``rcu_node`` structure's
-``-&gt;lock``. These critical sections use helper functions for lock
+``->lock``. These critical sections use helper functions for lock
 acquisition, including ``raw_spin_lock_rcu_node()``,
 ``raw_spin_lock_irq_rcu_node()``, and ``raw_spin_lock_irqsave_rcu_node()``.
 Their lock-release counterparts are ``raw_spin_unlock_rcu_node()``,
@ -102,9 +102,9 @@ lock-acquisition and lock-release functions::
   23   r3 = READ_ONCE(x);
   24 }
   25
-   26 WARN_ON(r1 == 0 &amp;&amp; r2 == 0 &amp;&amp; r3 == 0);
+   26 WARN_ON(r1 == 0 && r2 == 0 && r3 == 0);

-The ``WARN_ON()`` is evaluated at &ldquo;the end of time&rdquo;,
+The ``WARN_ON()`` is evaluated at "the end of time",
 after all changes have propagated throughout the system.
 Without the ``smp_mb__after_unlock_lock()`` provided by the
 acquisition functions, this ``WARN_ON()`` could trigger, for example
--- a/Documentation/RCU/listRCU.rst
+++ b/Documentation/RCU/listRCU.rst
@ -4,12 +4,61 @@ Using RCU to Protect Read-Mostly Linked Lists
 =============================================

 One of the best applications of RCU is to protect read-mostly linked lists
-("struct list_head" in list.h).  One big advantage of this approach
+(``struct list_head`` in list.h).  One big advantage of this approach
 is that all of the required memory barriers are included for you in
 the list macros.  This document describes several applications of RCU,
 with the best fits first.

-Example 1: Read-Side Action Taken Outside of Lock, No In-Place Updates
+
+Example 1: Read-mostly list: Deferred Destruction
+-------------------------------------------------
+
+A widely used usecase for RCU lists in the kernel is lockless iteration over
+all processes in the system. ``task_struct::tasks`` represents the list node that
+links all the processes. The list can be traversed in parallel to any list
+additions or removals.
+
+The traversal of the list is done using ``for_each_process()`` which is defined
+by the 2 macros::
+
+	#define next_task(p) \
+		list_entry_rcu((p)->tasks.next, struct task_struct, tasks)
+
+	#define for_each_process(p) \
+		for (p = &init_task ; (p = next_task(p)) != &init_task ; )
+
+The code traversing the list of all processes typically looks like::
+
+	rcu_read_lock();
+	for_each_process(p) {
+		/* Do something with p */
+	}
+	rcu_read_unlock();
+
+The simplified code for removing a process from a task list is::
+
+	void release_task(struct task_struct *p)
+	{
+		write_lock(&tasklist_lock);
+		list_del_rcu(&p->tasks);
+		write_unlock(&tasklist_lock);
+		call_rcu(&p->rcu, delayed_put_task_struct);
+	}
+
+When a process exits, ``release_task()`` calls ``list_del_rcu(&p->tasks)`` under
+``tasklist_lock`` writer lock protection, to remove the task from the list of
+all tasks. The ``tasklist_lock`` prevents concurrent list additions/removals
+from corrupting the list. Readers using ``for_each_process()`` are not protected
+with the ``tasklist_lock``. To prevent readers from noticing changes in the list
+pointers, the ``task_struct`` object is freed only after one or more grace
+periods elapse (with the help of call_rcu()). This deferring of destruction
+ensures that any readers traversing the list will see valid ``p->tasks.next``
+pointers and deletion/freeing can happen in parallel with traversal of the list.
+This pattern is also called an **existence lock**, since RCU pins the object in
+memory until all existing readers finish.
+
+
+Example 2: Read-Side Action Taken Outside of Lock: No In-Place Updates
 ----------------------------------------------------------------------

 The best applications are cases where, if reader-writer locking were
@ -26,7 +75,7 @@ added or deleted, rather than being modified in place.

 A straightforward example of this use of RCU may be found in the
 system-call auditing support.  For example, a reader-writer locked
-implementation of audit_filter_task() might be as follows::
+implementation of ``audit_filter_task()`` might be as follows::

 	static enum audit_state audit_filter_task(struct task_struct *tsk)
 	{
@ -34,7 +83,7 @@ implementation of audit_filter_task() might be as follows::
 		enum audit_state   state;

 		read_lock(&auditsc_lock);
-		/* Note: audit_netlink_sem held by caller. */
+		/* Note: audit_filter_mutex held by caller. */
 		list_for_each_entry(e, &audit_tsklist, list) {
 			if (audit_filter_rules(tsk, &e->rule, NULL, &state)) {
 				read_unlock(&auditsc_lock);
@ -58,7 +107,7 @@ This means that RCU can be easily applied to the read side, as follows::
 		enum audit_state   state;

 		rcu_read_lock();
-		/* Note: audit_netlink_sem held by caller. */
+		/* Note: audit_filter_mutex held by caller. */
 		list_for_each_entry_rcu(e, &audit_tsklist, list) {
 			if (audit_filter_rules(tsk, &e->rule, NULL, &state)) {
 				rcu_read_unlock();
@ -69,18 +118,18 @@ This means that RCU can be easily applied to the read side, as follows::
 		return AUDIT_BUILD_CONTEXT;
 	}

-The read_lock() and read_unlock() calls have become rcu_read_lock()
+The ``read_lock()`` and ``read_unlock()`` calls have become rcu_read_lock()
 and rcu_read_unlock(), respectively, and the list_for_each_entry() has
-become list_for_each_entry_rcu().  The _rcu() list-traversal primitives
+become list_for_each_entry_rcu().  The **_rcu()** list-traversal primitives
 insert the read-side memory barriers that are required on DEC Alpha CPUs.

-The changes to the update side are also straightforward.  A reader-writer
-lock might be used as follows for deletion and insertion::
+The changes to the update side are also straightforward. A reader-writer lock
+might be used as follows for deletion and insertion::

 	static inline int audit_del_rule(struct audit_rule *rule,
 					 struct list_head *list)
 	{
-		struct audit_entry  *e;
+		struct audit_entry *e;

 		write_lock(&auditsc_lock);
 		list_for_each_entry(e, list, list) {
@ -113,9 +162,9 @@ Following are the RCU equivalents for these two functions::
 	static inline int audit_del_rule(struct audit_rule *rule,
 					 struct list_head *list)
 	{
-		struct audit_entry  *e;
+		struct audit_entry *e;

-		/* Do not use the _rcu iterator here, since this is the only
+		/* No need to use the _rcu iterator here, since this is the only
 		 * deletion routine. */
 		list_for_each_entry(e, list, list) {
 			if (!audit_compare_rule(rule, &e->rule)) {
@ -139,45 +188,45 @@ Following are the RCU equivalents for these two functions::
 		return 0;
 	}

-Normally, the write_lock() and write_unlock() would be replaced by
-a spin_lock() and a spin_unlock(), but in this case, all callers hold
-audit_netlink_sem, so no additional locking is required.  The auditsc_lock
-can therefore be eliminated, since use of RCU eliminates the need for
-writers to exclude readers.  Normally, the write_lock() calls would
-be converted into spin_lock() calls.
+Normally, the ``write_lock()`` and ``write_unlock()`` would be replaced by a
+spin_lock() and a spin_unlock(). But in this case, all callers hold
+``audit_filter_mutex``, so no additional locking is required. The
+``auditsc_lock`` can therefore be eliminated, since use of RCU eliminates the
+need for writers to exclude readers.

 The list_del(), list_add(), and list_add_tail() primitives have been
 replaced by list_del_rcu(), list_add_rcu(), and list_add_tail_rcu().
-The _rcu() list-manipulation primitives add memory barriers that are
-needed on weakly ordered CPUs (most of them!).  The list_del_rcu()
-primitive omits the pointer poisoning debug-assist code that would
-otherwise cause concurrent readers to fail spectacularly.
+The **_rcu()** list-manipulation primitives add memory barriers that are needed on
+weakly ordered CPUs (most of them!).  The list_del_rcu() primitive omits the
+pointer poisoning debug-assist code that would otherwise cause concurrent
+readers to fail spectacularly.

-So, when readers can tolerate stale data and when entries are either added
-or deleted, without in-place modification, it is very easy to use RCU!
+So, when readers can tolerate stale data and when entries are either added or
+deleted, without in-place modification, it is very easy to use RCU!

-Example 2: Handling In-Place Updates
+
+Example 3: Handling In-Place Updates
 ------------------------------------

-The system-call auditing code does not update auditing rules in place.
-However, if it did, reader-writer-locked code to do so might look as
-follows (presumably, the field_count is only permitted to decrease,
-otherwise, the added fields would need to be filled in)::
+The system-call auditing code does not update auditing rules in place.  However,
+if it did, the reader-writer-locked code to do so might look as follows
+(assuming only ``field_count`` is updated, otherwise, the added fields would
+need to be filled in)::

 	static inline int audit_upd_rule(struct audit_rule *rule,
 					 struct list_head *list,
 					 __u32 newaction,
 					 __u32 newfield_count)
 	{
-		struct audit_entry  *e;
-		struct audit_newentry *ne;
+		struct audit_entry *e;
+		struct audit_entry *ne;

 		write_lock(&auditsc_lock);
-		/* Note: audit_netlink_sem held by caller. */
+		/* Note: audit_filter_mutex held by caller. */
 		list_for_each_entry(e, list, list) {
 			if (!audit_compare_rule(rule, &e->rule)) {
 				e->rule.action = newaction;
-				e->rule.file_count = newfield_count;
+				e->rule.field_count = newfield_count;
 				write_unlock(&auditsc_lock);
 				return 0;
 			}
@ -188,16 +237,16 @@ otherwise, the added fields would need to be filled in)::

 The RCU version creates a copy, updates the copy, then replaces the old
 entry with the newly updated entry.  This sequence of actions, allowing
-concurrent reads while doing a copy to perform an update, is what gives
-RCU ("read-copy update") its name.  The RCU code is as follows::
+concurrent reads while making a copy to perform an update, is what gives
+RCU (*read-copy update*) its name.  The RCU code is as follows::

 	static inline int audit_upd_rule(struct audit_rule *rule,
 					 struct list_head *list,
 					 __u32 newaction,
 					 __u32 newfield_count)
 	{
-		struct audit_entry  *e;
-		struct audit_newentry *ne;
+		struct audit_entry *e;
+		struct audit_entry *ne;

 		list_for_each_entry(e, list, list) {
 			if (!audit_compare_rule(rule, &e->rule)) {
@ -206,7 +255,7 @@ RCU ("read-copy update") its name.  The RCU code is as follows::
 					return -ENOMEM;
 				audit_copy_rule(&ne->rule, &e->rule);
 				ne->rule.action = newaction;
-				ne->rule.file_count = newfield_count;
+				ne->rule.field_count = newfield_count;
 				list_replace_rcu(&e->list, &ne->list);
 				call_rcu(&e->rcu, audit_free_rule);
 				return 0;
@ -215,34 +264,45 @@ RCU ("read-copy update") its name.  The RCU code is as follows::
 		return -EFAULT;		/* No matching rule */
 	}

-Again, this assumes that the caller holds audit_netlink_sem.  Normally,
-the reader-writer lock would become a spinlock in this sort of code.
+Again, this assumes that the caller holds ``audit_filter_mutex``.  Normally, the
+writer lock would become a spinlock in this sort of code.

-Example 3: Eliminating Stale Data
+Another use of this pattern can be found in the openswitch driver's *connection
+tracking table* code in ``ct_limit_set()``.  The table holds connection tracking
+entries and has a limit on the maximum entries.  There is one such table
+per-zone and hence one *limit* per zone.  The zones are mapped to their limits
+through a hashtable using an RCU-managed hlist for the hash chains. When a new
+limit is set, a new limit object is allocated and ``ct_limit_set()`` is called
+to replace the old limit object with the new one using list_replace_rcu().
+The old limit object is then freed after a grace period using kfree_rcu().
+
+
+Example 4: Eliminating Stale Data
 ---------------------------------

-The auditing examples above tolerate stale data, as do most algorithms
+The auditing example above tolerates stale data, as do most algorithms
 that are tracking external state.  Because there is a delay from the
 time the external state changes before Linux becomes aware of the change,
-additional RCU-induced staleness is normally not a problem.
+additional RCU-induced staleness is generally not a problem.

 However, there are many examples where stale data cannot be tolerated.
-One example in the Linux kernel is the System V IPC (see the ipc_lock()
-function in ipc/util.c).  This code checks a "deleted" flag under a
-per-entry spinlock, and, if the "deleted" flag is set, pretends that the
+One example in the Linux kernel is the System V IPC (see the shm_lock()
+function in ipc/shm.c).  This code checks a *deleted* flag under a
+per-entry spinlock, and, if the *deleted* flag is set, pretends that the
 entry does not exist.  For this to be helpful, the search function must
-return holding the per-entry spinlock, as ipc_lock() does in fact do.
+return holding the per-entry spinlock, as shm_lock() does in fact do.
+
+.. _quick_quiz:

 Quick Quiz:
-	Why does the search function need to return holding the per-entry lock for
-	this deleted-flag technique to be helpful?
+	For the deleted-flag technique to be helpful, why is it necessary
+	to hold the per-entry lock while returning from the search function?

-:ref:`Answer to Quick Quiz <answer_quick_quiz_list>`
+:ref:`Answer to Quick Quiz <quick_quiz_answer>`

-If the system-call audit module were to ever need to reject stale data,
-one way to accomplish this would be to add a "deleted" flag and a "lock"
-spinlock to the audit_entry structure, and modify audit_filter_task()
-as follows::
+If the system-call audit module were to ever need to reject stale data, one way
+to accomplish this would be to add a ``deleted`` flag and a ``lock`` spinlock to the
+audit_entry structure, and modify ``audit_filter_task()`` as follows::

 	static enum audit_state audit_filter_task(struct task_struct *tsk)
 	{
@ -267,20 +327,20 @@ as follows::
 	}

 Note that this example assumes that entries are only added and deleted.
-Additional mechanism is required to deal correctly with the
-update-in-place performed by audit_upd_rule().  For one thing,
-audit_upd_rule() would need additional memory barriers to ensure
-that the list_add_rcu() was really executed before the list_del_rcu().
+Additional mechanism is required to deal correctly with the update-in-place
+performed by ``audit_upd_rule()``.  For one thing, ``audit_upd_rule()`` would
+need additional memory barriers to ensure that the list_add_rcu() was really
+executed before the list_del_rcu().

-The audit_del_rule() function would need to set the "deleted"
-flag under the spinlock as follows::
+The ``audit_del_rule()`` function would need to set the ``deleted`` flag under the
+spinlock as follows::

 	static inline int audit_del_rule(struct audit_rule *rule,
 					 struct list_head *list)
 	{
-		struct audit_entry  *e;
+		struct audit_entry *e;

-		/* Do not need to use the _rcu iterator here, since this
+		/* No need to use the _rcu iterator here, since this
 		 * is the only deletion routine. */
 		list_for_each_entry(e, list, list) {
 			if (!audit_compare_rule(rule, &e->rule)) {
@ -295,6 +355,91 @@ flag under the spinlock as follows::
 		return -EFAULT;		/* No matching rule */
 	}

+This too assumes that the caller holds ``audit_filter_mutex``.
+
+
+Example 5: Skipping Stale Objects
+---------------------------------
+
+For some usecases, reader performance can be improved by skipping stale objects
+during read-side list traversal if the object in concern is pending destruction
+after one or more grace periods. One such example can be found in the timerfd
+subsystem. When a ``CLOCK_REALTIME`` clock is reprogrammed - for example due to
+setting of the system time, then all programmed timerfds that depend on this
+clock get triggered and processes waiting on them to expire are woken up in
+advance of their scheduled expiry. To facilitate this, all such timers are added
+to an RCU-managed ``cancel_list`` when they are setup in
+``timerfd_setup_cancel()``::
+
+	static void timerfd_setup_cancel(struct timerfd_ctx *ctx, int flags)
+	{
+		spin_lock(&ctx->cancel_lock);
+		if ((ctx->clockid == CLOCK_REALTIME &&
+		    (flags & TFD_TIMER_ABSTIME) && (flags & TFD_TIMER_CANCEL_ON_SET)) {
+			if (!ctx->might_cancel) {
+				ctx->might_cancel = true;
+				spin_lock(&cancel_lock);
+				list_add_rcu(&ctx->clist, &cancel_list);
+				spin_unlock(&cancel_lock);
+			}
+		}
+		spin_unlock(&ctx->cancel_lock);
+	}
+
+When a timerfd is freed (fd is closed), then the ``might_cancel`` flag of the
+timerfd object is cleared, the object removed from the ``cancel_list`` and
+destroyed::
+
+	int timerfd_release(struct inode *inode, struct file *file)
+	{
+		struct timerfd_ctx *ctx = file->private_data;
+
+		spin_lock(&ctx->cancel_lock);
+		if (ctx->might_cancel) {
+			ctx->might_cancel = false;
+			spin_lock(&cancel_lock);
+			list_del_rcu(&ctx->clist);
+			spin_unlock(&cancel_lock);
+		}
+		spin_unlock(&ctx->cancel_lock);
+
+		hrtimer_cancel(&ctx->t.tmr);
+		kfree_rcu(ctx, rcu);
+		return 0;
+	}
+
+If the ``CLOCK_REALTIME`` clock is set, for example by a time server, the
+hrtimer framework calls ``timerfd_clock_was_set()`` which walks the
+``cancel_list`` and wakes up processes waiting on the timerfd. While iterating
+the ``cancel_list``, the ``might_cancel`` flag is consulted to skip stale
+objects::
+
+	void timerfd_clock_was_set(void)
+	{
+		struct timerfd_ctx *ctx;
+		unsigned long flags;
+
+		rcu_read_lock();
+		list_for_each_entry_rcu(ctx, &cancel_list, clist) {
+			if (!ctx->might_cancel)
+				continue;
+			spin_lock_irqsave(&ctx->wqh.lock, flags);
+			if (ctx->moffs != ktime_mono_to_real(0)) {
+				ctx->moffs = KTIME_MAX;
+				ctx->ticks++;
+				wake_up_locked_poll(&ctx->wqh, EPOLLIN);
+			}
+			spin_unlock_irqrestore(&ctx->wqh.lock, flags);
+		}
+		rcu_read_unlock();
+	}
+
+The key point here is, because RCU-traversal of the ``cancel_list`` happens
+while objects are being added and removed to the list, sometimes the traversal
+can step on an object that has been removed from the list. In this example, it
+is seen that it is better to skip such objects using a flag.
+
+
 Summary
 -------

@ -303,19 +448,21 @@ the most amenable to use of RCU.  The simplest case is where entries are
 either added or deleted from the data structure (or atomically modified
 in place), but non-atomic in-place modifications can be handled by making
 a copy, updating the copy, then replacing the original with the copy.
-If stale data cannot be tolerated, then a "deleted" flag may be used
+If stale data cannot be tolerated, then a *deleted* flag may be used
 in conjunction with a per-entry spinlock in order to allow the search
 function to reject newly deleted data.

-.. _answer_quick_quiz_list:
+.. _quick_quiz_answer:

 Answer to Quick Quiz:
-	Why does the search function need to return holding the per-entry
-	lock for this deleted-flag technique to be helpful?
+	For the deleted-flag technique to be helpful, why is it necessary
+	to hold the per-entry lock while returning from the search function?

 	If the search function drops the per-entry lock before returning,
 	then the caller will be processing stale data in any case.  If it
 	is really OK to be processing stale data, then you don't need a
-	"deleted" flag.  If processing stale data really is a problem,
+	*deleted* flag.  If processing stale data really is a problem,
 	then you need to hold the per-entry lock across all of the code
 	that uses the value that was returned.
+
+:ref:`Back to Quick Quiz <quick_quiz>`
--- a/Documentation/RCU/rcu.rst
+++ b/Documentation/RCU/rcu.rst
@ -11,8 +11,8 @@ must be long enough that any readers accessing the item being deleted have
 since dropped their references.  For example, an RCU-protected deletion
 from a linked list would first remove the item from the list, wait for
 a grace period to elapse, then free the element.  See the
-Documentation/RCU/listRCU.rst file for more information on using RCU with
-linked lists.
+:ref:`Documentation/RCU/listRCU.rst <list_rcu_doc>` for more information on
+using RCU with linked lists.

 Frequently Asked Questions
 --------------------------
@ -50,7 +50,7 @@ Frequently Asked Questions
 - If I am running on a uniprocessor kernel, which can only do one
  thing at a time, why should I wait for a grace period?

-  See the Documentation/RCU/UP.rst file for more information.
+  See :ref:`Documentation/RCU/UP.rst <up_doc>` for more information.

 - How can I see where RCU is currently used in the Linux kernel?

@ -68,18 +68,18 @@ Frequently Asked Questions

 - Why the name "RCU"?

-  "RCU" stands for "read-copy update".  The file Documentation/RCU/listRCU.rst
-  has more information on where this name came from, search for
-  "read-copy update" to find it.
+  "RCU" stands for "read-copy update".
+  :ref:`Documentation/RCU/listRCU.rst <list_rcu_doc>` has more information on where
+  this name came from, search for "read-copy update" to find it.

 - I hear that RCU is patented?  What is with that?

  Yes, it is.  There are several known patents related to RCU,
-  search for the string "Patent" in RTFP.txt to find them.
+  search for the string "Patent" in Documentation/RCU/RTFP.txt to find them.
  Of these, one was allowed to lapse by the assignee, and the
  others have been contributed to the Linux kernel under GPL.
  There are now also LGPL implementations of user-level RCU
-  available (http://liburcu.org/).
+  available (https://liburcu.org/).

 - I hear that RCU needs work in order to support realtime kernels?

@ -88,5 +88,5 @@ Frequently Asked Questions

 - Where can I find more information on RCU?

-  See the RTFP.txt file in this directory.
+  See the Documentation/RCU/RTFP.txt file.
  Or point your browser at (http://www.rdrop.com/users/paulmck/RCU/).
--- a/Documentation/RCU/torture.txt
+++ b/Documentation/RCU/torture.txt
@ -124,9 +124,14 @@ using a dynamically allocated srcu_struct (hence "srcud-" rather than
 debugging.  The final "T" entry contains the totals of the counters.


-USAGE
+USAGE ON SPECIFIC KERNEL BUILDS

-The following script may be used to torture RCU:
+It is sometimes desirable to torture RCU on a specific kernel build,
+for example, when preparing to put that kernel build into production.
+In that case, the kernel should be built with CONFIG_RCU_TORTURE_TEST=m
+so that the test can be started using modprobe and terminated using rmmod.
+
+For example, the following script may be used to torture RCU:

 	#!/bin/sh

@ -142,8 +147,136 @@ checked for such errors.  The "rmmod" command forces a "SUCCESS",
 two are self-explanatory, while the last indicates that while there
 were no RCU failures, CPU-hotplug problems were detected.

-However, the tools/testing/selftests/rcutorture/bin/kvm.sh script
-provides better automation, including automatic failure analysis.
-It assumes a qemu/kvm-enabled platform, and runs guest OSes out of initrd.
-See tools/testing/selftests/rcutorture/doc/initrd.txt for instructions
-on setting up such an initrd.
+
+USAGE ON MAINLINE KERNELS
+
+When using rcutorture to test changes to RCU itself, it is often
+necessary to build a number of kernels in order to test that change
+across a broad range of combinations of the relevant Kconfig options
+and of the relevant kernel boot parameters.  In this situation, use
+of modprobe and rmmod can be quite time-consuming and error-prone.
+
+Therefore, the tools/testing/selftests/rcutorture/bin/kvm.sh
+script is available for mainline testing for x86, arm64, and
+powerpc.  By default, it will run the series of tests specified by
+tools/testing/selftests/rcutorture/configs/rcu/CFLIST, with each test
+running for 30 minutes within a guest OS using a minimal userspace
+supplied by an automatically generated initrd.  After the tests are
+complete, the resulting build products and console output are analyzed
+for errors and the results of the runs are summarized.
+
+On larger systems, rcutorture testing can be accelerated by passing the
+--cpus argument to kvm.sh.  For example, on a 64-CPU system, "--cpus 43"
+would use up to 43 CPUs to run tests concurrently, which as of v5.4 would
+complete all the scenarios in two batches, reducing the time to complete
+from about eight hours to about one hour (not counting the time to build
+the sixteen kernels).  The "--dryrun sched" argument will not run tests,
+but rather tell you how the tests would be scheduled into batches.  This
+can be useful when working out how many CPUs to specify in the --cpus
+argument.
+
+Not all changes require that all scenarios be run.  For example, a change
+to Tree SRCU might run only the SRCU-N and SRCU-P scenarios using the
+--configs argument to kvm.sh as follows:  "--configs 'SRCU-N SRCU-P'".
+Large systems can run multiple copies of of the full set of scenarios,
+for example, a system with 448 hardware threads can run five instances
+of the full set concurrently.  To make this happen:
+
+	kvm.sh --cpus 448 --configs '5*CFLIST'
+
+Alternatively, such a system can run 56 concurrent instances of a single
+eight-CPU scenario:
+
+	kvm.sh --cpus 448 --configs '56*TREE04'
+
+Or 28 concurrent instances of each of two eight-CPU scenarios:
+
+	kvm.sh --cpus 448 --configs '28*TREE03 28*TREE04'
+
+Of course, each concurrent instance will use memory, which can be
+limited using the --memory argument, which defaults to 512M.  Small
+values for memory may require disabling the callback-flooding tests
+using the --bootargs parameter discussed below.
+
+Sometimes additional debugging is useful, and in such cases the --kconfig
+parameter to kvm.sh may be used, for example, "--kconfig 'CONFIG_KASAN=y'".
+
+Kernel boot arguments can also be supplied, for example, to control
+rcutorture's module parameters.  For example, to test a change to RCU's
+CPU stall-warning code, use "--bootargs 'rcutorture.stall_cpu=30'".
+This will of course result in the scripting reporting a failure, namely
+the resuling RCU CPU stall warning.  As noted above, reducing memory may
+require disabling rcutorture's callback-flooding tests:
+
+	kvm.sh --cpus 448 --configs '56*TREE04' --memory 128M \
+		--bootargs 'rcutorture.fwd_progress=0'
+
+Sometimes all that is needed is a full set of kernel builds.  This is
+what the --buildonly argument does.
+
+Finally, the --trust-make argument allows each kernel build to reuse what
+it can from the previous kernel build.
+
+There are additional more arcane arguments that are documented in the
+source code of the kvm.sh script.
+
+If a run contains failures, the number of buildtime and runtime failures
+is listed at the end of the kvm.sh output, which you really should redirect
+to a file.  The build products and console output of each run is kept in
+tools/testing/selftests/rcutorture/res in timestamped directories.  A
+given directory can be supplied to kvm-find-errors.sh in order to have
+it cycle you through summaries of errors and full error logs.  For example:
+
+	tools/testing/selftests/rcutorture/bin/kvm-find-errors.sh \
+		tools/testing/selftests/rcutorture/res/2020.01.20-15.54.23
+
+However, it is often more convenient to access the files directly.
+Files pertaining to all scenarios in a run reside in the top-level
+directory (2020.01.20-15.54.23 in the example above), while per-scenario
+files reside in a subdirectory named after the scenario (for example,
+"TREE04").  If a given scenario ran more than once (as in "--configs
+'56*TREE04'" above), the directories corresponding to the second and
+subsequent runs of that scenario include a sequence number, for example,
+"TREE04.2", "TREE04.3", and so on.
+
+The most frequently used file in the top-level directory is testid.txt.
+If the test ran in a git repository, then this file contains the commit
+that was tested and any uncommitted changes in diff format.
+
+The most frequently used files in each per-scenario-run directory are:
+
+.config: This file contains the Kconfig options.
+
+Make.out: This contains build output for a specific scenario.
+
+console.log: This contains the console output for a specific scenario.
+	This file may be examined once the kernel has booted, but
+	it might not exist if the build failed.
+
+vmlinux: This contains the kernel, which can be useful with tools like
+	objdump and gdb.
+
+A number of additional files are available, but are less frequently used.
+Many are intended for debugging of rcutorture itself or of its scripting.
+
+As of v5.4, a successful run with the default set of scenarios produces
+the following summary at the end of the run on a 12-CPU system:
+
+SRCU-N ------- 804233 GPs (148.932/s) [srcu: g10008272 f0x0 ]
+SRCU-P ------- 202320 GPs (37.4667/s) [srcud: g1809476 f0x0 ]
+SRCU-t ------- 1122086 GPs (207.794/s) [srcu: g0 f0x0 ]
+SRCU-u ------- 1111285 GPs (205.794/s) [srcud: g1 f0x0 ]
+TASKS01 ------- 19666 GPs (3.64185/s) [tasks: g0 f0x0 ]
+TASKS02 ------- 20541 GPs (3.80389/s) [tasks: g0 f0x0 ]
+TASKS03 ------- 19416 GPs (3.59556/s) [tasks: g0 f0x0 ]
+TINY01 ------- 836134 GPs (154.84/s) [rcu: g0 f0x0 ] n_max_cbs: 34198
+TINY02 ------- 850371 GPs (157.476/s) [rcu: g0 f0x0 ] n_max_cbs: 2631
+TREE01 ------- 162625 GPs (30.1157/s) [rcu: g1124169 f0x0 ]
+TREE02 ------- 333003 GPs (61.6672/s) [rcu: g2647753 f0x0 ] n_max_cbs: 35844
+TREE03 ------- 306623 GPs (56.782/s) [rcu: g2975325 f0x0 ] n_max_cbs: 1496497
+CPU count limited from 16 to 12
+TREE04 ------- 246149 GPs (45.5831/s) [rcu: g1695737 f0x0 ] n_max_cbs: 434961
+TREE05 ------- 314603 GPs (58.2598/s) [rcu: g2257741 f0x2 ] n_max_cbs: 193997
+TREE07 ------- 167347 GPs (30.9902/s) [rcu: g1079021 f0x0 ] n_max_cbs: 478732
+CPU count limited from 16 to 12
+TREE09 ------- 752238 GPs (139.303/s) [rcu: g13075057 f0x0 ] n_max_cbs: 99011
--- a/Documentation/accounting/psi.rst
+++ b/Documentation/accounting/psi.rst
@ -1,3 +1,5 @@
+.. _psi:
+
 ================================
 PSI - Pressure Stall Information
 ================================
--- a/Documentation/admin-guide/binderfs.rst
+++ b/Documentation/admin-guide/binderfs.rst
@ -33,6 +33,12 @@ max
  a per-instance limit. If ``max=<count>`` is set then only ``<count>`` number
  of binder devices can be allocated in this binderfs instance.

+stats
+  Using ``stats=global`` enables global binder statistics.
+  ``stats=global`` is only available for a binderfs instance mounted in the
+  initial user namespace. An attempt to use the option to mount a binderfs
+  instance in another user namespace will return a permission error.
+
 Allocating binder Devices
 -------------------------

--- a/Documentation/admin-guide/binfmt-misc.rst
+++ b/Documentation/admin-guide/binfmt-misc.rst
@ -1,5 +1,5 @@
-Kernel Support for miscellaneous (your favourite) Binary Formats v1.1
-=====================================================================
+Kernel Support for miscellaneous Binary Formats (binfmt_misc)
+=============================================================

 This Kernel feature allows you to invoke almost (for restrictions see below)
 every program by simply typing its name in the shell.
@ -140,8 +140,8 @@ Hints
 -----

 If you want to pass special arguments to your interpreter, you can
-write a wrapper script for it. See Documentation/admin-guide/java.rst for an
-example.
+write a wrapper script for it.
+See :doc:`Documentation/admin-guide/java.rst <./java>` for an example.

 Your interpreter should NOT look in the PATH for the filename; the kernel
 passes it the full filename (or the file descriptor) to use.  Using ``$PATH`` can
--- a/Documentation/admin-guide/blockdev/zram.rst
+++ b/Documentation/admin-guide/blockdev/zram.rst
@ -251,8 +251,6 @@ line of text and contains the following stats separated by whitespace:

 ================ =============================================================
 orig_data_size   uncompressed size of data stored in this disk.
-		  This excludes same-element-filled pages (same_pages) since
-		  no memory is allocated for them.
                  Unit: bytes
 compr_data_size  compressed size of data stored in this disk
 mem_used_total   the amount of memory allocated for this disk. This
--- a/Documentation/admin-guide/bootconfig.rst
+++ b/Documentation/admin-guide/bootconfig.rst
@ -23,7 +23,7 @@ of dot-connected-words, and key and value are connected by ``=``. The value
 has to be terminated by semi-colon (``;``) or newline (``\n``).
 For array value, array entries are separated by comma (``,``). ::

-KEY[.WORD[...]] = VALUE[, VALUE2[...]][;]
+  KEY[.WORD[...]] = VALUE[, VALUE2[...]][;]

 Unlike the kernel command line syntax, spaces are OK around the comma and ``=``.

--- a/Documentation/admin-guide/cgroup-v1/cpusets.rst
+++ b/Documentation/admin-guide/cgroup-v1/cpusets.rst
@ -223,6 +223,17 @@ cpu_online_mask using a CPU hotplug notifier, and the mems file
 automatically tracks the value of node_states[N_MEMORY]--i.e.,
 nodes with memory--using the cpuset_track_online_nodes() hook.

+The cpuset.effective_cpus and cpuset.effective_mems files are
+normally read-only copies of cpuset.cpus and cpuset.mems files
+respectively.  If the cpuset cgroup filesystem is mounted with the
+special "cpuset_v2_mode" option, the behavior of these files will become
+similar to the corresponding files in cpuset v2.  In other words, hotplug
+events will not change cpuset.cpus and cpuset.mems.  Those events will
+only affect cpuset.effective_cpus and cpuset.effective_mems which show
+the actual cpus and memory nodes that are currently used by this cpuset.
+See Documentation/admin-guide/cgroup-v2.rst for more information about
+cpuset v2 behavior.
+

 1.4 What are exclusive cpusets ?
 --------------------------------
--- a/Documentation/admin-guide/cgroup-v1/hugetlb.rst
+++ b/Documentation/admin-guide/cgroup-v1/hugetlb.rst
@ -2,13 +2,6 @@
 HugeTLB Controller
 ==================

-The HugeTLB controller allows to limit the HugeTLB usage per control group and
-enforces the controller limit during page fault. Since HugeTLB doesn't
-support page reclaim, enforcing the limit at page fault time implies that,
-the application will get SIGBUS signal if it tries to access HugeTLB pages
-beyond its limit. This requires the application to know beforehand how much
-HugeTLB pages it would require for its use.
-
 HugeTLB controller can be created by first mounting the cgroup filesystem.

 # mount -t cgroup -o hugetlb none /sys/fs/cgroup
@ -28,10 +21,14 @@ process (bash) into it.

 Brief summary of control files::

- hugetlb.<hugepagesize>.limit_in_bytes     # set/show limit of "hugepagesize" hugetlb usage
- hugetlb.<hugepagesize>.max_usage_in_bytes # show max "hugepagesize" hugetlb  usage recorded
- hugetlb.<hugepagesize>.usage_in_bytes     # show current usage for "hugepagesize" hugetlb
- hugetlb.<hugepagesize>.failcnt		   # show the number of allocation failure due to HugeTLB limit
+ hugetlb.<hugepagesize>.rsvd.limit_in_bytes            # set/show limit of "hugepagesize" hugetlb reservations
+ hugetlb.<hugepagesize>.rsvd.max_usage_in_bytes        # show max "hugepagesize" hugetlb reservations and no-reserve faults
+ hugetlb.<hugepagesize>.rsvd.usage_in_bytes            # show current reservations and no-reserve faults for "hugepagesize" hugetlb
+ hugetlb.<hugepagesize>.rsvd.failcnt                   # show the number of allocation failure due to HugeTLB reservation limit
+ hugetlb.<hugepagesize>.limit_in_bytes                 # set/show limit of "hugepagesize" hugetlb faults
+ hugetlb.<hugepagesize>.max_usage_in_bytes             # show max "hugepagesize" hugetlb  usage recorded
+ hugetlb.<hugepagesize>.usage_in_bytes                 # show current usage for "hugepagesize" hugetlb
+ hugetlb.<hugepagesize>.failcnt                        # show the number of allocation failure due to HugeTLB usage limit

 For a system supporting three hugepage sizes (64k, 32M and 1G), the control
 files include::
@ -40,11 +37,95 @@ files include::
  hugetlb.1GB.max_usage_in_bytes
  hugetlb.1GB.usage_in_bytes
  hugetlb.1GB.failcnt
+  hugetlb.1GB.rsvd.limit_in_bytes
+  hugetlb.1GB.rsvd.max_usage_in_bytes
+  hugetlb.1GB.rsvd.usage_in_bytes
+  hugetlb.1GB.rsvd.failcnt
  hugetlb.64KB.limit_in_bytes
  hugetlb.64KB.max_usage_in_bytes
  hugetlb.64KB.usage_in_bytes
  hugetlb.64KB.failcnt
+  hugetlb.64KB.rsvd.limit_in_bytes
+  hugetlb.64KB.rsvd.max_usage_in_bytes
+  hugetlb.64KB.rsvd.usage_in_bytes
+  hugetlb.64KB.rsvd.failcnt
  hugetlb.32MB.limit_in_bytes
  hugetlb.32MB.max_usage_in_bytes
  hugetlb.32MB.usage_in_bytes
  hugetlb.32MB.failcnt
+  hugetlb.32MB.rsvd.limit_in_bytes
+  hugetlb.32MB.rsvd.max_usage_in_bytes
+  hugetlb.32MB.rsvd.usage_in_bytes
+  hugetlb.32MB.rsvd.failcnt
+
+
+1. Page fault accounting
+
+hugetlb.<hugepagesize>.limit_in_bytes
+hugetlb.<hugepagesize>.max_usage_in_bytes
+hugetlb.<hugepagesize>.usage_in_bytes
+hugetlb.<hugepagesize>.failcnt
+
+The HugeTLB controller allows users to limit the HugeTLB usage (page fault) per
+control group and enforces the limit during page fault. Since HugeTLB
+doesn't support page reclaim, enforcing the limit at page fault time implies
+that, the application will get SIGBUS signal if it tries to fault in HugeTLB
+pages beyond its limit. Therefore the application needs to know exactly how many
+HugeTLB pages it uses before hand, and the sysadmin needs to make sure that
+there are enough available on the machine for all the users to avoid processes
+getting SIGBUS.
+
+
+2. Reservation accounting
+
+hugetlb.<hugepagesize>.rsvd.limit_in_bytes
+hugetlb.<hugepagesize>.rsvd.max_usage_in_bytes
+hugetlb.<hugepagesize>.rsvd.usage_in_bytes
+hugetlb.<hugepagesize>.rsvd.failcnt
+
+The HugeTLB controller allows to limit the HugeTLB reservations per control
+group and enforces the controller limit at reservation time and at the fault of
+HugeTLB memory for which no reservation exists. Since reservation limits are
+enforced at reservation time (on mmap or shget), reservation limits never causes
+the application to get SIGBUS signal if the memory was reserved before hand. For
+MAP_NORESERVE allocations, the reservation limit behaves the same as the fault
+limit, enforcing memory usage at fault time and causing the application to
+receive a SIGBUS if it's crossing its limit.
+
+Reservation limits are superior to page fault limits described above, since
+reservation limits are enforced at reservation time (on mmap or shget), and
+never causes the application to get SIGBUS signal if the memory was reserved
+before hand. This allows for easier fallback to alternatives such as
+non-HugeTLB memory for example. In the case of page fault accounting, it's very
+hard to avoid processes getting SIGBUS since the sysadmin needs precisely know
+the HugeTLB usage of all the tasks in the system and make sure there is enough
+pages to satisfy all requests. Avoiding tasks getting SIGBUS on overcommited
+systems is practically impossible with page fault accounting.
+
+
+3. Caveats with shared memory
+
+For shared HugeTLB memory, both HugeTLB reservation and page faults are charged
+to the first task that causes the memory to be reserved or faulted, and all
+subsequent uses of this reserved or faulted memory is done without charging.
+
+Shared HugeTLB memory is only uncharged when it is unreserved or deallocated.
+This is usually when the HugeTLB file is deleted, and not when the task that
+caused the reservation or fault has exited.
+
+
+4. Caveats with HugeTLB cgroup offline.
+
+When a HugeTLB cgroup goes offline with some reservations or faults still
+charged to it, the behavior is as follows:
+
+- The fault charges are charged to the parent HugeTLB cgroup (reparented),
+- the reservation charges remain on the offline HugeTLB cgroup.
+
+This means that if a HugeTLB cgroup gets offlined while there is still HugeTLB
+reservations charged to it, that cgroup persists as a zombie until all HugeTLB
+reservations are uncharged. HugeTLB reservations behave in this manner to match
+the memory controller whose cgroups also persist as zombie until all charged
+memory is uncharged. Also, the tracking of HugeTLB reservations is a bit more
+complex compared to the tracking of HugeTLB faults, so it is significantly
+harder to reparent reservations at offline time.
--- a/Documentation/admin-guide/cgroup-v1/index.rst
+++ b/Documentation/admin-guide/cgroup-v1/index.rst
@ -1,3 +1,5 @@
+.. _cgroup-v1:
+
 ========================
 Control Groups version 1
 ========================
--- a/Documentation/admin-guide/cgroup-v2.rst
+++ b/Documentation/admin-guide/cgroup-v2.rst
@ -9,7 +9,7 @@ This is the authoritative documentation on the design, interface and
 conventions of cgroup v2.  It describes all userland-visible aspects
 of cgroup including core and specific controller behaviors.  All
 future changes must be reflected in this document.  Documentation for
-v1 is available under Documentation/admin-guide/cgroup-v1/.
+v1 is available under :ref:`Documentation/admin-guide/cgroup-v1/index.rst <cgroup-v1>`.

 .. CONTENTS

@ -188,6 +188,17 @@ cgroup v2 currently supports the following mount options.
        modified through remount from the init namespace. The mount
        option is ignored on non-init namespace mounts.

+  memory_recursiveprot
+
+        Recursively apply memory.min and memory.low protection to
+        entire subtrees, without requiring explicit downward
+        propagation into leaf cgroups.  This allows protecting entire
+        subtrees from one another, while retaining free competition
+        within those subtrees.  This should have been the default
+        behavior but is a mount-option to avoid regressing setups
+        relying on the original semantics (e.g. specifying bogusly
+        high 'bypass' protection values at higher tree levels).
+

 Organizing Processes and Threads
 --------------------------------
@ -1023,7 +1034,7 @@ All time durations are in microseconds.
 	A read-only nested-key file which exists on non-root cgroups.

 	Shows pressure stall information for CPU. See
-	Documentation/accounting/psi.rst for details.
+	:ref:`Documentation/accounting/psi.rst <psi>` for details.

  cpu.uclamp.min
        A read-write single value file which exists on non-root cgroups.
@ -1103,7 +1114,7 @@ PAGE_SIZE multiple when read back.
 	proportionally to the overage, reducing reclaim pressure for
 	smaller overages.

-       Effective min boundary is limited by memory.min values of
+	Effective min boundary is limited by memory.min values of
 	all ancestor cgroups. If there is memory.min overcommitment
 	(child cgroup or cgroups are requiring more protected memory
 	than parent will allow), then each child cgroup will get
@ -1313,53 +1324,41 @@ PAGE_SIZE multiple when read back.
 		Number of major page faults incurred

 	  workingset_refault
-
 		Number of refaults of previously evicted pages

 	  workingset_activate
-
 		Number of refaulted pages that were immediately activated

 	  workingset_nodereclaim
-
 		Number of times a shadow node has been reclaimed

 	  pgrefill
-
 		Amount of scanned pages (in an active LRU list)

 	  pgscan
-
 		Amount of scanned pages (in an inactive LRU list)

 	  pgsteal
-
 		Amount of reclaimed pages

 	  pgactivate
-
 		Amount of pages moved to the active LRU list

 	  pgdeactivate
-
 		Amount of pages moved to the inactive LRU list

 	  pglazyfree
-
 		Amount of pages postponed to be freed under memory pressure

 	  pglazyfreed
-
 		Amount of reclaimed lazyfree pages

 	  thp_fault_alloc
-
 		Number of transparent hugepages which were allocated to satisfy
 		a page fault, including COW faults. This counter is not present
 		when CONFIG_TRANSPARENT_HUGEPAGE is not set.

 	  thp_collapse_alloc
-
 		Number of transparent hugepages which were allocated to allow
 		collapsing an existing range of pages. This counter is not
 		present when CONFIG_TRANSPARENT_HUGEPAGE is not set.
@ -1403,7 +1402,7 @@ PAGE_SIZE multiple when read back.
 	A read-only nested-key file which exists on non-root cgroups.

 	Shows pressure stall information for memory. See
-	Documentation/accounting/psi.rst for details.
+	:ref:`Documentation/accounting/psi.rst <psi>` for details.


 Usage Guidelines
@ -1478,7 +1477,7 @@ IO Interface Files
 	  dios		Number of discard IOs
 	  ======	=====================

-	An example read output follows:
+	An example read output follows::

 	  8:16 rbytes=1459200 wbytes=314773504 rios=192 wios=353 dbytes=0 dios=0
 	  8:0 rbytes=90430464 wbytes=299008000 rios=8950 wios=1252 dbytes=50331648 dios=3021
@ -1643,7 +1642,7 @@ IO Interface Files
 	A read-only nested-key file which exists on non-root cgroups.

 	Shows pressure stall information for IO. See
-	Documentation/accounting/psi.rst for details.
+	:ref:`Documentation/accounting/psi.rst <psi>` for details.


 Writeback
@ -1853,7 +1852,7 @@ Cpuset Interface Files
 	from the requested CPUs.

 	The CPU numbers are comma-separated numbers or ranges.
-	For example:
+	For example::

 	  # cat cpuset.cpus
 	  0-4,6,8-10
@ -1892,7 +1891,7 @@ Cpuset Interface Files
 	from the requested memory nodes.

 	The memory node numbers are comma-separated numbers or ranges.
-	For example:
+	For example::

 	  # cat cpuset.mems
 	  0-1,3
--- a/Documentation/admin-guide/dynamic-debug-howto.rst
+++ b/Documentation/admin-guide/dynamic-debug-howto.rst
@ -54,6 +54,9 @@ If you make a mistake with the syntax, the write will fail thus::
 				<debugfs>/dynamic_debug/control
  -bash: echo: write error: Invalid argument

+Note, for systems without 'debugfs' enabled, the control file can be
+found in ``/proc/dynamic_debug/control``.
+
 Viewing Dynamic Debug Behaviour
 ===============================

--- a/Documentation/admin-guide/edid.rst
+++ b/Documentation/admin-guide/edid.rst
@ -11,11 +11,13 @@ Today, with the advent of Kernel Mode Setting, a graphics board is
 either correctly working because all components follow the standards -
 or the computer is unusable, because the screen remains dark after
 booting or it displays the wrong area. Cases when this happens are:
+
 - The graphics board does not recognize the monitor.
 - The graphics board is unable to detect any EDID data.
 - The graphics board incorrectly forwards EDID data to the driver.
 - The monitor sends no or bogus EDID data.
 - A KVM sends its own EDID data instead of querying the connected monitor.
+
 Adding the kernel parameter "nomodeset" helps in most cases, but causes
 restrictions later on.

@ -32,7 +34,7 @@ individual data for a specific misbehaving monitor, commented sources
 and a Makefile environment are given here.

 To create binary EDID and C source code files from the existing data
-material, simply type "make".
+material, simply type "make" in tools/edid/.

 If you want to create your own EDID file, copy the file 1024x768.S,
 replace the settings with your own data and add a new target to the
--- a/Documentation/admin-guide/hw-vuln/tsx_async_abort.rst
+++ b/Documentation/admin-guide/hw-vuln/tsx_async_abort.rst
@ -136,8 +136,6 @@ enables the mitigation by default.
 The mitigation can be controlled at boot time via a kernel command line option.
 See :ref:`taa_mitigation_control_command_line`.

-.. _virt_mechanism:
-
 Virtualization mitigation
 ^^^^^^^^^^^^^^^^^^^^^^^^^

--- a/Documentation/admin-guide/index.rst
+++ b/Documentation/admin-guide/index.rst
@ -75,6 +75,7 @@ configure specific aspects of kernel behavior to your liking.
   cputopology
   dell_rbu
   device-mapper/index
+   edid
   efi-stub
   ext4
   nfs/index
--- a/Documentation/admin-guide/iostats.rst
+++ b/Documentation/admin-guide/iostats.rst
@ -100,7 +100,7 @@ Field 10 -- # of milliseconds spent doing I/Os (unsigned int)

    Since 5.0 this field counts jiffies when at least one request was
    started or completed. If request runs more than 2 jiffies then some
-    I/O time will not be accounted unless there are other requests.
+    I/O time might be not accounted in case of concurrent requests.

 Field 11 -- weighted # of milliseconds spent doing I/Os (unsigned int)
    This field is incremented at each I/O start, I/O completion, I/O
@ -143,6 +143,9 @@ are summed (possibly overflowing the unsigned long variable they are
 summed to) and the result given to the user.  There is no convenient
 user interface for accessing the per-CPU counters themselves.

+Since 4.19 request times are measured with nanoseconds precision and
+truncated to milliseconds before showing in this interface.
+
 Disks vs Partitions
 -------------------

--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@ -22,11 +22,13 @@
 			default: 0

 	acpi_backlight=	[HW,ACPI]
-			acpi_backlight=vendor
-			acpi_backlight=video
-			If set to vendor, prefer vendor specific driver
+			{ vendor | video | native | none }
+			If set to vendor, prefer vendor-specific driver
 			(e.g. thinkpad_acpi, sony_acpi, etc.) instead
 			of the ACPI video.ko driver.
+			If set to video, use the ACPI video.ko driver.
+			If set to native, use the device's native backlight mode.
+			If set to none, disable the ACPI backlight interface.

 	acpi_force_32bit_fadt_addr
 			force FADT to use 32 bit addresses rather than the
@ -450,6 +452,9 @@
 	bert_disable	[ACPI]
 			Disable BERT OS support on buggy BIOSes.

+	bgrt_disable	[ACPI][X86]
+			Disable BGRT to avoid flickering OEM logo.
+
 	bttv.card=	[HW,V4L] bttv (bt848 + bt878 based grabber cards)
 	bttv.radio=	Most important insmod options are available as
 			kernel args too.
@ -522,6 +527,7 @@
 			Default value is set via a kernel config option.
 			Value can be changed at runtime via
 				/sys/fs/selinux/checkreqprot.
+			Setting checkreqprot to 1 is deprecated.

 	cio_ignore=	[S390]
 			See Documentation/s390/common_io.rst for details.
@ -679,7 +685,7 @@
 	coredump_filter=
 			[KNL] Change the default value for
 			/proc/<pid>/coredump_filter.
-			See also Documentation/filesystems/proc.txt.
+			See also Documentation/filesystems/proc.rst.

 	coresight_cpu_debug.enable
 			[ARM,ARM64]
@ -956,7 +962,7 @@
 			edid/1680x1050.bin, or edid/1920x1080.bin is given
 			and no file with the same name exists. Details and
 			instructions how to build your own EDID data are
-			available in Documentation/driver-api/edid.rst. An EDID
+			available in Documentation/admin-guide/edid.rst. An EDID
 			data set will only be used for a particular connector,
 			if its name and a colon are prepended to the EDID
 			name. Each connector may use a unique EDID data
@ -986,10 +992,6 @@
 			Documentation/admin-guide/dynamic-debug-howto.rst
 			for details.

-	nompx		[X86] Disables Intel Memory Protection Extensions.
-			See Documentation/x86/intel_mpx.rst for more
-			information about the feature.
-
 	nopku		[X86] Disable Memory Protection Keys CPU feature found
 			in some Intel CPUs.

@ -1099,6 +1101,12 @@
 			A valid base address must be provided, and the serial
 			port must already be setup and configured.

+		ec_imx21,<addr>
+		ec_imx6q,<addr>
+			Start an early, polled-mode, output-only console on the
+			Freescale i.MX UART at the specified address. The UART
+			must already be setup and configured.
+
 		ar3700_uart,<addr>
 			Start an early, polled-mode console on the
 			Armada 3700 serial port at the specified
@ -1354,6 +1362,24 @@
 			can be changed at run time by the max_graph_depth file
 			in the tracefs tracing directory. default: 0 (no limit)

+	fw_devlink=	[KNL] Create device links between consumer and supplier
+			devices by scanning the firmware to infer the
+			consumer/supplier relationships. This feature is
+			especially useful when drivers are loaded as modules as
+			it ensures proper ordering of tasks like device probing
+			(suppliers first, then consumers), supplier boot state
+			clean up (only after all consumers have probed),
+			suspend/resume & runtime PM (consumers first, then
+			suppliers).
+			Format: { off | permissive | on | rpm }
+			off --	Don't create device links from firmware info.
+			permissive -- Create device links from firmware info
+				but use it only for ordering boot state clean
+				up (sync_state() calls).
+			on -- 	Create device links from firmware info and use it
+				to enforce probe and suspend/resume ordering.
+			rpm --	Like "on", but also use to order runtime PM.
+
 	gamecon.map[2|3]=
 			[HW,JOY] Multisystem joystick and NES/SNES/PSX pad
 			support via parallel port (up to 5 devices per port)
@ -1445,6 +1471,14 @@
 	hpet_mmap=	[X86, HPET_MMAP] Allow userspace to mmap HPET
 			registers.  Default set by CONFIG_HPET_MMAP_DEFAULT.

+	hugetlb_cma=	[HW] The size of a cma area used for allocation
+			of gigantic hugepages.
+			Format: nn[KMGTPE]
+
+			Reserve a cma area of given size and allocate gigantic
+			hugepages using the cma allocator. If enabled, the
+			boot-time allocation of gigantic hugepages is skipped.
+
 	hugepages=	[HW,X86-32,IA-64] HugeTLB pages to allocate at boot.
 	hugepagesz=	[HW,IA-64,PPC,X86-64] The size of the HugeTLB pages.
 			On x86-64 and powerpc, this option can be specified
@ -1779,7 +1813,7 @@
 			provided by tboot because it makes the system
 			vulnerable to DMA attacks.
 		nobounce [Default off]
-			Disable bounce buffer for unstrusted devices such as
+			Disable bounce buffer for untrusted devices such as
 			the Thunderbolt devices. This will treat the untrusted
 			devices as the trusted ones, hence might expose security
 			risks of DMA attacks.
@ -1883,7 +1917,7 @@
 			No delay

 	ip=		[IP_PNP]
-			See Documentation/filesystems/nfs/nfsroot.txt.
+			See Documentation/admin-guide/nfs/nfsroot.rst.

 	ipcmni_extend	[KNL] Extend the maximum number of unique System V
 			IPC identifiers from 32,768 to 16,777,216.
@ -2543,13 +2577,22 @@
 			For details see: Documentation/admin-guide/hw-vuln/mds.rst

 	mem=nn[KMG]	[KNL,BOOT] Force usage of a specific amount of memory
-			Amount of memory to be used when the kernel is not able
-			to see the whole system memory or for test.
+			Amount of memory to be used in cases as follows:
+
+			1 for test;
+			2 when the kernel is not able to see the whole system memory;
+			3 memory that lies after 'mem=' boundary is excluded from
+			 the hypervisor, then assigned to KVM guests.
+
 			[X86] Work as limiting max address. Use together
 			with memmap= to avoid physical address space collisions.
 			Without memmap= PCI devices could be placed at addresses
 			belonging to unused RAM.

+			Note that this only takes effects during boot time since
+			in above case 3, memory may need be hot added after boot
+			if system memory of hypervisor is not sufficient.
+
 	mem=nopentium	[BUGS=X86-32] Disable usage of 4MB pages for kernel
 			memory.

@ -2795,7 +2838,7 @@
 			<name>,<region-number>[,<base>,<size>,<buswidth>,<altbuswidth>]

 	mtdparts=	[MTD]
-			See drivers/mtd/cmdlinepart.c.
+			See drivers/mtd/parsers/cmdlinepart.c

 	multitce=off	[PPC]  This parameter disables the use of the pSeries
 			firmware feature for updating multiple TCE entries
@ -2853,13 +2896,13 @@
 			Default value is 0.

 	nfsaddrs=	[NFS] Deprecated.  Use ip= instead.
-			See Documentation/filesystems/nfs/nfsroot.txt.
+			See Documentation/admin-guide/nfs/nfsroot.rst.

 	nfsroot=	[NFS] nfs root filesystem for disk-less boxes.
-			See Documentation/filesystems/nfs/nfsroot.txt.
+			See Documentation/admin-guide/nfs/nfsroot.rst.

 	nfsrootdebug	[NFS] enable nfsroot debugging messages.
-			See Documentation/filesystems/nfs/nfsroot.txt.
+			See Documentation/admin-guide/nfs/nfsroot.rst.

 	nfs.callback_nr_threads=
 			[NFSv4] set the total number of threads that the
@ -3174,7 +3217,7 @@
 			[X86,PV_OPS] Disable paravirtualized VMware scheduler
 			clock and use the default one.

-	no-steal-acc	[X86,KVM,ARM64] Disable paravirtualized steal time
+	no-steal-acc	[X86,PV_OPS,ARM64] Disable paravirtualized steal time
 			accounting. steal time is computed, but won't
 			influence scheduler behaviour

@ -3285,12 +3328,6 @@
 			This can be set from sysctl after boot.
 			See Documentation/admin-guide/sysctl/vm.rst for details.

-	of_devlink	[OF, KNL] Create device links between consumer and
-			supplier devices by scanning the devictree to infer the
-			consumer/supplier relationships.  A consumer device
-			will not be probed until all the supplier devices have
-			probed successfully.
-
 	ohci1394_dma=early	[HW] enable debugging via the ohci1394 driver.
 			See Documentation/debugging-via-ohci1394.txt for more
 			info.
@ -3698,6 +3735,9 @@
 			Override pmtimer IOPort with a hex value.
 			e.g. pmtmr=0x508

+	pm_debug_messages	[SUSPEND,KNL]
+			Enable suspend/resume debug messages during boot up.
+
 	pnp.debug=1	[PNP]
 			Enable PNP debug messages (depends on the
 			CONFIG_PNP_DEBUG_MESSAGES option).  Change at run-time
@ -3799,6 +3839,11 @@
 			before loading.
 			See Documentation/admin-guide/blockdev/ramdisk.rst.

+	prot_virt=	[S390] enable hosting protected virtual machines
+			isolated from the hypervisor (if hardware supports
+			that).
+			Format: <bool>
+
 	psi=		[KNL] Enable or disable pressure stall information
 			tracking.
 			Format: <bool>
@ -3984,6 +4029,15 @@
 			Set threshold of queued RCU callbacks below which
 			batch limiting is re-enabled.

+	rcutree.qovld= [KNL]
+			Set threshold of queued RCU callbacks beyond which
+			RCU's force-quiescent-state scan will aggressively
+			enlist help from cond_resched() and sched IPIs to
+			help CPUs more quickly reach quiescent states.
+			Set to less than zero to make this be set based
+			on rcutree.qhimark at boot time and to zero to
+			disable more aggressive help enlistment.
+
 	rcutree.rcu_idle_gp_delay= [KNL]
 			Set wakeup interval for idle CPUs that have
 			RCU callbacks (RCU_FAST_NO_HZ=y).
@ -4199,6 +4253,12 @@
 	rcupdate.rcu_cpu_stall_suppress= [KNL]
 			Suppress RCU CPU stall warning messages.

+	rcupdate.rcu_cpu_stall_suppress_at_boot= [KNL]
+			Suppress RCU CPU stall warning messages and
+			rcutorture writer stall warnings that occur
+			during early boot, that is, during the time
+			before the init task is spawned.
+
 	rcupdate.rcu_cpu_stall_timeout= [KNL]
 			Set timeout for RCU CPU stall warning messages.

@ -4392,6 +4452,22 @@
 			incurs a small amount of overhead in the scheduler
 			but is useful for debugging and performance tuning.

+	sched_thermal_decay_shift=
+			[KNL, SMP] Set a decay shift for scheduler thermal
+			pressure signal. Thermal pressure signal follows the
+			default decay period of other scheduler pelt
+			signals(usually 32 ms but configurable). Setting
+			sched_thermal_decay_shift will left shift the decay
+			period for the thermal pressure signal by the shift
+			value.
+			i.e. with the default pelt decay period of 32 ms
+			sched_thermal_decay_shift   thermal pressure decay pr
+				1			64 ms
+				2			128 ms
+			and so on.
+			Format: integer between 0 and 10
+			Default is 0.
+
 	skew_tick=	[KNL] Offset the periodic timer tick per cpu to mitigate
 			xtime_lock contention on larger systems, and/or RCU lock
 			contention on all systems with CONFIG_MAXSMP set.
@ -4514,10 +4590,10 @@
 			Format: <integer>

 			A nonzero value instructs the soft-lockup detector
-			to panic the machine when a soft-lockup occurs. This
-			is also controlled by CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC
-			which is the respective build-time switch to that
-			functionality.
+			to panic the machine when a soft-lockup occurs. It is
+			also controlled by the kernel.softlockup_panic sysctl
+			and CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC, which is the
+			respective build-time switch to that functionality.

 	softlockup_all_cpu_backtrace=
 			[KNL] Should the soft-lockup detector generate
@ -4659,6 +4735,28 @@
 	spia_pedr=
 	spia_peddr=

+	split_lock_detect=
+			[X86] Enable split lock detection
+
+			When enabled (and if hardware support is present), atomic
+			instructions that access data across cache line
+			boundaries will result in an alignment check exception.
+
+			off	- not enabled
+
+			warn	- the kernel will emit rate limited warnings
+				  about applications triggering the #AC
+				  exception. This mode is the default on CPUs
+				  that supports split lock detection.
+
+			fatal	- the kernel will send SIGBUS to applications
+				  that trigger the #AC exception.
+
+			If an #AC exception is hit in the kernel or in
+			firmware (i.e. not while executing in user mode)
+			the kernel will oops in either "warn" or "fatal"
+			mode.
+
 	srcutree.counter_wrap_check [KNL]
 			Specifies how frequently to check for
 			grace-period sequence counter wrap for the
@ -4871,6 +4969,10 @@
 			topology updates sent by the hypervisor to this
 			LPAR.

+	torture.disable_onoff_at_boot= [KNL]
+			Prevent the CPU-hotplug component of torturing
+			until after init has spawned.
+
 	tp720=		[HW,PS2]

 	tpm_suspend_pcr=[HW,TPM]
--- a/Documentation/admin-guide/kernel-per-CPU-kthreads.rst
+++ b/Documentation/admin-guide/kernel-per-CPU-kthreads.rst
@ -234,7 +234,7 @@ To reduce its OS jitter, do any of the following:
 	Such a workqueue can be confined to a given subset of the
 	CPUs using the ``/sys/devices/virtual/workqueue/*/cpumask`` sysfs
 	files.	The set of WQ_SYSFS workqueues can be displayed using
-	"ls sys/devices/virtual/workqueue".  That said, the workqueues
+	"ls /sys/devices/virtual/workqueue".  That said, the workqueues
 	maintainer would like to caution people against indiscriminately
 	sprinkling WQ_SYSFS across all the workqueues.	The reason for
 	caution is that it is easy to add WQ_SYSFS, but because sysfs is
--- a/Documentation/admin-guide/mm/transhuge.rst
+++ b/Documentation/admin-guide/mm/transhuge.rst
@ -310,6 +310,11 @@ thp_fault_fallback
 	is incremented if a page fault fails to allocate
 	a huge page and instead falls back to using small pages.

+thp_fault_fallback_charge
+	is incremented if a page fault fails to charge a huge page and
+	instead falls back to using small pages even though the
+	allocation was successful.
+
 thp_collapse_alloc_failed
 	is incremented if khugepaged found a range
 	of pages that should be collapsed into one huge page but failed
@ -319,6 +324,15 @@ thp_file_alloc
 	is incremented every time a file huge page is successfully
 	allocated.

+thp_file_fallback
+	is incremented if a file huge page is attempted to be allocated
+	but fails and instead falls back to using small pages.
+
+thp_file_fallback_charge
+	is incremented if a file huge page cannot be charged and instead
+	falls back to using small pages even though the allocation was
+	successful.
+
 thp_file_mapped
 	is incremented every time a file huge page is mapped into
 	user address space.
--- a/Documentation/admin-guide/mm/userfaultfd.rst
+++ b/Documentation/admin-guide/mm/userfaultfd.rst
@ -108,6 +108,57 @@ UFFDIO_COPY. They're atomic as in guaranteeing that nothing can see an
 half copied page since it'll keep userfaulting until the copy has
 finished.

+Notes:
+
+- If you requested UFFDIO_REGISTER_MODE_MISSING when registering then
+  you must provide some kind of page in your thread after reading from
+  the uffd.  You must provide either UFFDIO_COPY or UFFDIO_ZEROPAGE.
+  The normal behavior of the OS automatically providing a zero page on
+  an annonymous mmaping is not in place.
+
+- None of the page-delivering ioctls default to the range that you
+  registered with.  You must fill in all fields for the appropriate
+  ioctl struct including the range.
+
+- You get the address of the access that triggered the missing page
+  event out of a struct uffd_msg that you read in the thread from the
+  uffd.  You can supply as many pages as you want with UFFDIO_COPY or
+  UFFDIO_ZEROPAGE.  Keep in mind that unless you used DONTWAKE then
+  the first of any of those IOCTLs wakes up the faulting thread.
+
+- Be sure to test for all errors including (pollfd[0].revents &
+  POLLERR).  This can happen, e.g. when ranges supplied were
+  incorrect.
+
+Write Protect Notifications
+---------------------------
+
+This is equivalent to (but faster than) using mprotect and a SIGSEGV
+signal handler.
+
+Firstly you need to register a range with UFFDIO_REGISTER_MODE_WP.
+Instead of using mprotect(2) you use ioctl(uffd, UFFDIO_WRITEPROTECT,
+struct *uffdio_writeprotect) while mode = UFFDIO_WRITEPROTECT_MODE_WP
+in the struct passed in.  The range does not default to and does not
+have to be identical to the range you registered with.  You can write
+protect as many ranges as you like (inside the registered range).
+Then, in the thread reading from uffd the struct will have
+msg.arg.pagefault.flags & UFFD_PAGEFAULT_FLAG_WP set. Now you send
+ioctl(uffd, UFFDIO_WRITEPROTECT, struct *uffdio_writeprotect) again
+while pagefault.mode does not have UFFDIO_WRITEPROTECT_MODE_WP set.
+This wakes up the thread which will continue to run with writes. This
+allows you to do the bookkeeping about the write in the uffd reading
+thread before the ioctl.
+
+If you registered with both UFFDIO_REGISTER_MODE_MISSING and
+UFFDIO_REGISTER_MODE_WP then you need to think about the sequence in
+which you supply a page and undo write protect.  Note that there is a
+difference between writes into a WP area and into a !WP area.  The
+former will have UFFD_PAGEFAULT_FLAG_WP set, the latter
+UFFD_PAGEFAULT_FLAG_WRITE.  The latter did not fail on protection but
+you still need to supply a page when UFFDIO_REGISTER_MODE_MISSING was
+used.
+
 QEMU/KVM
 ========

--- a/Documentation/admin-guide/perf/imx-ddr.rst
+++ b/Documentation/admin-guide/perf/imx-ddr.rst
@ -43,7 +43,8 @@ value 1 for supported.

  AXI_ID and AXI_MASKING are mapped on DPCR1 register in performance counter.
  When non-masked bits are matching corresponding AXI_ID bits then counter is
-  incremented. Perf counter is incremented if
+  incremented. Perf counter is incremented if::
+
        AxID && AXI_MASKING == AXI_ID && AXI_MASKING

  This filter doesn't support filter different AXI ID for axid-read and axid-write
--- a/Documentation/admin-guide/pm/cpufreq_drivers.rst
+++ b/Documentation/admin-guide/pm/cpufreq_drivers.rst
@ -0,0 +1,274 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=======================================================
+Legacy Documentation of CPU Performance Scaling Drivers
+=======================================================
+
+Included below are historic documents describing assorted
+:doc:`CPU performance scaling <cpufreq>` drivers.  They are reproduced verbatim,
+with the original white space formatting and indentation preserved, except for
+the added leading space character in every line of text.
+
+
+AMD PowerNow! Drivers
+=====================
+
+::
+
+ PowerNow! and Cool'n'Quiet are AMD names for frequency
+ management capabilities in AMD processors. As the hardware
+ implementation changes in new generations of the processors,
+ there is a different cpu-freq driver for each generation.
+
+ Note that the driver's will not load on the "wrong" hardware,
+ so it is safe to try each driver in turn when in doubt as to
+ which is the correct driver.
+
+ Note that the functionality to change frequency (and voltage)
+ is not available in all processors. The drivers will refuse
+ to load on processors without this capability. The capability
+ is detected with the cpuid instruction.
+
+ The drivers use BIOS supplied tables to obtain frequency and
+ voltage information appropriate for a particular platform.
+ Frequency transitions will be unavailable if the BIOS does
+ not supply these tables.
+
+ 6th Generation: powernow-k6
+
+ 7th Generation: powernow-k7: Athlon, Duron, Geode.
+
+ 8th Generation: powernow-k8: Athlon, Athlon 64, Opteron, Sempron.
+ Documentation on this functionality in 8th generation processors
+ is available in the "BIOS and Kernel Developer's Guide", publication
+ 26094, in chapter 9, available for download from www.amd.com.
+
+ BIOS supplied data, for powernow-k7 and for powernow-k8, may be
+ from either the PSB table or from ACPI objects. The ACPI support
+ is only available if the kernel config sets CONFIG_ACPI_PROCESSOR.
+ The powernow-k8 driver will attempt to use ACPI if so configured,
+ and fall back to PST if that fails.
+ The powernow-k7 driver will try to use the PSB support first, and
+ fall back to ACPI if the PSB support fails. A module parameter,
+ acpi_force, is provided to force ACPI support to be used instead
+ of PSB support.
+
+
+``cpufreq-nforce2``
+===================
+
+::
+
+ The cpufreq-nforce2 driver changes the FSB on nVidia nForce2 platforms.
+
+ This works better than on other platforms, because the FSB of the CPU
+ can be controlled independently from the PCI/AGP clock.
+
+ The module has two options:
+
+ 	fid: 	 multiplier * 10 (for example 8.5 = 85)
+ 	min_fsb: minimum FSB
+
+ If not set, fid is calculated from the current CPU speed and the FSB.
+ min_fsb defaults to FSB at boot time - 50 MHz.
+
+ IMPORTANT: The available range is limited downwards!
+            Also the minimum available FSB can differ, for systems
+            booting with 200 MHz, 150 should always work.
+
+
+``pcc-cpufreq``
+===============
+
+::
+
+ /*
+  *  pcc-cpufreq.txt - PCC interface documentation
+  *
+  *  Copyright (C) 2009 Red Hat, Matthew Garrett <mjg@redhat.com>
+  *  Copyright (C) 2009 Hewlett-Packard Development Company, L.P.
+  *      Nagananda Chumbalkar <nagananda.chumbalkar@hp.com>
+  */
+
+
+ 			Processor Clocking Control Driver
+ 			---------------------------------
+
+ Contents:
+ ---------
+ 1.	Introduction
+ 1.1	PCC interface
+ 1.1.1	Get Average Frequency
+ 1.1.2	Set Desired Frequency
+ 1.2	Platforms affected
+ 2.	Driver and /sys details
+ 2.1	scaling_available_frequencies
+ 2.2	cpuinfo_transition_latency
+ 2.3	cpuinfo_cur_freq
+ 2.4	related_cpus
+ 3.	Caveats
+
+ 1. Introduction:
+ ----------------
+ Processor Clocking Control (PCC) is an interface between the platform
+ firmware and OSPM. It is a mechanism for coordinating processor
+ performance (ie: frequency) between the platform firmware and the OS.
+
+ The PCC driver (pcc-cpufreq) allows OSPM to take advantage of the PCC
+ interface.
+
+ OS utilizes the PCC interface to inform platform firmware what frequency the
+ OS wants for a logical processor. The platform firmware attempts to achieve
+ the requested frequency. If the request for the target frequency could not be
+ satisfied by platform firmware, then it usually means that power budget
+ conditions are in place, and "power capping" is taking place.
+
+ 1.1 PCC interface:
+ ------------------
+ The complete PCC specification is available here:
+ https://acpica.org/sites/acpica/files/Processor-Clocking-Control-v1p0.pdf
+
+ PCC relies on a shared memory region that provides a channel for communication
+ between the OS and platform firmware. PCC also implements a "doorbell" that
+ is used by the OS to inform the platform firmware that a command has been
+ sent.
+
+ The ACPI PCCH() method is used to discover the location of the PCC shared
+ memory region. The shared memory region header contains the "command" and
+ "status" interface. PCCH() also contains details on how to access the platform
+ doorbell.
+
+ The following commands are supported by the PCC interface:
+ * Get Average Frequency
+ * Set Desired Frequency
+
+ The ACPI PCCP() method is implemented for each logical processor and is
+ used to discover the offsets for the input and output buffers in the shared
+ memory region.
+
+ When PCC mode is enabled, the platform will not expose processor performance
+ or throttle states (_PSS, _TSS and related ACPI objects) to OSPM. Therefore,
+ the native P-state driver (such as acpi-cpufreq for Intel, powernow-k8 for
+ AMD) will not load.
+
+ However, OSPM remains in control of policy. The governor (eg: "ondemand")
+ computes the required performance for each processor based on server workload.
+ The PCC driver fills in the command interface, and the input buffer and
+ communicates the request to the platform firmware. The platform firmware is
+ responsible for delivering the requested performance.
+
+ Each PCC command is "global" in scope and can affect all the logical CPUs in
+ the system. Therefore, PCC is capable of performing "group" updates. With PCC
+ the OS is capable of getting/setting the frequency of all the logical CPUs in
+ the system with a single call to the BIOS.
+
+ 1.1.1 Get Average Frequency:
+ ----------------------------
+ This command is used by the OSPM to query the running frequency of the
+ processor since the last time this command was completed. The output buffer
+ indicates the average unhalted frequency of the logical processor expressed as
+ a percentage of the nominal (ie: maximum) CPU frequency. The output buffer
+ also signifies if the CPU frequency is limited by a power budget condition.
+
+ 1.1.2 Set Desired Frequency:
+ ----------------------------
+ This command is used by the OSPM to communicate to the platform firmware the
+ desired frequency for a logical processor. The output buffer is currently
+ ignored by OSPM. The next invocation of "Get Average Frequency" will inform
+ OSPM if the desired frequency was achieved or not.
+
+ 1.2 Platforms affected:
+ -----------------------
+ The PCC driver will load on any system where the platform firmware:
+ * supports the PCC interface, and the associated PCCH() and PCCP() methods
+ * assumes responsibility for managing the hardware clocking controls in order
+ to deliver the requested processor performance
+
+ Currently, certain HP ProLiant platforms implement the PCC interface. On those
+ platforms PCC is the "default" choice.
+
+ However, it is possible to disable this interface via a BIOS setting. In
+ such an instance, as is also the case on platforms where the PCC interface
+ is not implemented, the PCC driver will fail to load silently.
+
+ 2. Driver and /sys details:
+ ---------------------------
+ When the driver loads, it merely prints the lowest and the highest CPU
+ frequencies supported by the platform firmware.
+
+ The PCC driver loads with a message such as:
+ pcc-cpufreq: (v1.00.00) driver loaded with frequency limits: 1600 MHz, 2933
+ MHz
+
+ This means that the OPSM can request the CPU to run at any frequency in
+ between the limits (1600 MHz, and 2933 MHz) specified in the message.
+
+ Internally, there is no need for the driver to convert the "target" frequency
+ to a corresponding P-state.
+
+ The VERSION number for the driver will be of the format v.xy.ab.
+ eg: 1.00.02
+    ----- --
+     |    |
+     |    -- this will increase with bug fixes/enhancements to the driver
+     |-- this is the version of the PCC specification the driver adheres to
+
+
+ The following is a brief discussion on some of the fields exported via the
+ /sys filesystem and how their values are affected by the PCC driver:
+
+ 2.1 scaling_available_frequencies:
+ ----------------------------------
+ scaling_available_frequencies is not created in /sys. No intermediate
+ frequencies need to be listed because the BIOS will try to achieve any
+ frequency, within limits, requested by the governor. A frequency does not have
+ to be strictly associated with a P-state.
+
+ 2.2 cpuinfo_transition_latency:
+ -------------------------------
+ The cpuinfo_transition_latency field is 0. The PCC specification does
+ not include a field to expose this value currently.
+
+ 2.3 cpuinfo_cur_freq:
+ ---------------------
+ A) Often cpuinfo_cur_freq will show a value different than what is declared
+ in the scaling_available_frequencies or scaling_cur_freq, or scaling_max_freq.
+ This is due to "turbo boost" available on recent Intel processors. If certain
+ conditions are met the BIOS can achieve a slightly higher speed than requested
+ by OSPM. An example:
+
+ scaling_cur_freq	: 2933000
+ cpuinfo_cur_freq	: 3196000
+
+ B) There is a round-off error associated with the cpuinfo_cur_freq value.
+ Since the driver obtains the current frequency as a "percentage" (%) of the
+ nominal frequency from the BIOS, sometimes, the values displayed by
+ scaling_cur_freq and cpuinfo_cur_freq may not match. An example:
+
+ scaling_cur_freq	: 1600000
+ cpuinfo_cur_freq	: 1583000
+
+ In this example, the nominal frequency is 2933 MHz. The driver obtains the
+ current frequency, cpuinfo_cur_freq, as 54% of the nominal frequency:
+
+ 	54% of 2933 MHz = 1583 MHz
+
+ Nominal frequency is the maximum frequency of the processor, and it usually
+ corresponds to the frequency of the P0 P-state.
+
+ 2.4 related_cpus:
+ -----------------
+ The related_cpus field is identical to affected_cpus.
+
+ affected_cpus	: 4
+ related_cpus	: 4
+
+ Currently, the PCC driver does not evaluate _PSD. The platforms that support
+ PCC do not implement SW_ALL. So OSPM doesn't need to perform any coordination
+ to ensure that the same frequency is requested of all dependent CPUs.
+
+ 3. Caveats:
+ -----------
+ The "cpufreq_stats" module in its present form cannot be loaded and
+ expected to work with the PCC driver. Since the "cpufreq_stats" module
+ provides information wrt each P-state, it is not applicable to the PCC driver.
--- a/Documentation/admin-guide/pm/cpuidle.rst
+++ b/Documentation/admin-guide/pm/cpuidle.rst
@ -583,20 +583,17 @@ Power Management Quality of Service for CPUs
 The power management quality of service (PM QoS) framework in the Linux kernel
 allows kernel code and user space processes to set constraints on various
 energy-efficiency features of the kernel to prevent performance from dropping
-below a required level.  The PM QoS constraints can be set globally, in
-predefined categories referred to as PM QoS classes, or against individual
-devices.
+below a required level.

 CPU idle time management can be affected by PM QoS in two ways, through the
-global constraint in the ``PM_QOS_CPU_DMA_LATENCY`` class and through the
-resume latency constraints for individual CPUs.  Kernel code (e.g. device
-drivers) can set both of them with the help of special internal interfaces
-provided by the PM QoS framework.  User space can modify the former by opening
-the :file:`cpu_dma_latency` special device file under :file:`/dev/` and writing
-a binary value (interpreted as a signed 32-bit integer) to it.  In turn, the
-resume latency constraint for a CPU can be modified by user space by writing a
-string (representing a signed 32-bit integer) to the
-:file:`power/pm_qos_resume_latency_us` file under
+global CPU latency limit and through the resume latency constraints for
+individual CPUs.  Kernel code (e.g. device drivers) can set both of them with
+the help of special internal interfaces provided by the PM QoS framework.  User
+space can modify the former by opening the :file:`cpu_dma_latency` special
+device file under :file:`/dev/` and writing a binary value (interpreted as a
+signed 32-bit integer) to it.  In turn, the resume latency constraint for a CPU
+can be modified from user space by writing a string (representing a signed
+32-bit integer) to the :file:`power/pm_qos_resume_latency_us` file under
 :file:`/sys/devices/system/cpu/cpu<N>/` in ``sysfs``, where the CPU number
 ``<N>`` is allocated at the system initialization time.  Negative values
 will be rejected in both cases and, also in both cases, the written integer
@ -605,32 +602,34 @@ number will be interpreted as a requested PM QoS constraint in microseconds.
 The requested value is not automatically applied as a new constraint, however,
 as it may be less restrictive (greater in this particular case) than another
 constraint previously requested by someone else.  For this reason, the PM QoS
-framework maintains a list of requests that have been made so far in each
-global class and for each device, aggregates them and applies the effective
-(minimum in this particular case) value as the new constraint.
+framework maintains a list of requests that have been made so far for the
+global CPU latency limit and for each individual CPU, aggregates them and
+applies the effective (minimum in this particular case) value as the new
+constraint.

 In fact, opening the :file:`cpu_dma_latency` special device file causes a new
-PM QoS request to be created and added to the priority list of requests in the
-``PM_QOS_CPU_DMA_LATENCY`` class and the file descriptor coming from the
-"open" operation represents that request.  If that file descriptor is then
-used for writing, the number written to it will be associated with the PM QoS
-request represented by it as a new requested constraint value.  Next, the
-priority list mechanism will be used to determine the new effective value of
-the entire list of requests and that effective value will be set as a new
-constraint.  Thus setting a new requested constraint value will only change the
-real constraint if the effective "list" value is affected by it.  In particular,
-for the ``PM_QOS_CPU_DMA_LATENCY`` class it only affects the real constraint if
-it is the minimum of the requested constraints in the list.  The process holding
-a file descriptor obtained by opening the :file:`cpu_dma_latency` special device
-file controls the PM QoS request associated with that file descriptor, but it
-controls this particular PM QoS request only.
+PM QoS request to be created and added to a global priority list of CPU latency
+limit requests and the file descriptor coming from the "open" operation
+represents that request.  If that file descriptor is then used for writing, the
+number written to it will be associated with the PM QoS request represented by
+it as a new requested limit value.  Next, the priority list mechanism will be
+used to determine the new effective value of the entire list of requests and
+that effective value will be set as a new CPU latency limit.  Thus requesting a
+new limit value will only change the real limit if the effective "list" value is
+affected by it, which is the case if it is the minimum of the requested values
+in the list.
+
+The process holding a file descriptor obtained by opening the
+:file:`cpu_dma_latency` special device file controls the PM QoS request
+associated with that file descriptor, but it controls this particular PM QoS
+request only.

 Closing the :file:`cpu_dma_latency` special device file or, more precisely, the
 file descriptor obtained while opening it, causes the PM QoS request associated
-with that file descriptor to be removed from the ``PM_QOS_CPU_DMA_LATENCY``
-class priority list and destroyed.  If that happens, the priority list mechanism
-will be used, again, to determine the new effective value for the whole list
-and that value will become the new real constraint.
+with that file descriptor to be removed from the global priority list of CPU
+latency limit requests and destroyed.  If that happens, the priority list
+mechanism will be used again, to determine the new effective value for the whole
+list and that value will become the new limit.

 In turn, for each CPU there is one resume latency PM QoS request associated with
 the :file:`power/pm_qos_resume_latency_us` file under
@ -647,10 +646,10 @@ CPU in question every time the list of requests is updated this way or another
 (there may be other requests coming from kernel code in that list).

 CPU idle time governors are expected to regard the minimum of the global
-effective ``PM_QOS_CPU_DMA_LATENCY`` class constraint and the effective
-resume latency constraint for the given CPU as the upper limit for the exit
-latency of the idle states they can select for that CPU.  They should never
-select any idle states with exit latency beyond that limit.
+(effective) CPU latency limit and the effective resume latency constraint for
+the given CPU as the upper limit for the exit latency of the idle states that
+they are allowed to select for that CPU.  They should never select any idle
+states with exit latency beyond that limit.


 Idle States Control Via Kernel Command Line
--- a/Documentation/admin-guide/pm/intel_pstate.rst
+++ b/Documentation/admin-guide/pm/intel_pstate.rst
@ -734,10 +734,10 @@ References
 ==========

 .. [1] Kristen Accardi, *Balancing Power and Performance in the Linux Kernel*,
-       http://events.linuxfoundation.org/sites/events/files/slides/LinuxConEurope_2015.pdf
+       https://events.static.linuxfound.org/sites/events/files/slides/LinuxConEurope_2015.pdf

 .. [2] *Intel® 64 and IA-32 Architectures Software Developer’s Manual Volume 3: System Programming Guide*,
-       http://www.intel.com/content/www/us/en/architecture-and-technology/64-ia-32-architectures-software-developer-system-programming-manual-325384.html
+       https://www.intel.com/content/www/us/en/architecture-and-technology/64-ia-32-architectures-software-developer-system-programming-manual-325384.html

 .. [3] *Advanced Configuration and Power Interface Specification*,
       https://uefi.org/sites/default/files/resources/ACPI_6_3_final_Jan30.pdf
--- a/Documentation/admin-guide/pm/suspend-flows.rst
+++ b/Documentation/admin-guide/pm/suspend-flows.rst
@ -0,0 +1,270 @@
+.. SPDX-License-Identifier: GPL-2.0
+.. include:: <isonum.txt>
+
+=========================
+System Suspend Code Flows
+=========================
+
+:Copyright: |copy| 2020 Intel Corporation
+
+:Author: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+
+At least one global system-wide transition needs to be carried out for the
+system to get from the working state into one of the supported
+:doc:`sleep states <sleep-states>`.  Hibernation requires more than one
+transition to occur for this purpose, but the other sleep states, commonly
+referred to as *system-wide suspend* (or simply *system suspend*) states, need
+only one.
+
+For those sleep states, the transition from the working state of the system into
+the target sleep state is referred to as *system suspend* too (in the majority
+of cases, whether this means a transition or a sleep state of the system should
+be clear from the context) and the transition back from the sleep state into the
+working state is referred to as *system resume*.
+
+The kernel code flows associated with the suspend and resume transitions for
+different sleep states of the system are quite similar, but there are some
+significant differences between the :ref:`suspend-to-idle <s2idle>` code flows
+and the code flows related to the :ref:`suspend-to-RAM <s2ram>` and
+:ref:`standby <standby>` sleep states.
+
+The :ref:`suspend-to-RAM <s2ram>` and :ref:`standby <standby>` sleep states
+cannot be implemented without platform support and the difference between them
+boils down to the platform-specific actions carried out by the suspend and
+resume hooks that need to be provided by the platform driver to make them
+available.  Apart from that, the suspend and resume code flows for these sleep
+states are mostly identical, so they both together will be referred to as
+*platform-dependent suspend* states in what follows.
+
+
+.. _s2idle_suspend:
+
+Suspend-to-idle Suspend Code Flow
+=================================
+
+The following steps are taken in order to transition the system from the working
+state to the :ref:`suspend-to-idle <s2idle>` sleep state:
+
+ 1. Invoking system-wide suspend notifiers.
+
+    Kernel subsystems can register callbacks to be invoked when the suspend
+    transition is about to occur and when the resume transition has finished.
+
+    That allows them to prepare for the change of the system state and to clean
+    up after getting back to the working state.
+
+ 2. Freezing tasks.
+
+    Tasks are frozen primarily in order to avoid unchecked hardware accesses
+    from user space through MMIO regions or I/O registers exposed directly to
+    it and to prevent user space from entering the kernel while the next step
+    of the transition is in progress (which might have been problematic for
+    various reasons).
+
+    All user space tasks are intercepted as though they were sent a signal and
+    put into uninterruptible sleep until the end of the subsequent system resume
+    transition.
+
+    The kernel threads that choose to be frozen during system suspend for
+    specific reasons are frozen subsequently, but they are not intercepted.
+    Instead, they are expected to periodically check whether or not they need
+    to be frozen and to put themselves into uninterruptible sleep if so.  [Note,
+    however, that kernel threads can use locking and other concurrency controls
+    available in kernel space to synchronize themselves with system suspend and
+    resume, which can be much more precise than the freezing, so the latter is
+    not a recommended option for kernel threads.]
+
+ 3. Suspending devices and reconfiguring IRQs.
+
+    Devices are suspended in four phases called *prepare*, *suspend*,
+    *late suspend* and *noirq suspend* (see :ref:`driverapi_pm_devices` for more
+    information on what exactly happens in each phase).
+
+    Every device is visited in each phase, but typically it is not physically
+    accessed in more than two of them.
+
+    The runtime PM API is disabled for every device during the *late* suspend
+    phase and high-level ("action") interrupt handlers are prevented from being
+    invoked before the *noirq* suspend phase.
+
+    Interrupts are still handled after that, but they are only acknowledged to
+    interrupt controllers without performing any device-specific actions that
+    would be triggered in the working state of the system (those actions are
+    deferred till the subsequent system resume transition as described
+    `below <s2idle_resume_>`_).
+
+    IRQs associated with system wakeup devices are "armed" so that the resume
+    transition of the system is started when one of them signals an event.
+
+ 4. Freezing the scheduler tick and suspending timekeeping.
+
+    When all devices have been suspended, CPUs enter the idle loop and are put
+    into the deepest available idle state.  While doing that, each of them
+    "freezes" its own scheduler tick so that the timer events associated with
+    the tick do not occur until the CPU is woken up by another interrupt source.
+
+    The last CPU to enter the idle state also stops the timekeeping which
+    (among other things) prevents high resolution timers from triggering going
+    forward until the first CPU that is woken up restarts the timekeeping.
+    That allows the CPUs to stay in the deep idle state relatively long in one
+    go.
+
+    From this point on, the CPUs can only be woken up by non-timer hardware
+    interrupts.  If that happens, they go back to the idle state unless the
+    interrupt that woke up one of them comes from an IRQ that has been armed for
+    system wakeup, in which case the system resume transition is started.
+
+
+.. _s2idle_resume:
+
+Suspend-to-idle Resume Code Flow
+================================
+
+The following steps are taken in order to transition the system from the
+:ref:`suspend-to-idle <s2idle>` sleep state into the working state:
+
+ 1. Resuming timekeeping and unfreezing the scheduler tick.
+
+    When one of the CPUs is woken up (by a non-timer hardware interrupt), it
+    leaves the idle state entered in the last step of the preceding suspend
+    transition, restarts the timekeeping (unless it has been restarted already
+    by another CPU that woke up earlier) and the scheduler tick on that CPU is
+    unfrozen.
+
+    If the interrupt that has woken up the CPU was armed for system wakeup,
+    the system resume transition begins.
+
+ 2. Resuming devices and restoring the working-state configuration of IRQs.
+
+    Devices are resumed in four phases called *noirq resume*, *early resume*,
+    *resume* and *complete* (see :ref:`driverapi_pm_devices` for more
+    information on what exactly happens in each phase).
+
+    Every device is visited in each phase, but typically it is not physically
+    accessed in more than two of them.
+
+    The working-state configuration of IRQs is restored after the *noirq* resume
+    phase and the runtime PM API is re-enabled for every device whose driver
+    supports it during the *early* resume phase.
+
+ 3. Thawing tasks.
+
+    Tasks frozen in step 2 of the preceding `suspend <s2idle_suspend_>`_
+    transition are "thawed", which means that they are woken up from the
+    uninterruptible sleep that they went into at that time and user space tasks
+    are allowed to exit the kernel.
+
+ 4. Invoking system-wide resume notifiers.
+
+    This is analogous to step 1 of the `suspend <s2idle_suspend_>`_ transition
+    and the same set of callbacks is invoked at this point, but a different
+    "notification type" parameter value is passed to them.
+
+
+Platform-dependent Suspend Code Flow
+====================================
+
+The following steps are taken in order to transition the system from the working
+state to platform-dependent suspend state:
+
+ 1. Invoking system-wide suspend notifiers.
+
+    This step is the same as step 1 of the suspend-to-idle suspend transition
+    described `above <s2idle_suspend_>`_.
+
+ 2. Freezing tasks.
+
+    This step is the same as step 2 of the suspend-to-idle suspend transition
+    described `above <s2idle_suspend_>`_.
+
+ 3. Suspending devices and reconfiguring IRQs.
+
+    This step is analogous to step 3 of the suspend-to-idle suspend transition
+    described `above <s2idle_suspend_>`_, but the arming of IRQs for system
+    wakeup generally does not have any effect on the platform.
+
+    There are platforms that can go into a very deep low-power state internally
+    when all CPUs in them are in sufficiently deep idle states and all I/O
+    devices have been put into low-power states.  On those platforms,
+    suspend-to-idle can reduce system power very effectively.
+
+    On the other platforms, however, low-level components (like interrupt
+    controllers) need to be turned off in a platform-specific way (implemented
+    in the hooks provided by the platform driver) to achieve comparable power
+    reduction.
+
+    That usually prevents in-band hardware interrupts from waking up the system,
+    which must be done in a special platform-dependent way.  Then, the
+    configuration of system wakeup sources usually starts when system wakeup
+    devices are suspended and is finalized by the platform suspend hooks later
+    on.
+
+ 4. Disabling non-boot CPUs.
+
+    On some platforms the suspend hooks mentioned above must run in a one-CPU
+    configuration of the system (in particular, the hardware cannot be accessed
+    by any code running in parallel with the platform suspend hooks that may,
+    and often do, trap into the platform firmware in order to finalize the
+    suspend transition).
+
+    For this reason, the CPU offline/online (CPU hotplug) framework is used
+    to take all of the CPUs in the system, except for one (the boot CPU),
+    offline (typically, the CPUs that have been taken offline go into deep idle
+    states).
+
+    This means that all tasks are migrated away from those CPUs and all IRQs are
+    rerouted to the only CPU that remains online.
+
+ 5. Suspending core system components.
+
+    This prepares the core system components for (possibly) losing power going
+    forward and suspends the timekeeping.
+
+ 6. Platform-specific power removal.
+
+    This is expected to remove power from all of the system components except
+    for the memory controller and RAM (in order to preserve the contents of the
+    latter) and some devices designated for system wakeup.
+
+    In many cases control is passed to the platform firmware which is expected
+    to finalize the suspend transition as needed.
+
+
+Platform-dependent Resume Code Flow
+===================================
+
+The following steps are taken in order to transition the system from a
+platform-dependent suspend state into the working state:
+
+ 1. Platform-specific system wakeup.
+
+    The platform is woken up by a signal from one of the designated system
+    wakeup devices (which need not be an in-band hardware interrupt)  and
+    control is passed back to the kernel (the working configuration of the
+    platform may need to be restored by the platform firmware before the
+    kernel gets control again).
+
+ 2. Resuming core system components.
+
+    The suspend-time configuration of the core system components is restored and
+    the timekeeping is resumed.
+
+ 3. Re-enabling non-boot CPUs.
+
+    The CPUs disabled in step 4 of the preceding suspend transition are taken
+    back online and their suspend-time configuration is restored.
+
+ 4. Resuming devices and restoring the working-state configuration of IRQs.
+
+    This step is the same as step 2 of the suspend-to-idle suspend transition
+    described `above <s2idle_resume_>`_.
+
+ 5. Thawing tasks.
+
+    This step is the same as step 3 of the suspend-to-idle suspend transition
+    described `above <s2idle_resume_>`_.
+
+ 6. Invoking system-wide resume notifiers.
+
+    This step is the same as step 4 of the suspend-to-idle suspend transition
+    described `above <s2idle_resume_>`_.
--- a/Documentation/admin-guide/pm/system-wide.rst
+++ b/Documentation/admin-guide/pm/system-wide.rst
@ -8,3 +8,4 @@ System-Wide Power Management
   :maxdepth: 2

   sleep-states
+   suspend-flows
--- a/Documentation/admin-guide/pm/working-state.rst
+++ b/Documentation/admin-guide/pm/working-state.rst
@ -11,4 +11,5 @@ Working-State Power Management
   intel_idle
   cpufreq
   intel_pstate
+   cpufreq_drivers
   intel_epb
--- a/Documentation/admin-guide/sysctl/kernel.rst
+++ b/Documentation/admin-guide/sysctl/kernel.rst
--- a/Documentation/admin-guide/sysctl/net.rst
+++ b/Documentation/admin-guide/sysctl/net.rst
@ -67,7 +67,8 @@ two flavors of JITs, the newer eBPF JIT currently supported on:
  - sparc64
  - mips64
  - s390x
-  - riscv
+  - riscv64
+  - riscv32

 And the older cBPF JIT supported on the following archs:

--- a/Documentation/admin-guide/sysctl/user.rst
+++ b/Documentation/admin-guide/sysctl/user.rst
@ -65,6 +65,12 @@ max_pid_namespaces
  The maximum number of pid namespaces that any user in the current
  user namespace may create.

+max_time_namespaces
+===================
+
+  The maximum number of time namespaces that any user in the current
+  user namespace may create.
+
 max_user_namespaces
 ===================

--- a/Documentation/admin-guide/sysctl/vm.rst
+++ b/Documentation/admin-guide/sysctl/vm.rst
@ -128,6 +128,9 @@ allowed to examine the unevictable lru (mlocked pages) for pages to compact.
 This should be used on systems where stalls for minor page faults are an
 acceptable trade for large contiguous free memory.  Set to 0 to prevent
 compaction from moving pages that are unevictable.  Default value is 1.
+On CONFIG_PREEMPT_RT the default value is 0 in order to avoid a page fault, due
+to compaction, which would block the task from becomming active until the fault
+is resolved.


 dirty_background_bytes
--- a/Documentation/admin-guide/sysrq.rst
+++ b/Documentation/admin-guide/sysrq.rst
@ -48,9 +48,10 @@ always allowed (by a user with admin privileges).
 How do I use the magic SysRq key?
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

-On x86   - You press the key combo :kbd:`ALT-SysRq-<command key>`.
+On x86
+	You press the key combo :kbd:`ALT-SysRq-<command key>`.

-.. note::
+	.. note::
 	   Some
           keyboards may not have a key labeled 'SysRq'. The 'SysRq' key is
           also known as the 'Print Screen' key. Also some keyboards cannot
@ -58,14 +59,15 @@ On x86   - You press the key combo :kbd:`ALT-SysRq-<command key>`.
 	   have better luck with press :kbd:`Alt`, press :kbd:`SysRq`,
 	   release :kbd:`SysRq`, press :kbd:`<command key>`, release everything.

-On SPARC - You press :kbd:`ALT-STOP-<command key>`, I believe.
+On SPARC
+	You press :kbd:`ALT-STOP-<command key>`, I believe.

 On the serial console (PC style standard serial ports only)
        You send a ``BREAK``, then within 5 seconds a command key. Sending
        ``BREAK`` twice is interpreted as a normal BREAK.

 On PowerPC
-	Press :kbd:`ALT - Print Screen` (or :kbd:`F13`) - :kbd:`<command key>`,
+	Press :kbd:`ALT - Print Screen` (or :kbd:`F13`) - :kbd:`<command key>`.
        :kbd:`Print Screen` (or :kbd:`F13`) - :kbd:`<command key>` may suffice.

 On other
@ -73,7 +75,7 @@ On other
        let me know so I can add them to this section.

 On all
-	write a character to /proc/sysrq-trigger.  e.g.::
+	Write a character to /proc/sysrq-trigger.  e.g.::

 		echo t > /proc/sysrq-trigger

@ -282,7 +284,7 @@ Just ask them on the linux-kernel mailing list:
 Credits
 ~~~~~~~

-Written by Mydraal <vulpyne@vulpyne.net>
-Updated by Adam Sulmicki <adam@cfar.umd.edu>
-Updated by Jeremy M. Dolan <jmd@turbogeek.org> 2001/01/28 10:15:59
-Added to by Crutcher Dunnavant <crutcher+kernel@datastacks.com>
+- Written by Mydraal <vulpyne@vulpyne.net>
+- Updated by Adam Sulmicki <adam@cfar.umd.edu>
+- Updated by Jeremy M. Dolan <jmd@turbogeek.org> 2001/01/28 10:15:59
+- Added to by Crutcher Dunnavant <crutcher+kernel@datastacks.com>
--- a/Documentation/arm/tcm.rst
+++ b/Documentation/arm/tcm.rst
@ -4,18 +4,18 @@ ARM TCM (Tightly-Coupled Memory) handling in Linux

 Written by Linus Walleij <linus.walleij@stericsson.com>

-Some ARM SoC:s have a so-called TCM (Tightly-Coupled Memory).
+Some ARM SoCs have a so-called TCM (Tightly-Coupled Memory).
 This is usually just a few (4-64) KiB of RAM inside the ARM
 processor.

-Due to being embedded inside the CPU The TCM has a
+Due to being embedded inside the CPU, the TCM has a
 Harvard-architecture, so there is an ITCM (instruction TCM)
 and a DTCM (data TCM). The DTCM can not contain any
 instructions, but the ITCM can actually contain data.
 The size of DTCM or ITCM is minimum 4KiB so the typical
 minimum configuration is 4KiB ITCM and 4KiB DTCM.

-ARM CPU:s have special registers to read out status, physical
+ARM CPUs have special registers to read out status, physical
 location and size of TCM memories. arch/arm/include/asm/cputype.h
 defines a CPUID_TCM register that you can read out from the
 system control coprocessor. Documentation from ARM can be found
--- a/Documentation/arm64/amu.rst
+++ b/Documentation/arm64/amu.rst
@ -0,0 +1,112 @@
+=======================================================
+Activity Monitors Unit (AMU) extension in AArch64 Linux
+=======================================================
+
+Author: Ionela Voinescu <ionela.voinescu@arm.com>
+
+Date: 2019-09-10
+
+This document briefly describes the provision of Activity Monitors Unit
+support in AArch64 Linux.
+
+
+Architecture overview
+---------------------
+
+The activity monitors extension is an optional extension introduced by the
+ARMv8.4 CPU architecture.
+
+The activity monitors unit, implemented in each CPU, provides performance
+counters intended for system management use. The AMU extension provides a
+system register interface to the counter registers and also supports an
+optional external memory-mapped interface.
+
+Version 1 of the Activity Monitors architecture implements a counter group
+of four fixed and architecturally defined 64-bit event counters.
+  - CPU cycle counter: increments at the frequency of the CPU.
+  - Constant counter: increments at the fixed frequency of the system
+    clock.
+  - Instructions retired: increments with every architecturally executed
+    instruction.
+  - Memory stall cycles: counts instruction dispatch stall cycles caused by
+    misses in the last level cache within the clock domain.
+
+When in WFI or WFE these counters do not increment.
+
+The Activity Monitors architecture provides space for up to 16 architected
+event counters. Future versions of the architecture may use this space to
+implement additional architected event counters.
+
+Additionally, version 1 implements a counter group of up to 16 auxiliary
+64-bit event counters.
+
+On cold reset all counters reset to 0.
+
+
+Basic support
+-------------
+
+The kernel can safely run a mix of CPUs with and without support for the
+activity monitors extension. Therefore, when CONFIG_ARM64_AMU_EXTN is
+selected we unconditionally enable the capability to allow any late CPU
+(secondary or hotplugged) to detect and use the feature.
+
+When the feature is detected on a CPU, we flag the availability of the
+feature but this does not guarantee the correct functionality of the
+counters, only the presence of the extension.
+
+Firmware (code running at higher exception levels, e.g. arm-tf) support is
+needed to:
+ - Enable access for lower exception levels (EL2 and EL1) to the AMU
+   registers.
+ - Enable the counters. If not enabled these will read as 0.
+ - Save/restore the counters before/after the CPU is being put/brought up
+   from the 'off' power state.
+
+When using kernels that have this feature enabled but boot with broken
+firmware the user may experience panics or lockups when accessing the
+counter registers. Even if these symptoms are not observed, the values
+returned by the register reads might not correctly reflect reality. Most
+commonly, the counters will read as 0, indicating that they are not
+enabled.
+
+If proper support is not provided in firmware it's best to disable
+CONFIG_ARM64_AMU_EXTN. To be noted that for security reasons, this does not
+bypass the setting of AMUSERENR_EL0 to trap accesses from EL0 (userspace) to
+EL1 (kernel). Therefore, firmware should still ensure accesses to AMU registers
+are not trapped in EL2/EL3.
+
+The fixed counters of AMUv1 are accessible though the following system
+register definitions:
+ - SYS_AMEVCNTR0_CORE_EL0
+ - SYS_AMEVCNTR0_CONST_EL0
+ - SYS_AMEVCNTR0_INST_RET_EL0
+ - SYS_AMEVCNTR0_MEM_STALL_EL0
+
+Auxiliary platform specific counters can be accessed using
+SYS_AMEVCNTR1_EL0(n), where n is a value between 0 and 15.
+
+Details can be found in: arch/arm64/include/asm/sysreg.h.
+
+
+Userspace access
+----------------
+
+Currently, access from userspace to the AMU registers is disabled due to:
+ - Security reasons: they might expose information about code executed in
+   secure mode.
+ - Purpose: AMU counters are intended for system management use.
+
+Also, the presence of the feature is not visible to userspace.
+
+
+Virtualization
+--------------
+
+Currently, access from userspace (EL0) and kernelspace (EL1) on the KVM
+guest side is disabled due to:
+ - Security reasons: they might expose information about code executed
+   by other guests or the host.
+
+Any attempt to access the AMU registers will result in an UNDEFINED
+exception being injected into the guest.
--- a/Documentation/arm64/booting.rst
+++ b/Documentation/arm64/booting.rst
@ -248,6 +248,20 @@ Before jumping into the kernel, the following conditions must be met:
    - HCR_EL2.APK (bit 40) must be initialised to 0b1
    - HCR_EL2.API (bit 41) must be initialised to 0b1

+  For CPUs with Activity Monitors Unit v1 (AMUv1) extension present:
+  - If EL3 is present:
+    CPTR_EL3.TAM (bit 30) must be initialised to 0b0
+    CPTR_EL2.TAM (bit 30) must be initialised to 0b0
+    AMCNTENSET0_EL0 must be initialised to 0b1111
+    AMCNTENSET1_EL0 must be initialised to a platform specific value
+    having 0b1 set for the corresponding bit for each of the auxiliary
+    counters present.
+  - If the kernel is entered at EL1:
+    AMCNTENSET0_EL0 must be initialised to 0b1111
+    AMCNTENSET1_EL0 must be initialised to a platform specific value
+    having 0b1 set for the corresponding bit for each of the auxiliary
+    counters present.
+
 The requirements described above for CPU mode, caches, MMUs, architected
 timers, coherency and system registers apply to all CPUs.  All CPUs must
 enter the kernel in the same exception level.
--- a/Documentation/arm64/index.rst
+++ b/Documentation/arm64/index.rst
@ -6,6 +6,7 @@ ARM64 Architecture
    :maxdepth: 1

    acpi_object_usage
+    amu
    arm-acpi
    booting
    cpu-feature-registers
--- a/Documentation/arm64/silicon-errata.rst
+++ b/Documentation/arm64/silicon-errata.rst
@ -110,6 +110,8 @@ stable kernels.
 +----------------+-----------------+-----------------+-----------------------------+
 | Cavium         | ThunderX GICv3  | #23154          | CAVIUM_ERRATUM_23154        |
 +----------------+-----------------+-----------------+-----------------------------+
+| Cavium         | ThunderX GICv3  | #38539          | N/A                         |
+----------------+-----------------+-----------------+-----------------------------+
 | Cavium         | ThunderX Core   | #27456          | CAVIUM_ERRATUM_27456        |
 +----------------+-----------------+-----------------+-----------------------------+
 | Cavium         | ThunderX Core   | #30115          | CAVIUM_ERRATUM_30115        |
--- a/Documentation/block/capability.rst
+++ b/Documentation/block/capability.rst
@ -2,17 +2,9 @@
 Generic Block Device Capability
 ===============================

-This file documents the sysfs file block/<disk>/capability
+This file documents the sysfs file ``block/<disk>/capability``.

-capability is a hex word indicating which capabilities a specific disk
-supports.  For more information on bits not listed here, see
-include/linux/genhd.h
+``capability`` is a bitfield, printed in hexadecimal, indicating which
+capabilities a specific block device supports:

-GENHD_FL_MEDIA_CHANGE_NOTIFY
----------------------------
-
-Value: 4
-
-When this bit is set, the disk supports Asynchronous Notification
-of media change events.  These events will be broadcast to user
-space via kernel uevent.
+.. kernel-doc:: include/linux/genhd.h
--- a/Documentation/bpf/bpf_devel_QA.rst
+++ b/Documentation/bpf/bpf_devel_QA.rst
@ -20,11 +20,11 @@ Reporting bugs
 Q: How do I report bugs for BPF kernel code?
 --------------------------------------------
 A: Since all BPF kernel development as well as bpftool and iproute2 BPF
-loader development happens through the netdev kernel mailing list,
+loader development happens through the bpf kernel mailing list,
 please report any found issues around BPF to the following mailing
 list:

- netdev@vger.kernel.org
+ bpf@vger.kernel.org

 This may also include issues related to XDP, BPF tracing, etc.

@ -46,17 +46,12 @@ Submitting patches

 Q: To which mailing list do I need to submit my BPF patches?
 ------------------------------------------------------------
-A: Please submit your BPF patches to the netdev kernel mailing list:
+A: Please submit your BPF patches to the bpf kernel mailing list:

- netdev@vger.kernel.org
-
-Historically, BPF came out of networking and has always been maintained
-by the kernel networking community. Although these days BPF touches
-many other subsystems as well, the patches are still routed mainly
-through the networking community.
+ bpf@vger.kernel.org

 In case your patch has changes in various different subsystems (e.g.
-tracing, security, etc), make sure to Cc the related kernel mailing
+networking, tracing, security, etc), make sure to Cc the related kernel mailing
 lists and maintainers from there as well, so they are able to review
 the changes and provide their Acked-by's to the patches.

@ -168,7 +163,7 @@ a BPF point of view.
 Be aware that this is not a final verdict that the patch will
 automatically get accepted into net or net-next trees eventually:

-On the netdev kernel mailing list reviews can come in at any point
+On the bpf kernel mailing list reviews can come in at any point
 in time. If discussions around a patch conclude that they cannot
 get included as-is, we will either apply a follow-up fix or drop
 them from the trees entirely. Therefore, we also reserve to rebase
@ -494,15 +489,15 @@ A: You need cmake and gcc-c++ as build requisites for LLVM. Once you have
 that set up, proceed with building the latest LLVM and clang version
 from the git repositories::

-     $ git clone http://llvm.org/git/llvm.git
-     $ cd llvm/tools
-     $ git clone --depth 1 http://llvm.org/git/clang.git
-     $ cd ..; mkdir build; cd build
-     $ cmake .. -DLLVM_TARGETS_TO_BUILD="BPF;X86" \
+     $ git clone https://github.com/llvm/llvm-project.git
+     $ mkdir -p llvm-project/llvm/build/install
+     $ cd llvm-project/llvm/build
+     $ cmake .. -G "Ninja" -DLLVM_TARGETS_TO_BUILD="BPF;X86" \
+                -DLLVM_ENABLE_PROJECTS="clang"    \
                -DBUILD_SHARED_LIBS=OFF           \
                -DCMAKE_BUILD_TYPE=Release        \
                -DLLVM_BUILD_RUNTIME=OFF
-     $ make -j $(getconf _NPROCESSORS_ONLN)
+     $ ninja

 The built binaries can then be found in the build/bin/ directory, where
 you can point the PATH variable to.
--- a/Documentation/bpf/bpf_lsm.rst
+++ b/Documentation/bpf/bpf_lsm.rst
@ -0,0 +1,142 @@
+.. SPDX-License-Identifier: GPL-2.0+
+.. Copyright (C) 2020 Google LLC.
+
+================
+LSM BPF Programs
+================
+
+These BPF programs allow runtime instrumentation of the LSM hooks by privileged
+users to implement system-wide MAC (Mandatory Access Control) and Audit
+policies using eBPF.
+
+Structure
+---------
+
+The example shows an eBPF program that can be attached to the ``file_mprotect``
+LSM hook:
+
+.. c:function:: int file_mprotect(struct vm_area_struct *vma, unsigned long reqprot, unsigned long prot);
+
+Other LSM hooks which can be instrumented can be found in
+``include/linux/lsm_hooks.h``.
+
+eBPF programs that use :doc:`/bpf/btf` do not need to include kernel headers
+for accessing information from the attached eBPF program's context. They can
+simply declare the structures in the eBPF program and only specify the fields
+that need to be accessed.
+
+.. code-block:: c
+
+	struct mm_struct {
+		unsigned long start_brk, brk, start_stack;
+	} __attribute__((preserve_access_index));
+
+	struct vm_area_struct {
+		unsigned long start_brk, brk, start_stack;
+		unsigned long vm_start, vm_end;
+		struct mm_struct *vm_mm;
+	} __attribute__((preserve_access_index));
+
+
+.. note:: The order of the fields is irrelevant.
+
+This can be further simplified (if one has access to the BTF information at
+build time) by generating the ``vmlinux.h`` with:
+
+.. code-block:: console
+
+	# bpftool btf dump file <path-to-btf-vmlinux> format c > vmlinux.h
+
+.. note:: ``path-to-btf-vmlinux`` can be ``/sys/kernel/btf/vmlinux`` if the
+	  build environment matches the environment the BPF programs are
+	  deployed in.
+
+The ``vmlinux.h`` can then simply be included in the BPF programs without
+requiring the definition of the types.
+
+The eBPF programs can be declared using the``BPF_PROG``
+macros defined in `tools/lib/bpf/bpf_tracing.h`_. In this
+example:
+
+	* ``"lsm/file_mprotect"`` indicates the LSM hook that the program must
+	  be attached to
+	* ``mprotect_audit`` is the name of the eBPF program
+
+.. code-block:: c
+
+	SEC("lsm/file_mprotect")
+	int BPF_PROG(mprotect_audit, struct vm_area_struct *vma,
+		     unsigned long reqprot, unsigned long prot, int ret)
+	{
+		/* ret is the return value from the previous BPF program
+		 * or 0 if it's the first hook.
+		 */
+		if (ret != 0)
+			return ret;
+
+		int is_heap;
+
+		is_heap = (vma->vm_start >= vma->vm_mm->start_brk &&
+			   vma->vm_end <= vma->vm_mm->brk);
+
+		/* Return an -EPERM or write information to the perf events buffer
+		 * for auditing
+		 */
+		if (is_heap)
+			return -EPERM;
+	}
+
+The ``__attribute__((preserve_access_index))`` is a clang feature that allows
+the BPF verifier to update the offsets for the access at runtime using the
+:doc:`/bpf/btf` information. Since the BPF verifier is aware of the types, it
+also validates all the accesses made to the various types in the eBPF program.
+
+Loading
+-------
+
+eBPF programs can be loaded with the :manpage:`bpf(2)` syscall's
+``BPF_PROG_LOAD`` operation:
+
+.. code-block:: c
+
+	struct bpf_object *obj;
+
+	obj = bpf_object__open("./my_prog.o");
+	bpf_object__load(obj);
+
+This can be simplified by using a skeleton header generated by ``bpftool``:
+
+.. code-block:: console
+
+	# bpftool gen skeleton my_prog.o > my_prog.skel.h
+
+and the program can be loaded by including ``my_prog.skel.h`` and using
+the generated helper, ``my_prog__open_and_load``.
+
+Attachment to LSM Hooks
+-----------------------
+
+The LSM allows attachment of eBPF programs as LSM hooks using :manpage:`bpf(2)`
+syscall's ``BPF_RAW_TRACEPOINT_OPEN`` operation or more simply by
+using the libbpf helper ``bpf_program__attach_lsm``.
+
+The program can be detached from the LSM hook by *destroying* the ``link``
+link returned by ``bpf_program__attach_lsm`` using ``bpf_link__destroy``.
+
+One can also use the helpers generated in ``my_prog.skel.h`` i.e.
+``my_prog__attach`` for attachment and ``my_prog__destroy`` for cleaning up.
+
+Examples
+--------
+
+An example eBPF program can be found in
+`tools/testing/selftests/bpf/progs/lsm.c`_ and the corresponding
+userspace code in `tools/testing/selftests/bpf/prog_tests/test_lsm.c`_
+
+.. Links
+.. _tools/lib/bpf/bpf_tracing.h:
+   https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/tree/tools/lib/bpf/bpf_tracing.h
+.. _tools/testing/selftests/bpf/progs/lsm.c:
+   https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/tree/tools/testing/selftests/bpf/progs/lsm.c
+.. _tools/testing/selftests/bpf/prog_tests/test_lsm.c:
+   https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/tree/tools/testing/selftests/bpf/prog_tests/test_lsm.c
--- a/Documentation/bpf/drgn.rst
+++ b/Documentation/bpf/drgn.rst
@ -0,0 +1,213 @@
+.. SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+
+==============
+BPF drgn tools
+==============
+
+drgn scripts is a convenient and easy to use mechanism to retrieve arbitrary
+kernel data structures. drgn is not relying on kernel UAPI to read the data.
+Instead it's reading directly from ``/proc/kcore`` or vmcore and pretty prints
+the data based on DWARF debug information from vmlinux.
+
+This document describes BPF related drgn tools.
+
+See `drgn/tools`_ for all tools available at the moment and `drgn/doc`_ for
+more details on drgn itself.
+
+bpf_inspect.py
+--------------
+
+Description
+===========
+
+`bpf_inspect.py`_ is a tool intended to inspect BPF programs and maps. It can
+iterate over all programs and maps in the system and print basic information
+about these objects, including id, type and name.
+
+The main use-case `bpf_inspect.py`_ covers is to show BPF programs of types
+``BPF_PROG_TYPE_EXT`` and ``BPF_PROG_TYPE_TRACING`` attached to other BPF
+programs via ``freplace``/``fentry``/``fexit`` mechanisms, since there is no
+user-space API to get this information.
+
+Getting started
+===============
+
+List BPF programs (full names are obtained from BTF)::
+
+    % sudo bpf_inspect.py prog
+        27: BPF_PROG_TYPE_TRACEPOINT         tracepoint__tcp__tcp_send_reset
+      4632: BPF_PROG_TYPE_CGROUP_SOCK_ADDR   tw_ipt_bind
+     49464: BPF_PROG_TYPE_RAW_TRACEPOINT     raw_tracepoint__sched_process_exit
+
+List BPF maps::
+
+      % sudo bpf_inspect.py map
+        2577: BPF_MAP_TYPE_HASH                tw_ipt_vips
+        4050: BPF_MAP_TYPE_STACK_TRACE         stack_traces
+        4069: BPF_MAP_TYPE_PERCPU_ARRAY        ned_dctcp_cntr
+
+Find BPF programs attached to BPF program ``test_pkt_access``::
+
+      % sudo bpf_inspect.py p | grep test_pkt_access
+         650: BPF_PROG_TYPE_SCHED_CLS          test_pkt_access
+         654: BPF_PROG_TYPE_TRACING            test_main                        linked:[650->25: BPF_TRAMP_FEXIT test_pkt_access->test_pkt_access()]
+         655: BPF_PROG_TYPE_TRACING            test_subprog1                    linked:[650->29: BPF_TRAMP_FEXIT test_pkt_access->test_pkt_access_subprog1()]
+         656: BPF_PROG_TYPE_TRACING            test_subprog2                    linked:[650->31: BPF_TRAMP_FEXIT test_pkt_access->test_pkt_access_subprog2()]
+         657: BPF_PROG_TYPE_TRACING            test_subprog3                    linked:[650->21: BPF_TRAMP_FEXIT test_pkt_access->test_pkt_access_subprog3()]
+         658: BPF_PROG_TYPE_EXT                new_get_skb_len                  linked:[650->16: BPF_TRAMP_REPLACE test_pkt_access->get_skb_len()]
+         659: BPF_PROG_TYPE_EXT                new_get_skb_ifindex              linked:[650->23: BPF_TRAMP_REPLACE test_pkt_access->get_skb_ifindex()]
+         660: BPF_PROG_TYPE_EXT                new_get_constant                 linked:[650->19: BPF_TRAMP_REPLACE test_pkt_access->get_constant()]
+
+It can be seen that there is a program ``test_pkt_access``, id 650 and there
+are multiple other tracing and ext programs attached to functions in
+``test_pkt_access``.
+
+For example the line::
+
+         658: BPF_PROG_TYPE_EXT                new_get_skb_len                  linked:[650->16: BPF_TRAMP_REPLACE test_pkt_access->get_skb_len()]
+
+, means that BPF program id 658, type ``BPF_PROG_TYPE_EXT``, name
+``new_get_skb_len`` replaces (``BPF_TRAMP_REPLACE``) function ``get_skb_len()``
+that has BTF id 16 in BPF program id 650, name ``test_pkt_access``.
+
+Getting help:
+
+.. code-block:: none
+
+    % sudo bpf_inspect.py
+    usage: bpf_inspect.py [-h] {prog,p,map,m} ...
+
+    drgn script to list BPF programs or maps and their properties
+    unavailable via kernel API.
+
+    See https://github.com/osandov/drgn/ for more details on drgn.
+
+    optional arguments:
+      -h, --help      show this help message and exit
+
+    subcommands:
+      {prog,p,map,m}
+        prog (p)      list BPF programs
+        map (m)       list BPF maps
+
+Customization
+=============
+
+The script is intended to be customized by developers to print relevant
+information about BPF programs, maps and other objects.
+
+For example, to print ``struct bpf_prog_aux`` for BPF program id 53077:
+
+.. code-block:: none
+
+    % git diff
+    diff --git a/tools/bpf_inspect.py b/tools/bpf_inspect.py
+    index 650e228..aea2357 100755
+    --- a/tools/bpf_inspect.py
+    +++ b/tools/bpf_inspect.py
+    @@ -112,7 +112,9 @@ def list_bpf_progs(args):
+             if linked:
+                 linked = f" linked:[{linked}]"
+
+    -        print(f"{id_:>6}: {type_:32} {name:32} {linked}")
+    +        if id_ == 53077:
+    +            print(f"{id_:>6}: {type_:32} {name:32}")
+    +            print(f"{bpf_prog.aux}")
+
+
+     def list_bpf_maps(args):
+
+It produces the output::
+
+    % sudo bpf_inspect.py p
+     53077: BPF_PROG_TYPE_XDP                tw_xdp_policer
+    *(struct bpf_prog_aux *)0xffff8893fad4b400 = {
+            .refcnt = (atomic64_t){
+                    .counter = (long)58,
+            },
+            .used_map_cnt = (u32)1,
+            .max_ctx_offset = (u32)8,
+            .max_pkt_offset = (u32)15,
+            .max_tp_access = (u32)0,
+            .stack_depth = (u32)8,
+            .id = (u32)53077,
+            .func_cnt = (u32)0,
+            .func_idx = (u32)0,
+            .attach_btf_id = (u32)0,
+            .linked_prog = (struct bpf_prog *)0x0,
+            .verifier_zext = (bool)0,
+            .offload_requested = (bool)0,
+            .attach_btf_trace = (bool)0,
+            .func_proto_unreliable = (bool)0,
+            .trampoline_prog_type = (enum bpf_tramp_prog_type)BPF_TRAMP_FENTRY,
+            .trampoline = (struct bpf_trampoline *)0x0,
+            .tramp_hlist = (struct hlist_node){
+                    .next = (struct hlist_node *)0x0,
+                    .pprev = (struct hlist_node **)0x0,
+            },
+            .attach_func_proto = (const struct btf_type *)0x0,
+            .attach_func_name = (const char *)0x0,
+            .func = (struct bpf_prog **)0x0,
+            .jit_data = (void *)0x0,
+            .poke_tab = (struct bpf_jit_poke_descriptor *)0x0,
+            .size_poke_tab = (u32)0,
+            .ksym_tnode = (struct latch_tree_node){
+                    .node = (struct rb_node [2]){
+                            {
+                                    .__rb_parent_color = (unsigned long)18446612956263126665,
+                                    .rb_right = (struct rb_node *)0x0,
+                                    .rb_left = (struct rb_node *)0xffff88a0be3d0088,
+                            },
+                            {
+                                    .__rb_parent_color = (unsigned long)18446612956263126689,
+                                    .rb_right = (struct rb_node *)0x0,
+                                    .rb_left = (struct rb_node *)0xffff88a0be3d00a0,
+                            },
+                    },
+            },
+            .ksym_lnode = (struct list_head){
+                    .next = (struct list_head *)0xffff88bf481830b8,
+                    .prev = (struct list_head *)0xffff888309f536b8,
+            },
+            .ops = (const struct bpf_prog_ops *)xdp_prog_ops+0x0 = 0xffffffff820fa350,
+            .used_maps = (struct bpf_map **)0xffff889ff795de98,
+            .prog = (struct bpf_prog *)0xffffc9000cf2d000,
+            .user = (struct user_struct *)root_user+0x0 = 0xffffffff82444820,
+            .load_time = (u64)2408348759285319,
+            .cgroup_storage = (struct bpf_map *[2]){},
+            .name = (char [16])"tw_xdp_policer",
+            .security = (void *)0xffff889ff795d548,
+            .offload = (struct bpf_prog_offload *)0x0,
+            .btf = (struct btf *)0xffff8890ce6d0580,
+            .func_info = (struct bpf_func_info *)0xffff889ff795d240,
+            .func_info_aux = (struct bpf_func_info_aux *)0xffff889ff795de20,
+            .linfo = (struct bpf_line_info *)0xffff888a707afc00,
+            .jited_linfo = (void **)0xffff8893fad48600,
+            .func_info_cnt = (u32)1,
+            .nr_linfo = (u32)37,
+            .linfo_idx = (u32)0,
+            .num_exentries = (u32)0,
+            .extable = (struct exception_table_entry *)0xffffffffa032d950,
+            .stats = (struct bpf_prog_stats *)0x603fe3a1f6d0,
+            .work = (struct work_struct){
+                    .data = (atomic_long_t){
+                            .counter = (long)0,
+                    },
+                    .entry = (struct list_head){
+                            .next = (struct list_head *)0x0,
+                            .prev = (struct list_head *)0x0,
+                    },
+                    .func = (work_func_t)0x0,
+            },
+            .rcu = (struct callback_head){
+                    .next = (struct callback_head *)0x0,
+                    .func = (void (*)(struct callback_head *))0x0,
+            },
+    }
+
+
+.. Links
+.. _drgn/doc: https://drgn.readthedocs.io/en/latest/
+.. _drgn/tools: https://github.com/osandov/drgn/tree/master/tools
+.. _bpf_inspect.py:
+   https://github.com/osandov/drgn/blob/master/tools/bpf_inspect.py
--- a/Documentation/bpf/index.rst
+++ b/Documentation/bpf/index.rst
@ -45,14 +45,16 @@ Program types
   prog_cgroup_sockopt
   prog_cgroup_sysctl
   prog_flow_dissector
+   bpf_lsm


-Testing BPF
-===========
+Testing and debugging BPF
+=========================

 .. toctree::
   :maxdepth: 1

+   drgn
   s390


--- a/Documentation/conf.py
+++ b/Documentation/conf.py
@ -38,7 +38,11 @@ needs_sphinx = '1.3'
 # ones.
 extensions = ['kerneldoc', 'rstFlatTable', 'kernel_include', 'cdomain',
              'kfigure', 'sphinx.ext.ifconfig', 'automarkup',
-              'maintainers_include']
+              'maintainers_include', 'sphinx.ext.autosectionlabel' ]
+
+# Ensure that autosectionlabel will produce unique names
+autosectionlabel_prefix_document = True
+autosectionlabel_maxdepth = 2

 # The name of the math extension changed on Sphinx 1.4
 if (major == 1 and minor > 3) or (major > 1):
--- a/Documentation/core-api/index.rst
+++ b/Documentation/core-api/index.rst
@ -8,41 +8,81 @@ This is the beginning of a manual for core kernel APIs.  The conversion
 Core utilities
 ==============

+This section has general and "core core" documentation.  The first is a
+massive grab-bag of kerneldoc info left over from the docbook days; it
+should really be broken up someday when somebody finds the energy to do
+it.
+
 .. toctree::
   :maxdepth: 1

   kernel-api
-   assoc_array
-   atomic_ops
-   cachetlb
-   refcount-vs-atomic
-   cpu_hotplug
-   idr
-   local_ops
   workqueue
-   genericirq
-   xarray
-   librs
-   genalloc
-   errseq
-   packing
   printk-formats
+   symbol-namespaces
+
+Data structures and low-level utilities
+=======================================
+
+Library functionality that is used throughout the kernel.
+
+.. toctree::
+   :maxdepth: 1
+
+   kobject
+   assoc_array
+   xarray
+   idr
   circular-buffers
   generic-radix-tree
+   packing
+   timekeeping
+   errseq
+
+Concurrency primitives
+======================
+
+How Linux keeps everything from happening at the same time.  See
+:doc:`/locking/index` for more related documentation.
+
+.. toctree::
+   :maxdepth: 1
+
+   atomic_ops
+   refcount-vs-atomic
+   local_ops
+   padata
+   ../RCU/index
+
+Low-level hardware management
+=============================
+
+Cache management, managing CPU hotplug, etc.
+
+.. toctree::
+   :maxdepth: 1
+
+   cachetlb
+   cpu_hotplug
+   memory-hotplug
+   genericirq
+   protection-keys
+
+Memory management
+=================
+
+How to allocate and use memory in the kernel.  Note that there is a lot
+more memory-management documentation in :doc:`/vm/index`.
+
+.. toctree::
+   :maxdepth: 1
+
   memory-allocation
   mm-api
+   genalloc
   pin_user_pages
-   gfp_mask-from-fs-io
-   timekeeping
   boot-time-mm
-   memory-hotplug
-   protection-keys
-   ../RCU/index
-   gcc-plugins
-   symbol-namespaces
-   padata
-   ioctl
-
+   gfp_mask-from-fs-io

 Interfaces for kernel debugging
 ===============================
@ -53,6 +93,16 @@ Interfaces for kernel debugging
   debug-objects
   tracepoint

+Everything else
+===============
+
+Documents that don't fit elsewhere or which have yet to be categorized.
+
+.. toctree::
+   :maxdepth: 1
+
+   librs
+
 .. only:: subproject and html

   Indices
--- a/Documentation/core-api/kobject.rst
+++ b/Documentation/core-api/kobject.rst
@ -25,7 +25,7 @@ some terms we will be working with.
   usually embedded within some other structure which contains the stuff
   the code is really interested in.

-   No structure should EVER have more than one kobject embedded within it.
+   No structure should **EVER** have more than one kobject embedded within it.
   If it does, the reference counting for the object is sure to be messed
   up and incorrect, and your code will be buggy.  So do not do this.

@ -55,7 +55,7 @@ a larger, domain-specific object.  To this end, kobjects will be found
 embedded in other structures.  If you are used to thinking of things in
 object-oriented terms, kobjects can be seen as a top-level, abstract class
 from which other classes are derived.  A kobject implements a set of
-capabilities which are not particularly useful by themselves, but which are
+capabilities which are not particularly useful by themselves, but are
 nice to have in other objects.  The C language does not allow for the
 direct expression of inheritance, so other techniques - such as structure
 embedding - must be used.
@ -65,12 +65,12 @@ this is analogous as to how "list_head" structs are rarely useful on
 their own, but are invariably found embedded in the larger objects of
 interest.)

-So, for example, the UIO code in drivers/uio/uio.c has a structure that
+So, for example, the UIO code in ``drivers/uio/uio.c`` has a structure that
 defines the memory region associated with a uio device::

    struct uio_map {
-	struct kobject kobj;
-	struct uio_mem *mem;
+            struct kobject kobj;
+            struct uio_mem *mem;
    };

 If you have a struct uio_map structure, finding its embedded kobject is
@ -78,30 +78,30 @@ just a matter of using the kobj member.  Code that works with kobjects will
 often have the opposite problem, however: given a struct kobject pointer,
 what is the pointer to the containing structure?  You must avoid tricks
 (such as assuming that the kobject is at the beginning of the structure)
-and, instead, use the container_of() macro, found in <linux/kernel.h>::
+and, instead, use the container_of() macro, found in ``<linux/kernel.h>``::

    container_of(pointer, type, member)

 where:

-  * "pointer" is the pointer to the embedded kobject,
-  * "type" is the type of the containing structure, and
-  * "member" is the name of the structure field to which "pointer" points.
+  * ``pointer`` is the pointer to the embedded kobject,
+  * ``type`` is the type of the containing structure, and
+  * ``member`` is the name of the structure field to which ``pointer`` points.

 The return value from container_of() is a pointer to the corresponding
-container type. So, for example, a pointer "kp" to a struct kobject
-embedded *within* a struct uio_map could be converted to a pointer to the
-*containing* uio_map structure with::
+container type. So, for example, a pointer ``kp`` to a struct kobject
+embedded **within** a struct uio_map could be converted to a pointer to the
+**containing** uio_map structure with::

    struct uio_map *u_map = container_of(kp, struct uio_map, kobj);

-For convenience, programmers often define a simple macro for "back-casting"
+For convenience, programmers often define a simple macro for **back-casting**
 kobject pointers to the containing type.  Exactly this happens in the
-earlier drivers/uio/uio.c, as you can see here::
+earlier ``drivers/uio/uio.c``, as you can see here::

    struct uio_map {
-        struct kobject kobj;
-        struct uio_mem *mem;
+            struct kobject kobj;
+            struct uio_mem *mem;
    };

    #define to_map(map) container_of(map, struct uio_map, kobj)
@ -125,7 +125,7 @@ must have an associated kobj_type.  After calling kobject_init(), to
 register the kobject with sysfs, the function kobject_add() must be called::

    int kobject_add(struct kobject *kobj, struct kobject *parent,
-		    const char *fmt, ...);
+                    const char *fmt, ...);

 This sets up the parent of the kobject and the name for the kobject
 properly.  If the kobject is to be associated with a specific kset,
@ -172,13 +172,13 @@ call to kobject_uevent()::

    int kobject_uevent(struct kobject *kobj, enum kobject_action action);

-Use the KOBJ_ADD action for when the kobject is first added to the kernel.
+Use the **KOBJ_ADD** action for when the kobject is first added to the kernel.
 This should be done only after any attributes or children of the kobject
 have been initialized properly, as userspace will instantly start to look
 for them when this call happens.

 When the kobject is removed from the kernel (details on how to do that are
-below), the uevent for KOBJ_REMOVE will be automatically created by the
+below), the uevent for **KOBJ_REMOVE** will be automatically created by the
 kobject core, so the caller does not have to worry about doing that by
 hand.

@ -238,7 +238,7 @@ Both types of attributes used here, with a kobject that has been created
 with the kobject_create_and_add(), can be of type kobj_attribute, so no
 special custom attribute is needed to be created.

-See the example module, samples/kobject/kobject-example.c for an
+See the example module, ``samples/kobject/kobject-example.c`` for an
 implementation of a simple kobject and attributes.


@ -270,10 +270,10 @@ such a method has a form like::

    void my_object_release(struct kobject *kobj)
    {
-    	    struct my_object *mine = container_of(kobj, struct my_object, kobj);
+            struct my_object *mine = container_of(kobj, struct my_object, kobj);

-	    /* Perform any additional cleanup on this object, then... */
-	    kfree(mine);
+            /* Perform any additional cleanup on this object, then... */
+            kfree(mine);
    }

 One important point cannot be overstated: every kobject must have a
@ -297,11 +297,11 @@ instead, it is associated with the ktype. So let us introduce struct
 kobj_type::

    struct kobj_type {
-	    void (*release)(struct kobject *kobj);
-	    const struct sysfs_ops *sysfs_ops;
-	    struct attribute **default_attrs;
-	    const struct kobj_ns_type_operations *(*child_ns_type)(struct kobject *kobj);
-	    const void *(*namespace)(struct kobject *kobj);
+            void (*release)(struct kobject *kobj);
+            const struct sysfs_ops *sysfs_ops;
+            struct attribute **default_attrs;
+            const struct kobj_ns_type_operations *(*child_ns_type)(struct kobject *kobj);
+            const void *(*namespace)(struct kobject *kobj);
    };

 This structure is used to describe a particular type of kobject (or, more
@ -352,8 +352,8 @@ created and never declared statically or on the stack.  To create a new
 kset use::

  struct kset *kset_create_and_add(const char *name,
-				   struct kset_uevent_ops *u,
-				   struct kobject *parent);
+                                   struct kset_uevent_ops *u,
+                                   struct kobject *parent);

 When you are finished with the kset, call::

@ -365,16 +365,16 @@ Because other references to the kset may still exist, the release may happen
 after kset_unregister() returns.

 An example of using a kset can be seen in the
-samples/kobject/kset-example.c file in the kernel tree.
+``samples/kobject/kset-example.c`` file in the kernel tree.

 If a kset wishes to control the uevent operations of the kobjects
 associated with it, it can use the struct kset_uevent_ops to handle it::

  struct kset_uevent_ops {
-        int (*filter)(struct kset *kset, struct kobject *kobj);
-        const char *(*name)(struct kset *kset, struct kobject *kobj);
-        int (*uevent)(struct kset *kset, struct kobject *kobj,
-                      struct kobj_uevent_env *env);
+          int (*filter)(struct kset *kset, struct kobject *kobj);
+          const char *(*name)(struct kset *kset, struct kobject *kobj);
+          int (*uevent)(struct kset *kset, struct kobject *kobj,
+                        struct kobj_uevent_env *env);
  };


@ -408,8 +408,8 @@ Kobject removal
 After a kobject has been registered with the kobject core successfully, it
 must be cleaned up when the code is finished with it.  To do that, call
 kobject_put().  By doing this, the kobject core will automatically clean up
-all of the memory allocated by this kobject.  If a KOBJ_ADD uevent has been
-sent for the object, a corresponding KOBJ_REMOVE uevent will be sent, and
+all of the memory allocated by this kobject.  If a ``KOBJ_ADD`` uevent has been
+sent for the object, a corresponding ``KOBJ_REMOVE`` uevent will be sent, and
 any other sysfs housekeeping will be handled for the caller properly.

 If you need to do a two-stage delete of the kobject (say you are not
@ -430,5 +430,5 @@ Example code to copy from
 =========================

 For a more complete example of using ksets and kobjects properly, see the
-example programs samples/kobject/{kobject-example.c,kset-example.c},
-which will be built as loadable modules if you select CONFIG_SAMPLE_KOBJECT.
+example programs ``samples/kobject/{kobject-example.c,kset-example.c}``,
+which will be built as loadable modules if you select ``CONFIG_SAMPLE_KOBJECT``.
--- a/Documentation/core-api/mm-api.rst
+++ b/Documentation/core-api/mm-api.rst
@ -73,6 +73,9 @@ File Mapping and Page Cache
 .. kernel-doc:: mm/truncate.c
   :export:

+.. kernel-doc:: include/linux/pagemap.h
+   :internal:
+
 Memory pools
 ============

--- a/Documentation/core-api/pin_user_pages.rst
+++ b/Documentation/core-api/pin_user_pages.rst
@ -52,8 +52,22 @@ Which flags are set by each wrapper

 For these pin_user_pages*() functions, FOLL_PIN is OR'd in with whatever gup
 flags the caller provides. The caller is required to pass in a non-null struct
-pages* array, and the function then pin pages by incrementing each by a special
-value. For now, that value is +1, just like get_user_pages*().::
+pages* array, and the function then pins pages by incrementing each by a special
+value: GUP_PIN_COUNTING_BIAS.
+
+For huge pages (and in fact, any compound page of more than 2 pages), the
+GUP_PIN_COUNTING_BIAS scheme is not used. Instead, an exact form of pin counting
+is achieved, by using the 3rd struct page in the compound page. A new struct
+page field, hpage_pinned_refcount, has been added in order to support this.
+
+This approach for compound pages avoids the counting upper limit problems that
+are discussed below. Those limitations would have been aggravated severely by
+huge pages, because each tail page adds a refcount to the head page. And in
+fact, testing revealed that, without a separate hpage_pinned_refcount field,
+page overflows were seen in some huge page stress tests.
+
+This also means that huge pages and compound pages (of order > 1) do not suffer
+from the false positives problem that is mentioned below.::

 Function
 --------
@ -99,27 +113,6 @@ pages:
 This also leads to limitations: there are only 31-10==21 bits available for a
 counter that increments 10 bits at a time.

-TODO: for 1GB and larger huge pages, this is cutting it close. That's because
-when pin_user_pages() follows such pages, it increments the head page by "1"
-(where "1" used to mean "+1" for get_user_pages(), but now means "+1024" for
-pin_user_pages()) for each tail page. So if you have a 1GB huge page:
-
-* There are 256K (18 bits) worth of 4 KB tail pages.
-* There are 21 bits available to count up via GUP_PIN_COUNTING_BIAS (that is,
-  10 bits at a time)
-* There are 21 - 18 == 3 bits available to count. Except that there aren't,
-  because you need to allow for a few normal get_page() calls on the head page,
-  as well. Fortunately, the approach of using addition, rather than "hard"
-  bitfields, within page->_refcount, allows for sharing these bits gracefully.
-  But we're still looking at about 8 references.
-
-This, however, is a missing feature more than anything else, because it's easily
-solved by addressing an obvious inefficiency in the original get_user_pages()
-approach of retrieving pages: stop treating all the pages as if they were
-PAGE_SIZE. Retrieve huge pages as huge pages. The callers need to be aware of
-this, so some work is required. Once that's in place, this limitation mostly
-disappears from view, because there will be ample refcounting range available.
-
 * Callers must specifically request "dma-pinned tracking of pages". In other
  words, just calling get_user_pages() will not suffice; a new set of functions,
  pin_user_page() and related, must be used.
@ -173,8 +166,8 @@ CASE 4: Pinning for struct page manipulation only
 -------------------------------------------------
 Here, normal GUP calls are sufficient, so neither flag needs to be set.

-page_dma_pinned(): the whole point of pinning
-=============================================
+page_maybe_dma_pinned(): the whole point of pinning
+===================================================

 The whole point of marking pages as "DMA-pinned" or "gup-pinned" is to be able
 to query, "is this page DMA-pinned?" That allows code such as page_mkclean()
@ -186,7 +179,7 @@ and debates (see the References at the end of this document). It's a TODO item
 here: fill in the details once that's worked out. Meanwhile, it's safe to say
 that having this available: ::

-        static inline bool page_dma_pinned(struct page *page)
+        static inline bool page_maybe_dma_pinned(struct page *page)

 ...is a prerequisite to solving the long-running gup+DMA problem.

@ -215,12 +208,42 @@ has the following new calls to exercise the new pin*() wrapper functions:
 You can monitor how many total dma-pinned pages have been acquired and released
 since the system was booted, via two new /proc/vmstat entries: ::

-    /proc/vmstat/nr_foll_pin_requested
-    /proc/vmstat/nr_foll_pin_requested
+    /proc/vmstat/nr_foll_pin_acquired
+    /proc/vmstat/nr_foll_pin_released

-Those are both going to show zero, unless CONFIG_DEBUG_VM is set. This is
-because there is a noticeable performance drop in unpin_user_page(), when they
-are activated.
+Under normal conditions, these two values will be equal unless there are any
+long-term [R]DMA pins in place, or during pin/unpin transitions.
+
+* nr_foll_pin_acquired: This is the number of logical pins that have been
+  acquired since the system was powered on. For huge pages, the head page is
+  pinned once for each page (head page and each tail page) within the huge page.
+  This follows the same sort of behavior that get_user_pages() uses for huge
+  pages: the head page is refcounted once for each tail or head page in the huge
+  page, when get_user_pages() is applied to a huge page.
+
+* nr_foll_pin_released: The number of logical pins that have been released since
+  the system was powered on. Note that pages are released (unpinned) on a
+  PAGE_SIZE granularity, even if the original pin was applied to a huge page.
+  Becaused of the pin count behavior described above in "nr_foll_pin_acquired",
+  the accounting balances out, so that after doing this::
+
+    pin_user_pages(huge_page);
+    for (each page in huge_page)
+        unpin_user_page(page);
+
+...the following is expected::
+
+    nr_foll_pin_released == nr_foll_pin_acquired
+
+(...unless it was already out of balance due to a long-term RDMA pin being in
+place.)
+
+Other diagnostics
+=================
+
+dump_page() has been enhanced slightly, to handle these new counting fields, and
+to better report on compound pages in general. Specifically, for compound pages
+with order > 1, the exact (hpage_pinned_refcount) pincount is reported.

 References
 ==========
@ -228,5 +251,6 @@ References
 * `Some slow progress on get_user_pages() (Apr 2, 2019) <https://lwn.net/Articles/784574/>`_
 * `DMA and get_user_pages() (LPC: Dec 12, 2018) <https://lwn.net/Articles/774411/>`_
 * `The trouble with get_user_pages() (Apr 30, 2018) <https://lwn.net/Articles/753027/>`_
+* `LWN kernel index: get_user_pages() <https://lwn.net/Kernel/Index/#Memory_management-get_user_pages>`_

 John Hubbard, October, 2019
--- a/Documentation/core-api/timekeeping.rst
+++ b/Documentation/core-api/timekeeping.rst
@ -154,9 +154,9 @@ architectures. These are the recommended replacements:

 	Use ktime_get() or ktime_get_ts64() instead.

-.. c:function:: struct timeval do_gettimeofday( void )
-		struct timespec getnstimeofday( void )
-		struct timespec64 getnstimeofday64( void )
+.. c:function:: void do_gettimeofday( struct timeval * )
+		void getnstimeofday( struct timespec * )
+		void getnstimeofday64( struct timespec64 * )
 		void ktime_get_real_ts( struct timespec * )

 	ktime_get_real_ts64() is a direct replacement, but consider using
--- a/Documentation/cpu-freq/amd-powernow.txt
+++ b/Documentation/cpu-freq/amd-powernow.txt
@ -1,38 +0,0 @@
-
-PowerNow! and Cool'n'Quiet are AMD names for frequency
-management capabilities in AMD processors. As the hardware
-implementation changes in new generations of the processors,
-there is a different cpu-freq driver for each generation.
-
-Note that the driver's will not load on the "wrong" hardware,
-so it is safe to try each driver in turn when in doubt as to
-which is the correct driver.
-
-Note that the functionality to change frequency (and voltage)
-is not available in all processors. The drivers will refuse
-to load on processors without this capability. The capability
-is detected with the cpuid instruction.
-
-The drivers use BIOS supplied tables to obtain frequency and
-voltage information appropriate for a particular platform.
-Frequency transitions will be unavailable if the BIOS does
-not supply these tables.
-
-6th Generation: powernow-k6
-
-7th Generation: powernow-k7: Athlon, Duron, Geode.
-
-8th Generation: powernow-k8: Athlon, Athlon 64, Opteron, Sempron.
-Documentation on this functionality in 8th generation processors
-is available in the "BIOS and Kernel Developer's Guide", publication
-26094, in chapter 9, available for download from www.amd.com. 
-
-BIOS supplied data, for powernow-k7 and for powernow-k8, may be
-from either the PSB table or from ACPI objects. The ACPI support
-is only available if the kernel config sets CONFIG_ACPI_PROCESSOR.
-The powernow-k8 driver will attempt to use ACPI if so configured,
-and fall back to PST if that fails.
-The powernow-k7 driver will try to use the PSB support first, and
-fall back to ACPI if the PSB support fails. A module parameter,
-acpi_force, is provided to force ACPI support to be used instead 
-of PSB support.
--- a/Documentation/cpu-freq/core.rst
+++ b/Documentation/cpu-freq/core.rst
@ -1,31 +1,23 @@
-     CPU frequency and voltage scaling code in the Linux(TM) kernel
+.. SPDX-License-Identifier: GPL-2.0

+=============================================================
+General description of the CPUFreq core and CPUFreq notifiers
+=============================================================

-		         L i n u x    C P U F r e q
+Authors:
+	- Dominik Brodowski  <linux@brodo.de>
+	- David Kimdon <dwhedon@debian.org>
+	- Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+	- Viresh Kumar <viresh.kumar@linaro.org>

-			  C P U F r e q    C o r e
+.. Contents:

-
-		    Dominik Brodowski  <linux@brodo.de>
-		     David Kimdon <dwhedon@debian.org>
-		Rafael J. Wysocki <rafael.j.wysocki@intel.com>
-		   Viresh Kumar <viresh.kumar@linaro.org>
-
-
-
-   Clock scaling allows you to change the clock speed of the CPUs on the
-    fly. This is a nice method to save battery power, because the lower
-            the clock speed, the less power the CPU consumes.
-
-
-Contents:
---------
-1.  CPUFreq core and interfaces
-2.  CPUFreq notifiers
-3.  CPUFreq Table Generation with Operating Performance Point (OPP)
+   1.  CPUFreq core and interfaces
+   2.  CPUFreq notifiers
+   3.  CPUFreq Table Generation with Operating Performance Point (OPP)

 1. General Information
-=======================
+======================

 The CPUFreq core code is located in drivers/cpufreq/cpufreq.c. This
 cpufreq code offers a standardized interface for the CPUFreq
@ -63,7 +55,7 @@ The phase is specified in the second argument to the notifier.  The phase is
 CPUFREQ_CREATE_POLICY when the policy is first created and it is
 CPUFREQ_REMOVE_POLICY when the policy is removed.

-The third argument, a void *pointer, points to a struct cpufreq_policy
+The third argument, a ``void *pointer``, points to a struct cpufreq_policy
 consisting of several values, including min, max (the lower and upper
 frequencies (in kHz) of the new policy).

@ -80,10 +72,13 @@ CPUFREQ_POSTCHANGE.

 The third argument is a struct cpufreq_freqs with the following
 values:
-cpu	- number of the affected CPU
-old	- old frequency
-new	- new frequency
-flags	- flags of the cpufreq driver
+
+=====	===========================
+cpu	number of the affected CPU
+old	old frequency
+new	new frequency
+flags	flags of the cpufreq driver
+=====	===========================

 3. CPUFreq Table Generation with Operating Performance Point (OPP)
 ==================================================================
@ -94,9 +89,12 @@ dev_pm_opp_init_cpufreq_table -
 	the OPP layer's internal information about the available frequencies
 	into a format readily providable to cpufreq.

-	WARNING: Do not use this function in interrupt context.
+	.. Warning::
+
+	   Do not use this function in interrupt context.
+
+	Example::

-	Example:
 	 soc_pm_init()
 	 {
 		/* Do things */
@ -106,7 +104,10 @@ dev_pm_opp_init_cpufreq_table -
 		/* Do other things */
 	 }

-	NOTE: This function is available only if CONFIG_CPU_FREQ is enabled in
-	addition to CONFIG_PM_OPP.
+	.. note::

-dev_pm_opp_free_cpufreq_table - Free up the table allocated by dev_pm_opp_init_cpufreq_table
+	   This function is available only if CONFIG_CPU_FREQ is enabled in
+	   addition to CONFIG_PM_OPP.
+
+dev_pm_opp_free_cpufreq_table
+	Free up the table allocated by dev_pm_opp_init_cpufreq_table
--- a/Documentation/cpu-freq/cpu-drivers.rst
+++ b/Documentation/cpu-freq/cpu-drivers.rst
@ -1,35 +1,27 @@
-     CPU frequency and voltage scaling code in the Linux(TM) kernel
+.. SPDX-License-Identifier: GPL-2.0
+
+===============================================
+How to Implement a new CPUFreq Processor Driver
+===============================================
+
+Authors:


-		         L i n u x    C P U F r e q
+	- Dominik Brodowski  <linux@brodo.de>
+	- Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+	- Viresh Kumar <viresh.kumar@linaro.org>

-			   C P U   D r i v e r s 
+.. Contents

-		       - information for developers -
-
-
-		    Dominik Brodowski  <linux@brodo.de>
-		Rafael J. Wysocki <rafael.j.wysocki@intel.com>
-		   Viresh Kumar <viresh.kumar@linaro.org>
-
-
-
-   Clock scaling allows you to change the clock speed of the CPUs on the
-    fly. This is a nice method to save battery power, because the lower
-            the clock speed, the less power the CPU consumes.
-
-
-Contents:
---------
-1.   What To Do?
-1.1  Initialization
-1.2  Per-CPU Initialization
-1.3  verify
-1.4  target/target_index or setpolicy?
-1.5  target/target_index
-1.6  setpolicy
-1.7  get_intermediate and target_intermediate
-2.   Frequency Table Helpers
+   1.   What To Do?
+   1.1  Initialization
+   1.2  Per-CPU Initialization
+   1.3  verify
+   1.4  target/target_index or setpolicy?
+   1.5  target/target_index
+   1.6  setpolicy
+   1.7  get_intermediate and target_intermediate
+   2.   Frequency Table Helpers



@ -49,7 +41,7 @@ function check whether this kernel runs on the right CPU and the right
 chipset. If so, register a struct cpufreq_driver with the CPUfreq core
 using cpufreq_register_driver()

-What shall this struct cpufreq_driver contain? 
+What shall this struct cpufreq_driver contain?

 .name - The name of this driver.

@ -108,37 +100,42 @@ Whenever a new CPU is registered with the device model, or after the
 cpufreq driver registers itself, the per-policy initialization function
 cpufreq_driver.init is called if no cpufreq policy existed for the CPU.
 Note that the .init() and .exit() routines are called only once for the
-policy and not for each CPU managed by the policy. It takes a struct
-cpufreq_policy *policy as argument. What to do now?
+policy and not for each CPU managed by the policy. It takes a ``struct
+cpufreq_policy *policy`` as argument. What to do now?

 If necessary, activate the CPUfreq support on your CPU.

 Then, the driver must fill in the following values:

-policy->cpuinfo.min_freq _and_
-policy->cpuinfo.max_freq -	the minimum and maximum frequency 
-				(in kHz) which is supported by 
-				this CPU
-policy->cpuinfo.transition_latency   the time it takes on this CPU to
-				switch between two frequencies in
-				nanoseconds (if appropriate, else
-				specify CPUFREQ_ETERNAL)
-
-policy->cur			The current operating frequency of
-				this CPU (if appropriate)
-policy->min, 
-policy->max, 
-policy->policy and, if necessary,
-policy->governor		must contain the "default policy" for
-				this CPU. A few moments later,
-				cpufreq_driver.verify and either
-				cpufreq_driver.setpolicy or
-				cpufreq_driver.target/target_index is called
-				with these values.
-policy->cpus			Update this with the masks of the
-				(online + offline) CPUs that do DVFS
-				along with this CPU (i.e.  that share
-				clock/voltage rails with it).
+-----------------------------------+--------------------------------------+
+|policy->cpuinfo.min_freq _and_	    |					   |
+|policy->cpuinfo.max_freq	    | the minimum and maximum frequency	   |
+|				    | (in kHz) which is supported by	   |
+|				    | this CPU				   |
+-----------------------------------+--------------------------------------+
+|policy->cpuinfo.transition_latency | the time it takes on this CPU to	   |
+|				    | switch between two frequencies in	   |
+|				    | nanoseconds (if appropriate, else	   |
+|				    | specify CPUFREQ_ETERNAL)		   |
+-----------------------------------+--------------------------------------+
+|policy->cur			    | The current operating frequency of   |
+|				    | this CPU (if appropriate)		   |
+-----------------------------------+--------------------------------------+
+|policy->min,			    |					   |
+|policy->max,			    |					   |
+|policy->policy and, if necessary,  |					   |
+|policy->governor		    | must contain the "default policy" for|
+|				    | this CPU. A few moments later,       |
+|				    | cpufreq_driver.verify and either     |
+|				    | cpufreq_driver.setpolicy or          |
+|				    | cpufreq_driver.target/target_index is|
+|				    | called with these values.		   |
+-----------------------------------+--------------------------------------+
+|policy->cpus			    | Update this with the masks of the	   |
+|				    | (online + offline) CPUs that do DVFS |
+|				    | along with this CPU (i.e.  that share|
+|				    | clock/voltage rails with it).	   |
+-----------------------------------+--------------------------------------+

 For setting some of these values (cpuinfo.min[max]_freq, policy->min[max]), the
 frequency table helpers might be helpful. See the section 2 for more information
@ -151,8 +148,8 @@ on them.
 When the user decides a new policy (consisting of
 "policy,governor,min,max") shall be set, this policy must be validated
 so that incompatible values can be corrected. For verifying these
-values cpufreq_verify_within_limits(struct cpufreq_policy *policy,
-unsigned int min_freq, unsigned int max_freq) function might be helpful.
+values cpufreq_verify_within_limits(``struct cpufreq_policy *policy``,
+``unsigned int min_freq``, ``unsigned int max_freq``) function might be helpful.
 See section 2 for details on frequency table helpers.

 You need to make sure that at least one valid frequency (or operating
@ -163,7 +160,7 @@ policy->max first, and only if this is no solution, decrease policy->min.
 1.4 target or target_index or setpolicy or fast_switch?
 -------------------------------------------------------

-Most cpufreq drivers or even most cpu frequency scaling algorithms 
+Most cpufreq drivers or even most cpu frequency scaling algorithms
 only allow the CPU frequency to be set to predefined fixed values. For
 these, you use the ->target(), ->target_index() or ->fast_switch()
 callbacks.
@ -175,8 +172,8 @@ limits on their own. These shall use the ->setpolicy() callback.
 1.5. target/target_index
 ------------------------

-The target_index call has two arguments: struct cpufreq_policy *policy,
-and unsigned int index (into the exposed frequency table).
+The target_index call has two arguments: ``struct cpufreq_policy *policy``,
+and ``unsigned int`` index (into the exposed frequency table).

 The CPUfreq driver must set the new frequency when called here. The
 actual frequency must be determined by freq_table[index].frequency.
@ -184,9 +181,9 @@ actual frequency must be determined by freq_table[index].frequency.
 It should always restore to earlier frequency (i.e. policy->restore_freq) in
 case of errors, even if we switched to intermediate frequency earlier.

-Deprecated:
+Deprecated
 ----------
-The target call has three arguments: struct cpufreq_policy *policy,
+The target call has three arguments: ``struct cpufreq_policy *policy``,
 unsigned int target_frequency, unsigned int relation.

 The CPUfreq driver must set the new frequency when called here. The
@ -210,14 +207,14 @@ Not all drivers are expected to implement it, as sleeping from within
 this callback isn't allowed. This callback must be highly optimized to
 do switching as fast as possible.

-This function has two arguments: struct cpufreq_policy *policy and
-unsigned int target_frequency.
+This function has two arguments: ``struct cpufreq_policy *policy`` and
+``unsigned int target_frequency``.


 1.7 setpolicy
 -------------

-The setpolicy call only takes a struct cpufreq_policy *policy as
+The setpolicy call only takes a ``struct cpufreq_policy *policy`` as
 argument. You need to set the lower limit of the in-processor or
 in-chipset dynamic frequency switching to policy->min, the upper limit
 to policy->max, and -if supported- select a performance-oriented
@ -278,10 +275,10 @@ table.

 cpufreq_for_each_valid_entry(pos, table) - iterates over all entries,
 excluding CPUFREQ_ENTRY_INVALID frequencies.
-Use arguments "pos" - a cpufreq_frequency_table * as a loop cursor and
-"table" - the cpufreq_frequency_table * you want to iterate over.
+Use arguments "pos" - a ``cpufreq_frequency_table *`` as a loop cursor and
+"table" - the ``cpufreq_frequency_table *`` you want to iterate over.

-For example:
+For example::

 	struct cpufreq_frequency_table *pos, *driver_freq_table;

--- a/Documentation/cpu-freq/cpufreq-nforce2.txt
+++ b/Documentation/cpu-freq/cpufreq-nforce2.txt
@ -1,19 +0,0 @@
-
-The cpufreq-nforce2 driver changes the FSB on nVidia nForce2 platforms.
-
-This works better than on other platforms, because the FSB of the CPU
-can be controlled independently from the PCI/AGP clock.
-
-The module has two options:
-
-	fid: 	 multiplier * 10 (for example 8.5 = 85)
-	min_fsb: minimum FSB
-
-If not set, fid is calculated from the current CPU speed and the FSB.
-min_fsb defaults to FSB at boot time - 50 MHz.
-
-IMPORTANT: The available range is limited downwards!
-           Also the minimum available FSB can differ, for systems 
-           booting with 200 MHz, 150 should always work.
-
-
--- a/Documentation/cpu-freq/cpufreq-stats.rst
+++ b/Documentation/cpu-freq/cpufreq-stats.rst
@ -1,21 +1,23 @@
+.. SPDX-License-Identifier: GPL-2.0

-     CPU frequency and voltage scaling statistics in the Linux(TM) kernel
+==========================================
+General Description of sysfs CPUFreq Stats
+==========================================
+
+information for users


-             L i n u x    c p u f r e q - s t a t s   d r i v e r
+Author: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>

-                       - information for users -
+.. Contents

-
-             Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
-
-Contents
-1. Introduction
-2. Statistics Provided (with example)
-3. Configuring cpufreq-stats
+   1. Introduction
+   2. Statistics Provided (with example)
+   3. Configuring cpufreq-stats


 1. Introduction
+===============

 cpufreq-stats is a driver that provides CPU frequency statistics for each CPU.
 These statistics are provided in /sysfs as a bunch of read_only interfaces. This
@ -28,8 +30,10 @@ that may be running on your CPU. So, it will work with any cpufreq_driver.


 2. Statistics Provided (with example)
+=====================================

 cpufreq stats provides following statistics (explained in detail below).
+
 -  time_in_state
 -  total_trans
 -  trans_table
@ -39,53 +43,57 @@ All the statistics will be from the time the stats driver has been inserted
 statistic is done. Obviously, stats driver will not have any information
 about the frequency transitions before the stats driver insertion.

--------------------------------------------------------------------------------
-<mysystem>:/sys/devices/system/cpu/cpu0/cpufreq/stats # ls -l
-total 0
-drwxr-xr-x  2 root root    0 May 14 16:06 .
-drwxr-xr-x  3 root root    0 May 14 15:58 ..
--w-------  1 root root 4096 May 14 16:06 reset
-r--r--r--  1 root root 4096 May 14 16:06 time_in_state
-r--r--r--  1 root root 4096 May 14 16:06 total_trans
-r--r--r--  1 root root 4096 May 14 16:06 trans_table
--------------------------------------------------------------------------------
+::
+
+    <mysystem>:/sys/devices/system/cpu/cpu0/cpufreq/stats # ls -l
+    total 0
+    drwxr-xr-x  2 root root    0 May 14 16:06 .
+    drwxr-xr-x  3 root root    0 May 14 15:58 ..
+    --w-------  1 root root 4096 May 14 16:06 reset
+    -r--r--r--  1 root root 4096 May 14 16:06 time_in_state
+    -r--r--r--  1 root root 4096 May 14 16:06 total_trans
+    -r--r--r--  1 root root 4096 May 14 16:06 trans_table
+
+- **reset**

-  reset
 Write-only attribute that can be used to reset the stat counters. This can be
 useful for evaluating system behaviour under different governors without the
 need for a reboot.

-  time_in_state
+- **time_in_state**
+
 This gives the amount of time spent in each of the frequencies supported by
 this CPU. The cat output will have "<frequency> <time>" pair in each line, which
 will mean this CPU spent <time> usertime units of time at <frequency>. Output
-will have one line for each of the supported frequencies. usertime units here 
+will have one line for each of the supported frequencies. usertime units here
 is 10mS (similar to other time exported in /proc).

--------------------------------------------------------------------------------
-<mysystem>:/sys/devices/system/cpu/cpu0/cpufreq/stats # cat time_in_state 
-3600000 2089
-3400000 136
-3200000 34
-3000000 67
-2800000 172488
--------------------------------------------------------------------------------
+::
+
+    <mysystem>:/sys/devices/system/cpu/cpu0/cpufreq/stats # cat time_in_state
+    3600000 2089
+    3400000 136
+    3200000 34
+    3000000 67
+    2800000 172488


-  total_trans
-This gives the total number of frequency transitions on this CPU. The cat 
+- **total_trans**
+
+This gives the total number of frequency transitions on this CPU. The cat
 output will have a single count which is the total number of frequency
 transitions.

--------------------------------------------------------------------------------
-<mysystem>:/sys/devices/system/cpu/cpu0/cpufreq/stats # cat total_trans
-20
--------------------------------------------------------------------------------
+::
+
+    <mysystem>:/sys/devices/system/cpu/cpu0/cpufreq/stats # cat total_trans
+    20
+
+- **trans_table**

-  trans_table
 This will give a fine grained information about all the CPU frequency
 transitions. The cat output here is a two dimensional matrix, where an entry
-<i,j> (row i, column j) represents the count of number of transitions from 
+<i,j> (row i, column j) represents the count of number of transitions from
 Freq_i to Freq_j. Freq_i rows and Freq_j columns follow the sorting order in
 which the driver has provided the frequency table initially to the cpufreq core
 and so can be sorted (ascending or descending) or unsorted.  The output here
@ -95,26 +103,27 @@ readability.
 If the transition table is bigger than PAGE_SIZE, reading this will
 return an -EFBIG error.

--------------------------------------------------------------------------------
-<mysystem>:/sys/devices/system/cpu/cpu0/cpufreq/stats # cat trans_table
-   From  :    To
-         :   3600000   3400000   3200000   3000000   2800000 
-  3600000:         0         5         0         0         0 
-  3400000:         4         0         2         0         0 
-  3200000:         0         1         0         2         0 
-  3000000:         0         0         1         0         3 
-  2800000:         0         0         0         2         0 
--------------------------------------------------------------------------------
+::

+    <mysystem>:/sys/devices/system/cpu/cpu0/cpufreq/stats # cat trans_table
+    From  :    To
+	    :   3600000   3400000   3200000   3000000   2800000
+    3600000:         0         5         0         0         0
+    3400000:         4         0         2         0         0
+    3200000:         0         1         0         2         0
+    3000000:         0         0         1         0         3
+    2800000:         0         0         0         2         0

 3. Configuring cpufreq-stats
+============================

-To configure cpufreq-stats in your kernel
-Config Main Menu
-	Power management options (ACPI, APM)  --->
-		CPU Frequency scaling  --->
-			[*] CPU Frequency scaling
-			[*]   CPU frequency translation statistics
+To configure cpufreq-stats in your kernel::
+
+	Config Main Menu
+		Power management options (ACPI, APM)  --->
+			CPU Frequency scaling  --->
+				[*] CPU Frequency scaling
+				[*]   CPU frequency translation statistics


 "CPU Frequency scaling" (CONFIG_CPU_FREQ) should be enabled to configure
--- a/Documentation/cpu-freq/index.rst
+++ b/Documentation/cpu-freq/index.rst
@ -0,0 +1,39 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+==============================================================================
+Linux CPUFreq - CPU frequency and voltage scaling code in the Linux(TM) kernel
+==============================================================================
+
+Author: Dominik Brodowski  <linux@brodo.de>
+
+   Clock scaling allows you to change the clock speed of the CPUs on the
+   fly. This is a nice method to save battery power, because the lower
+   the clock speed, the less power the CPU consumes.
+
+
+.. toctree::
+   :maxdepth: 1
+
+   core
+   cpu-drivers
+   cpufreq-stats
+
+Mailing List
+------------
+There is a CPU frequency changing CVS commit and general list where
+you can report bugs, problems or submit patches. To post a message,
+send an email to linux-pm@vger.kernel.org.
+
+Links
+-----
+the FTP archives:
+* ftp://ftp.linux.org.uk/pub/linux/cpufreq/
+
+how to access the CVS repository:
+* http://cvs.arm.linux.org.uk/
+
+the CPUFreq Mailing list:
+* http://vger.kernel.org/vger-lists.html#linux-pm
+
+Clock and voltage scaling for the SA-1100:
+* http://www.lartmaker.nl/projects/scaling
--- a/Documentation/cpu-freq/index.txt
+++ b/Documentation/cpu-freq/index.txt
@ -1,56 +0,0 @@
-     CPU frequency and voltage scaling code in the Linux(TM) kernel
-
-
-		         L i n u x    C P U F r e q
-
-
-
-
-		    Dominik Brodowski  <linux@brodo.de>
-
-
-
-   Clock scaling allows you to change the clock speed of the CPUs on the
-    fly. This is a nice method to save battery power, because the lower
-            the clock speed, the less power the CPU consumes.
-
-
-
-Documents in this directory:
----------------------------
-
-amd-powernow.txt -	AMD powernow driver specific file.
-
-core.txt	-	General description of the CPUFreq core and
-			of CPUFreq notifiers.
-
-cpu-drivers.txt -	How to implement a new cpufreq processor driver.
-
-cpufreq-nforce2.txt -	nVidia nForce2 platform specific file.
-
-cpufreq-stats.txt -	General description of sysfs cpufreq stats.
-
-index.txt	-	File index, Mailing list and Links (this document)
-
-pcc-cpufreq.txt -	PCC cpufreq driver specific file.
-
-
-Mailing List
------------
-There is a CPU frequency changing CVS commit and general list where
-you can report bugs, problems or submit patches. To post a message,
-send an email to linux-pm@vger.kernel.org.
-
-Links
-----
-the FTP archives:
-* ftp://ftp.linux.org.uk/pub/linux/cpufreq/
-
-how to access the CVS repository:
-* http://cvs.arm.linux.org.uk/
-
-the CPUFreq Mailing list:
-* http://vger.kernel.org/vger-lists.html#linux-pm
-
-Clock and voltage scaling for the SA-1100:
-* http://www.lartmaker.nl/projects/scaling
--- a/Documentation/cpu-freq/pcc-cpufreq.txt
+++ b/Documentation/cpu-freq/pcc-cpufreq.txt
@ -1,207 +0,0 @@
-/*
- *  pcc-cpufreq.txt - PCC interface documentation
- *
- *  Copyright (C) 2009 Red Hat, Matthew Garrett <mjg@redhat.com>
- *  Copyright (C) 2009 Hewlett-Packard Development Company, L.P.
- *      Nagananda Chumbalkar <nagananda.chumbalkar@hp.com>
- *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation; version 2 of the License.
- *
- *  This program is distributed in the hope that it will be useful, but
- *  WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or NON
- *  INFRINGEMENT. See the GNU General Public License for more details.
- *
- *  You should have received a copy of the GNU General Public License along
- *  with this program; if not, write to the Free Software Foundation, Inc.,
- *  675 Mass Ave, Cambridge, MA 02139, USA.
- *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- */
-
-
-			Processor Clocking Control Driver
-			---------------------------------
-
-Contents:
---------
-1.	Introduction
-1.1	PCC interface
-1.1.1   Get Average Frequency
-1.1.2	Set Desired Frequency
-1.2	Platforms affected
-2.	Driver and /sys details
-2.1	scaling_available_frequencies
-2.2	cpuinfo_transition_latency
-2.3	cpuinfo_cur_freq
-2.4	related_cpus
-3.	Caveats
-
-1. Introduction:
----------------
-Processor Clocking Control (PCC) is an interface between the platform
-firmware and OSPM. It is a mechanism for coordinating processor
-performance (ie: frequency) between the platform firmware and the OS.
-
-The PCC driver (pcc-cpufreq) allows OSPM to take advantage of the PCC
-interface.
-
-OS utilizes the PCC interface to inform platform firmware what frequency the
-OS wants for a logical processor. The platform firmware attempts to achieve
-the requested frequency. If the request for the target frequency could not be
-satisfied by platform firmware, then it usually means that power budget
-conditions are in place, and "power capping" is taking place.
-
-1.1 PCC interface:
------------------
-The complete PCC specification is available here:
-http://www.acpica.org/download/Processor-Clocking-Control-v1p0.pdf
-
-PCC relies on a shared memory region that provides a channel for communication
-between the OS and platform firmware. PCC also implements a "doorbell" that
-is used by the OS to inform the platform firmware that a command has been
-sent.
-
-The ACPI PCCH() method is used to discover the location of the PCC shared
-memory region. The shared memory region header contains the "command" and
-"status" interface. PCCH() also contains details on how to access the platform
-doorbell.
-
-The following commands are supported by the PCC interface:
-* Get Average Frequency
-* Set Desired Frequency
-
-The ACPI PCCP() method is implemented for each logical processor and is
-used to discover the offsets for the input and output buffers in the shared
-memory region.
-
-When PCC mode is enabled, the platform will not expose processor performance
-or throttle states (_PSS, _TSS and related ACPI objects) to OSPM. Therefore,
-the native P-state driver (such as acpi-cpufreq for Intel, powernow-k8 for
-AMD) will not load.
-
-However, OSPM remains in control of policy. The governor (eg: "ondemand")
-computes the required performance for each processor based on server workload.
-The PCC driver fills in the command interface, and the input buffer and
-communicates the request to the platform firmware. The platform firmware is
-responsible for delivering the requested performance.
-
-Each PCC command is "global" in scope and can affect all the logical CPUs in
-the system. Therefore, PCC is capable of performing "group" updates. With PCC
-the OS is capable of getting/setting the frequency of all the logical CPUs in
-the system with a single call to the BIOS.
-
-1.1.1 Get Average Frequency:
----------------------------
-This command is used by the OSPM to query the running frequency of the
-processor since the last time this command was completed. The output buffer
-indicates the average unhalted frequency of the logical processor expressed as
-a percentage of the nominal (ie: maximum) CPU frequency. The output buffer
-also signifies if the CPU frequency is limited by a power budget condition.
-
-1.1.2 Set Desired Frequency:
----------------------------
-This command is used by the OSPM to communicate to the platform firmware the
-desired frequency for a logical processor. The output buffer is currently
-ignored by OSPM. The next invocation of "Get Average Frequency" will inform
-OSPM if the desired frequency was achieved or not.
-
-1.2 Platforms affected:
-----------------------
-The PCC driver will load on any system where the platform firmware:
-* supports the PCC interface, and the associated PCCH() and PCCP() methods
-* assumes responsibility for managing the hardware clocking controls in order
-to deliver the requested processor performance
-
-Currently, certain HP ProLiant platforms implement the PCC interface. On those
-platforms PCC is the "default" choice.
-
-However, it is possible to disable this interface via a BIOS setting. In
-such an instance, as is also the case on platforms where the PCC interface
-is not implemented, the PCC driver will fail to load silently.
-
-2. Driver and /sys details:
---------------------------
-When the driver loads, it merely prints the lowest and the highest CPU
-frequencies supported by the platform firmware.
-
-The PCC driver loads with a message such as:
-pcc-cpufreq: (v1.00.00) driver loaded with frequency limits: 1600 MHz, 2933
-MHz
-
-This means that the OPSM can request the CPU to run at any frequency in
-between the limits (1600 MHz, and 2933 MHz) specified in the message.
-
-Internally, there is no need for the driver to convert the "target" frequency
-to a corresponding P-state.
-
-The VERSION number for the driver will be of the format v.xy.ab.
-eg: 1.00.02
-   ----- --
-    |    |
-    |    -- this will increase with bug fixes/enhancements to the driver
-    |-- this is the version of the PCC specification the driver adheres to
-
-
-The following is a brief discussion on some of the fields exported via the
-/sys filesystem and how their values are affected by the PCC driver:
-
-2.1 scaling_available_frequencies:
----------------------------------
-scaling_available_frequencies is not created in /sys. No intermediate
-frequencies need to be listed because the BIOS will try to achieve any
-frequency, within limits, requested by the governor. A frequency does not have
-to be strictly associated with a P-state.
-
-2.2 cpuinfo_transition_latency:
-------------------------------
-The cpuinfo_transition_latency field is 0. The PCC specification does
-not include a field to expose this value currently.
-
-2.3 cpuinfo_cur_freq:
---------------------
-A) Often cpuinfo_cur_freq will show a value different than what is declared
-in the scaling_available_frequencies or scaling_cur_freq, or scaling_max_freq.
-This is due to "turbo boost" available on recent Intel processors. If certain
-conditions are met the BIOS can achieve a slightly higher speed than requested
-by OSPM. An example:
-
-scaling_cur_freq	: 2933000
-cpuinfo_cur_freq	: 3196000
-
-B) There is a round-off error associated with the cpuinfo_cur_freq value.
-Since the driver obtains the current frequency as a "percentage" (%) of the
-nominal frequency from the BIOS, sometimes, the values displayed by
-scaling_cur_freq and cpuinfo_cur_freq may not match. An example:
-
-scaling_cur_freq	: 1600000
-cpuinfo_cur_freq	: 1583000
-
-In this example, the nominal frequency is 2933 MHz. The driver obtains the
-current frequency, cpuinfo_cur_freq, as 54% of the nominal frequency:
-
-	54% of 2933 MHz = 1583 MHz
-
-Nominal frequency is the maximum frequency of the processor, and it usually
-corresponds to the frequency of the P0 P-state.
-
-2.4 related_cpus:
-----------------
-The related_cpus field is identical to affected_cpus.
-
-affected_cpus	: 4
-related_cpus	: 4
-
-Currently, the PCC driver does not evaluate _PSD. The platforms that support
-PCC do not implement SW_ALL. So OSPM doesn't need to perform any coordination
-to ensure that the same frequency is requested of all dependent CPUs.
-
-3. Caveats:
-----------
-The "cpufreq_stats" module in its present form cannot be loaded and
-expected to work with the PCC driver. Since the "cpufreq_stats" module
-provides information wrt each P-state, it is not applicable to the PCC driver.
--- a/Documentation/debugging-modules.txt
+++ b/Documentation/debugging-modules.txt
@ -1,22 +0,0 @@
-Debugging Modules after 2.6.3
-----------------------------
-
-In almost all distributions, the kernel asks for modules which don't
-exist, such as "net-pf-10" or whatever.  Changing "modprobe -q" to
-"succeed" in this case is hacky and breaks some setups, and also we
-want to know if it failed for the fallback code for old aliases in
-fs/char_dev.c, for example.
-
-In the past a debugging message which would fill people's logs was
-emitted.  This debugging message has been removed.  The correct way
-of debugging module problems is something like this:
-
-echo '#! /bin/sh' > /tmp/modprobe
-echo 'echo "$@" >> /tmp/modprobe.log' >> /tmp/modprobe
-echo 'exec /sbin/modprobe "$@"' >> /tmp/modprobe
-chmod a+x /tmp/modprobe
-echo /tmp/modprobe > /proc/sys/kernel/modprobe
-
-Note that the above applies only when the *kernel* is requesting
-that the module be loaded -- it won't have any effect if that module
-is being loaded explicitly using "modprobe" from userspace.
--- a/Documentation/dev-tools/gcov.rst
+++ b/Documentation/dev-tools/gcov.rst
@ -203,7 +203,7 @@ Cause
    may not correctly copy files from sysfs.

 Solution
-    Use ``cat``' to read ``.gcda`` files and ``cp -d`` to copy links.
+    Use ``cat`` to read ``.gcda`` files and ``cp -d`` to copy links.
    Alternatively use the mechanism shown in Appendix B.


--- a/Documentation/dev-tools/kmemleak.rst
+++ b/Documentation/dev-tools/kmemleak.rst
@ -8,7 +8,8 @@ with the difference that the orphan objects are not freed but only
 reported via /sys/kernel/debug/kmemleak. A similar method is used by the
 Valgrind tool (``memcheck --leak-check``) to detect the memory leaks in
 user-space applications.
-Kmemleak is supported on x86, arm, powerpc, sparc, sh, microblaze, ppc, mips, s390 and tile.
+Kmemleak is supported on x86, arm, arm64, powerpc, sparc, sh, microblaze, mips,
+s390, nds32, arc and xtensa.

 Usage
 -----
--- a/Documentation/dev-tools/kunit/index.rst
+++ b/Documentation/dev-tools/kunit/index.rst
@ -17,14 +17,23 @@ What is KUnit?
 ==============

 KUnit is a lightweight unit testing and mocking framework for the Linux kernel.
-These tests are able to be run locally on a developer's workstation without a VM
-or special hardware.

 KUnit is heavily inspired by JUnit, Python's unittest.mock, and
 Googletest/Googlemock for C++. KUnit provides facilities for defining unit test
 cases, grouping related test cases into test suites, providing common
 infrastructure for running tests, and much more.

+KUnit consists of a kernel component, which provides a set of macros for easily
+writing unit tests. Tests written against KUnit will run on kernel boot if
+built-in, or when loaded if built as a module. These tests write out results to
+the kernel log in `TAP <https://testanything.org/>`_ format.
+
+To make running these tests (and reading the results) easier, KUnit offers
+:doc:`kunit_tool <kunit-tool>`, which builds a `User Mode Linux
+<http://user-mode-linux.sourceforge.net>`_ kernel, runs it, and parses the test
+results. This provides a quick way of running KUnit tests during development,
+without requiring a virtual machine or separate hardware.
+
 Get started now: :doc:`start`

 Why KUnit?
@ -36,21 +45,20 @@ allow all possible code paths to be tested in the code under test; this is only
 possible if the code under test is very small and does not have any external
 dependencies outside of the test's control like hardware.

-Outside of KUnit, there are no testing frameworks currently
-available for the kernel that do not require installing the kernel on a test
-machine or in a VM and all require tests to be written in userspace running on
-the kernel; this is true for Autotest, and kselftest, disqualifying
-any of them from being considered unit testing frameworks.
+KUnit provides a common framework for unit tests within the kernel.

-KUnit addresses the problem of being able to run tests without needing a virtual
-machine or actual hardware with User Mode Linux. User Mode Linux is a Linux
-architecture, like ARM or x86; however, unlike other architectures it compiles
-to a standalone program that can be run like any other program directly inside
-of a host operating system; to be clear, it does not require any virtualization
-support; it is just a regular program.
+KUnit tests can be run on most architectures, and most tests are architecture
+independent. All built-in KUnit tests run on kernel startup.  Alternatively,
+KUnit and KUnit tests can be built as modules and tests will run when the test
+module is loaded.

-Alternatively, kunit and kunit tests can be built as modules and tests will
-run when the test module is loaded.
+.. note::
+
+        KUnit can also run tests without needing a virtual machine or actual
+        hardware under User Mode Linux. User Mode Linux is a Linux architecture,
+        like ARM or x86, which compiles the kernel as a Linux executable. KUnit
+        can be used with UML either by building with ``ARCH=um`` (like any other
+        architecture), or by using :doc:`kunit_tool <kunit-tool>`.

 KUnit is fast. Excluding build time, from invocation to completion KUnit can run
 several dozen tests in only 10 to 20 seconds; this might not sound like a big
@ -81,3 +89,5 @@ How do I use it?
 *   :doc:`start` - for new users of KUnit
 *   :doc:`usage` - for a more detailed explanation of KUnit features
 *   :doc:`api/index` - for the list of KUnit APIs used for testing
+*   :doc:`kunit-tool` - for more information on the kunit_tool helper script
+*   :doc:`faq` - for answers to some common questions about KUnit
--- a/Documentation/dev-tools/kunit/kunit-tool.rst
+++ b/Documentation/dev-tools/kunit/kunit-tool.rst
@ -12,6 +12,13 @@ the Linux kernel as UML (`User Mode Linux
 <http://user-mode-linux.sourceforge.net/>`_), running KUnit tests, parsing
 the test results and displaying them in a user friendly manner.

+kunit_tool addresses the problem of being able to run tests without needing a
+virtual machine or actual hardware with User Mode Linux. User Mode Linux is a
+Linux architecture, like ARM or x86; however, unlike other architectures it
+compiles the kernel as a standalone Linux executable that can be run like any
+other program directly inside of a host operating system. To be clear, it does
+not require any virtualization support: it is just a regular program.
+
 What is a kunitconfig?
 ======================

--- a/Documentation/dev-tools/kunit/start.rst
+++ b/Documentation/dev-tools/kunit/start.rst
@ -9,11 +9,10 @@ Installing dependencies
 KUnit has the same dependencies as the Linux kernel. As long as you can build
 the kernel, you can run KUnit.

-KUnit Wrapper
-=============
-Included with KUnit is a simple Python wrapper that helps format the output to
-easily use and read KUnit output. It handles building and running the kernel, as
-well as formatting the output.
+Running tests with the KUnit Wrapper
+====================================
+Included with KUnit is a simple Python wrapper which runs tests under User Mode
+Linux, and formats the test results.

 The wrapper can be run with:

@ -21,22 +20,42 @@ The wrapper can be run with:

 	./tools/testing/kunit/kunit.py run --defconfig

-For more information on this wrapper (also called kunit_tool) checkout the
+For more information on this wrapper (also called kunit_tool) check out the
 :doc:`kunit-tool` page.

 Creating a .kunitconfig
-=======================
-The Python script is a thin wrapper around Kbuild. As such, it needs to be
-configured with a ``.kunitconfig`` file. This file essentially contains the
-regular Kernel config, with the specific test targets as well.
+-----------------------
+If you want to run a specific set of tests (rather than those listed in the
+KUnit defconfig), you can provide Kconfig options in the ``.kunitconfig`` file.
+This file essentially contains the regular Kernel config, with the specific
+test targets as well. The ``.kunitconfig`` should also contain any other config
+options required by the tests.

+A good starting point for a ``.kunitconfig`` is the KUnit defconfig:
 .. code-block:: bash

 	cd $PATH_TO_LINUX_REPO
 	cp arch/um/configs/kunit_defconfig .kunitconfig

-Verifying KUnit Works
---------------------
+You can then add any other Kconfig options you wish, e.g.:
+.. code-block:: none
+
+        CONFIG_LIST_KUNIT_TEST=y
+
+:doc:`kunit_tool <kunit-tool>` will ensure that all config options set in
+``.kunitconfig`` are set in the kernel ``.config`` before running the tests.
+It'll warn you if you haven't included the dependencies of the options you're
+using.
+
+.. note::
+   Note that removing something from the ``.kunitconfig`` will not trigger a
+   rebuild of the ``.config`` file: the configuration is only updated if the
+   ``.kunitconfig`` is not a subset of ``.config``. This means that you can use
+   other tools (such as make menuconfig) to adjust other config options.
+
+
+Running the tests
+-----------------

 To make sure that everything is set up correctly, simply invoke the Python
 wrapper from your kernel repo:
@ -62,6 +81,41 @@ followed by a list of tests that are run. All of them should be passing.
 	Because it is building a lot of sources for the first time, the
 	``Building KUnit kernel`` step may take a while.

+Running tests without the KUnit Wrapper
+=======================================
+
+If you'd rather not use the KUnit Wrapper (if, for example, you need to
+integrate with other systems, or use an architecture other than UML), KUnit can
+be included in any kernel, and the results read out and parsed manually.
+
+.. note::
+   KUnit is not designed for use in a production system, and it's possible that
+   tests may reduce the stability or security of the system.
+
+
+
+Configuring the kernel
+----------------------
+
+In order to enable KUnit itself, you simply need to enable the ``CONFIG_KUNIT``
+Kconfig option (it's under Kernel Hacking/Kernel Testing and Coverage in
+menuconfig). From there, you can enable any KUnit tests you want: they usually
+have config options ending in ``_KUNIT_TEST``.
+
+KUnit and KUnit tests can be compiled as modules: in this case the tests in a
+module will be run when the module is loaded.
+
+Running the tests
+-----------------
+
+Build and run your kernel as usual. Test output will be written to the kernel
+log in `TAP <https://testanything.org/>`_ format.
+
+.. note::
+   It's possible that there will be other lines and/or data interspersed in the
+   TAP output.
+
+
 Writing your first test
 =======================

--- a/Documentation/dev-tools/kunit/usage.rst
+++ b/Documentation/dev-tools/kunit/usage.rst
@ -591,3 +591,17 @@ able to run one test case per invocation.

 .. TODO(brendanhiggins@google.com): Add an actual example of an architecture
   dependent KUnit test.
+
+KUnit debugfs representation
+============================
+When kunit test suites are initialized, they create an associated directory
+in /sys/kernel/debug/kunit/<test-suite>.  The directory contains one file
+
+- results: "cat results" displays results of each test case and the results
+  of the entire suite for the last test run.
+
+The debugfs representation is primarily of use when kunit test suites are
+run in a native environment, either as modules or builtin.  Having a way
+to display results like this is valuable as otherwise results can be
+intermixed with other events in dmesg output.  The maximum size of each
+results file is KUNIT_LOG_SIZE bytes (defined in include/kunit/test.h).
--- a/Documentation/devicetree/bindings/.gitignore
+++ b/Documentation/devicetree/bindings/.gitignore
@ -1,2 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0-only
 *.example.dts
-processed-schema.yaml
+processed-schema*.yaml
--- a/Documentation/devicetree/bindings/Makefile
+++ b/Documentation/devicetree/bindings/Makefile
@ -2,7 +2,6 @@
 DT_DOC_CHECKER ?= dt-doc-validate
 DT_EXTRACT_EX ?= dt-extract-example
 DT_MK_SCHEMA ?= dt-mk-schema
-DT_MK_SCHEMA_FLAGS := $(if $(DT_SCHEMA_FILES), -u)

 quiet_cmd_chk_binding = CHKDT   $(patsubst $(srctree)/%,%,$<)
      cmd_chk_binding = $(DT_DOC_CHECKER) -u $(srctree)/$(src) $< ; \
@ -11,26 +10,35 @@ quiet_cmd_chk_binding = CHKDT   $(patsubst $(srctree)/%,%,$<)
 $(obj)/%.example.dts: $(src)/%.yaml FORCE
 	$(call if_changed,chk_binding)

-DT_TMP_SCHEMA := processed-schema.yaml
+# Use full schemas when checking %.example.dts
+DT_TMP_SCHEMA := $(obj)/processed-schema-examples.yaml

 quiet_cmd_mk_schema = SCHEMA  $@
      cmd_mk_schema = $(DT_MK_SCHEMA) $(DT_MK_SCHEMA_FLAGS) -o $@ $(real-prereqs)

-DT_DOCS = $(shell \
+DT_DOCS = $(addprefix $(src)/, \
+	$(shell \
 	cd $(srctree)/$(src) && \
 	find * \( -name '*.yaml' ! \
-		-name $(DT_TMP_SCHEMA) ! \
+		-name 'processed-schema*' ! \
 		-name '*.example.dt.yaml' \) \
-	)
+	))

-DT_SCHEMA_FILES ?= $(addprefix $(src)/,$(DT_DOCS))
+DT_SCHEMA_FILES ?= $(DT_DOCS)

-ifeq ($(CHECK_DTBS),)
-extra-y += $(patsubst $(src)/%.yaml,%.example.dts, $(DT_SCHEMA_FILES))
-extra-y += $(patsubst $(src)/%.yaml,%.example.dt.yaml, $(DT_SCHEMA_FILES))
-endif
+extra-$(CHECK_DT_BINDING) += $(patsubst $(src)/%.yaml,%.example.dts, $(DT_SCHEMA_FILES))
+extra-$(CHECK_DT_BINDING) += $(patsubst $(src)/%.yaml,%.example.dt.yaml, $(DT_SCHEMA_FILES))
+extra-$(CHECK_DT_BINDING) += processed-schema-examples.yaml

-$(obj)/$(DT_TMP_SCHEMA): $(DT_SCHEMA_FILES) FORCE
+override DTC_FLAGS := \
+	-Wno-avoid_unnecessary_addr_size \
+	-Wno-graph_child_address
+
+$(obj)/processed-schema-examples.yaml: $(DT_DOCS) FORCE
 	$(call if_changed,mk_schema)

-extra-y += $(DT_TMP_SCHEMA)
+$(obj)/processed-schema.yaml: DT_MK_SCHEMA_FLAGS := -u
+$(obj)/processed-schema.yaml: $(DT_SCHEMA_FILES) FORCE
+	$(call if_changed,mk_schema)
+
+extra-y += processed-schema.yaml
--- a/Documentation/devicetree/bindings/arm/altera/socfpga-clk-manager.yaml
+++ b/Documentation/devicetree/bindings/arm/altera/socfpga-clk-manager.yaml
@ -21,6 +21,8 @@ properties:
 required:
  - compatible

+additionalProperties: false
+
 examples:
  - |
    clkmgr@ffd04000 {
--- a/Documentation/devicetree/bindings/arm/amlogic/amlogic,meson-gx-ao-secure.yaml
+++ b/Documentation/devicetree/bindings/arm/amlogic/amlogic,meson-gx-ao-secure.yaml
@ -43,6 +43,8 @@ required:
  - compatible
  - reg

+additionalProperties: false
+
 examples:
  - |
    ao-secure@140 {
--- a/Documentation/devicetree/bindings/arm/arm,integrator.yaml
+++ b/Documentation/devicetree/bindings/arm/arm,integrator.yaml
@ -0,0 +1,86 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/arm,integrator.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: ARM Integrator Boards Device Tree Bindings
+
+maintainers:
+  - Linus Walleij <linus.walleij@linaro.org>
+
+description: |+
+  These were the first ARM platforms officially supported by ARM Ltd.
+  They are ARMv4, ARMv5 and ARMv6-capable using different core tiles,
+  so the system is modular and can host a variety of CPU tiles called
+  "core tiles" and referred to in the device tree as "core modules".
+
+properties:
+  $nodename:
+    const: '/'
+  compatible:
+    oneOf:
+      - description: ARM Integrator Application Platform, this board has a PCI
+          host and several PCI slots, as well as a number of slots for logical
+          expansion modules, it is referred to as an "ASIC Development
+          Motherboard" and is extended with custom FPGA and is intended for
+          rapid prototyping. See ARM DUI 0098B. This board can physically come
+          pre-packaged in a PC Tower form factor called Integrator/PP1 or a
+          special metal fixture called Integrator/PP2, see ARM DUI 0169A.
+        items:
+          - const: arm,integrator-ap
+      - description: ARM Integrator Compact Platform (HBI-0086), this board has
+          a compact form factor and mainly consists of the bare minimum
+          peripherals to make use of the core module. See ARM DUI 0159B.
+        items:
+          - const: arm,integrator-cp
+      - description: ARM Integrator Standard Development Board (SDB) Platform,
+          this board is a PCI-based board conforming to the Microsoft SDB
+          (HARP) specification. See ARM DUI 0099A.
+        items:
+          - const: arm,integrator-sp
+
+  core-module@10000000:
+    type: object
+    description: the root node in the Integrator platforms must contain
+      a core module child node. They are always at physical address
+      0x10000000 in all the Integrator variants.
+    properties:
+      compatible:
+        items:
+          - const: arm,core-module-integrator
+          - const: syscon
+          - const: simple-mfd
+      reg:
+        maxItems: 1
+
+    required:
+      - compatible
+      - reg
+
+patternProperties:
+  "^syscon@[0-9a-f]+$":
+    description: All Integrator boards must provide a system controller as a
+      node in the root of the device tree.
+    type: object
+    properties:
+      compatible:
+        items:
+          - enum:
+            - arm,integrator-ap-syscon
+            - arm,integrator-cp-syscon
+            - arm,integrator-sp-syscon
+          - const: syscon
+      reg:
+        maxItems: 1
+
+    required:
+      - compatible
+      - reg
+
+
+required:
+  - compatible
+  - core-module@10000000
+
+...
--- a/Show More
+++ b/Show More