Merge branch 'master' into mm-nonmm-stable
This commit is contained in:
commit
ee56c3e8ee
4
.mailmap
4
.mailmap
@ -10,6 +10,8 @@
|
||||
# Please keep this list dictionary sorted.
|
||||
#
|
||||
Aaron Durbin <adurbin@google.com>
|
||||
Abel Vesa <abelvesa@kernel.org> <abel.vesa@nxp.com>
|
||||
Abel Vesa <abelvesa@kernel.org> <abelvesa@gmail.com>
|
||||
Abhinav Kumar <quic_abhinavk@quicinc.com> <abhinavk@codeaurora.org>
|
||||
Adam Oldham <oldhamca@gmail.com>
|
||||
Adam Radford <aradford@gmail.com>
|
||||
@ -85,6 +87,7 @@ Christian Borntraeger <borntraeger@linux.ibm.com> <borntrae@de.ibm.com>
|
||||
Christian Brauner <brauner@kernel.org> <christian@brauner.io>
|
||||
Christian Brauner <brauner@kernel.org> <christian.brauner@canonical.com>
|
||||
Christian Brauner <brauner@kernel.org> <christian.brauner@ubuntu.com>
|
||||
Christian Marangi <ansuelsmth@gmail.com>
|
||||
Christophe Ricard <christophe.ricard@gmail.com>
|
||||
Christoph Hellwig <hch@lst.de>
|
||||
Colin Ian King <colin.king@intel.com> <colin.king@canonical.com>
|
||||
@ -165,6 +168,7 @@ Jan Glauber <jan.glauber@gmail.com> <jang@de.ibm.com>
|
||||
Jan Glauber <jan.glauber@gmail.com> <jang@linux.vnet.ibm.com>
|
||||
Jan Glauber <jan.glauber@gmail.com> <jglauber@cavium.com>
|
||||
Jarkko Sakkinen <jarkko@kernel.org> <jarkko.sakkinen@linux.intel.com>
|
||||
Jarkko Sakkinen <jarkko@kernel.org> <jarkko@profian.com>
|
||||
Jason Gunthorpe <jgg@ziepe.ca> <jgg@mellanox.com>
|
||||
Jason Gunthorpe <jgg@ziepe.ca> <jgg@nvidia.com>
|
||||
Jason Gunthorpe <jgg@ziepe.ca> <jgunthorpe@obsidianresearch.com>
|
||||
|
@ -1,4 +1,4 @@
|
||||
What: /sys/bus/iio/devices/iio:deviceX/conversion_mode
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_conversion_mode
|
||||
KernelVersion: 4.2
|
||||
Contact: linux-iio@vger.kernel.org
|
||||
Description:
|
||||
|
@ -526,6 +526,7 @@ What: /sys/devices/system/cpu/vulnerabilities
|
||||
/sys/devices/system/cpu/vulnerabilities/srbds
|
||||
/sys/devices/system/cpu/vulnerabilities/tsx_async_abort
|
||||
/sys/devices/system/cpu/vulnerabilities/itlb_multihit
|
||||
/sys/devices/system/cpu/vulnerabilities/mmio_stale_data
|
||||
Date: January 2018
|
||||
Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org>
|
||||
Description: Information about CPU vulnerabilities
|
||||
|
@ -17,3 +17,4 @@ are configurable at compile, boot or run time.
|
||||
special-register-buffer-data-sampling.rst
|
||||
core-scheduling.rst
|
||||
l1d_flush.rst
|
||||
processor_mmio_stale_data.rst
|
||||
|
246
Documentation/admin-guide/hw-vuln/processor_mmio_stale_data.rst
Normal file
246
Documentation/admin-guide/hw-vuln/processor_mmio_stale_data.rst
Normal file
@ -0,0 +1,246 @@
|
||||
=========================================
|
||||
Processor MMIO Stale Data Vulnerabilities
|
||||
=========================================
|
||||
|
||||
Processor MMIO Stale Data Vulnerabilities are a class of memory-mapped I/O
|
||||
(MMIO) vulnerabilities that can expose data. The sequences of operations for
|
||||
exposing data range from simple to very complex. Because most of the
|
||||
vulnerabilities require the attacker to have access to MMIO, many environments
|
||||
are not affected. System environments using virtualization where MMIO access is
|
||||
provided to untrusted guests may need mitigation. These vulnerabilities are
|
||||
not transient execution attacks. However, these vulnerabilities may propagate
|
||||
stale data into core fill buffers where the data can subsequently be inferred
|
||||
by an unmitigated transient execution attack. Mitigation for these
|
||||
vulnerabilities includes a combination of microcode update and software
|
||||
changes, depending on the platform and usage model. Some of these mitigations
|
||||
are similar to those used to mitigate Microarchitectural Data Sampling (MDS) or
|
||||
those used to mitigate Special Register Buffer Data Sampling (SRBDS).
|
||||
|
||||
Data Propagators
|
||||
================
|
||||
Propagators are operations that result in stale data being copied or moved from
|
||||
one microarchitectural buffer or register to another. Processor MMIO Stale Data
|
||||
Vulnerabilities are operations that may result in stale data being directly
|
||||
read into an architectural, software-visible state or sampled from a buffer or
|
||||
register.
|
||||
|
||||
Fill Buffer Stale Data Propagator (FBSDP)
|
||||
-----------------------------------------
|
||||
Stale data may propagate from fill buffers (FB) into the non-coherent portion
|
||||
of the uncore on some non-coherent writes. Fill buffer propagation by itself
|
||||
does not make stale data architecturally visible. Stale data must be propagated
|
||||
to a location where it is subject to reading or sampling.
|
||||
|
||||
Sideband Stale Data Propagator (SSDP)
|
||||
-------------------------------------
|
||||
The sideband stale data propagator (SSDP) is limited to the client (including
|
||||
Intel Xeon server E3) uncore implementation. The sideband response buffer is
|
||||
shared by all client cores. For non-coherent reads that go to sideband
|
||||
destinations, the uncore logic returns 64 bytes of data to the core, including
|
||||
both requested data and unrequested stale data, from a transaction buffer and
|
||||
the sideband response buffer. As a result, stale data from the sideband
|
||||
response and transaction buffers may now reside in a core fill buffer.
|
||||
|
||||
Primary Stale Data Propagator (PSDP)
|
||||
------------------------------------
|
||||
The primary stale data propagator (PSDP) is limited to the client (including
|
||||
Intel Xeon server E3) uncore implementation. Similar to the sideband response
|
||||
buffer, the primary response buffer is shared by all client cores. For some
|
||||
processors, MMIO primary reads will return 64 bytes of data to the core fill
|
||||
buffer including both requested data and unrequested stale data. This is
|
||||
similar to the sideband stale data propagator.
|
||||
|
||||
Vulnerabilities
|
||||
===============
|
||||
Device Register Partial Write (DRPW) (CVE-2022-21166)
|
||||
-----------------------------------------------------
|
||||
Some endpoint MMIO registers incorrectly handle writes that are smaller than
|
||||
the register size. Instead of aborting the write or only copying the correct
|
||||
subset of bytes (for example, 2 bytes for a 2-byte write), more bytes than
|
||||
specified by the write transaction may be written to the register. On
|
||||
processors affected by FBSDP, this may expose stale data from the fill buffers
|
||||
of the core that created the write transaction.
|
||||
|
||||
Shared Buffers Data Sampling (SBDS) (CVE-2022-21125)
|
||||
----------------------------------------------------
|
||||
After propagators may have moved data around the uncore and copied stale data
|
||||
into client core fill buffers, processors affected by MFBDS can leak data from
|
||||
the fill buffer. It is limited to the client (including Intel Xeon server E3)
|
||||
uncore implementation.
|
||||
|
||||
Shared Buffers Data Read (SBDR) (CVE-2022-21123)
|
||||
------------------------------------------------
|
||||
It is similar to Shared Buffers Data Sampling (SBDS) except that the data is
|
||||
directly read into the architectural software-visible state. It is limited to
|
||||
the client (including Intel Xeon server E3) uncore implementation.
|
||||
|
||||
Affected Processors
|
||||
===================
|
||||
Not all the CPUs are affected by all the variants. For instance, most
|
||||
processors for the server market (excluding Intel Xeon E3 processors) are
|
||||
impacted by only Device Register Partial Write (DRPW).
|
||||
|
||||
Below is the list of affected Intel processors [#f1]_:
|
||||
|
||||
=================== ============ =========
|
||||
Common name Family_Model Steppings
|
||||
=================== ============ =========
|
||||
HASWELL_X 06_3FH 2,4
|
||||
SKYLAKE_L 06_4EH 3
|
||||
BROADWELL_X 06_4FH All
|
||||
SKYLAKE_X 06_55H 3,4,6,7,11
|
||||
BROADWELL_D 06_56H 3,4,5
|
||||
SKYLAKE 06_5EH 3
|
||||
ICELAKE_X 06_6AH 4,5,6
|
||||
ICELAKE_D 06_6CH 1
|
||||
ICELAKE_L 06_7EH 5
|
||||
ATOM_TREMONT_D 06_86H All
|
||||
LAKEFIELD 06_8AH 1
|
||||
KABYLAKE_L 06_8EH 9 to 12
|
||||
ATOM_TREMONT 06_96H 1
|
||||
ATOM_TREMONT_L 06_9CH 0
|
||||
KABYLAKE 06_9EH 9 to 13
|
||||
COMETLAKE 06_A5H 2,3,5
|
||||
COMETLAKE_L 06_A6H 0,1
|
||||
ROCKETLAKE 06_A7H 1
|
||||
=================== ============ =========
|
||||
|
||||
If a CPU is in the affected processor list, but not affected by a variant, it
|
||||
is indicated by new bits in MSR IA32_ARCH_CAPABILITIES. As described in a later
|
||||
section, mitigation largely remains the same for all the variants, i.e. to
|
||||
clear the CPU fill buffers via VERW instruction.
|
||||
|
||||
New bits in MSRs
|
||||
================
|
||||
Newer processors and microcode updates on existing affected processors added new
|
||||
bits to IA32_ARCH_CAPABILITIES MSR. These bits can be used to enumerate
|
||||
specific variants of Processor MMIO Stale Data vulnerabilities and mitigation
|
||||
capability.
|
||||
|
||||
MSR IA32_ARCH_CAPABILITIES
|
||||
--------------------------
|
||||
Bit 13 - SBDR_SSDP_NO - When set, processor is not affected by either the
|
||||
Shared Buffers Data Read (SBDR) vulnerability or the sideband stale
|
||||
data propagator (SSDP).
|
||||
Bit 14 - FBSDP_NO - When set, processor is not affected by the Fill Buffer
|
||||
Stale Data Propagator (FBSDP).
|
||||
Bit 15 - PSDP_NO - When set, processor is not affected by Primary Stale Data
|
||||
Propagator (PSDP).
|
||||
Bit 17 - FB_CLEAR - When set, VERW instruction will overwrite CPU fill buffer
|
||||
values as part of MD_CLEAR operations. Processors that do not
|
||||
enumerate MDS_NO (meaning they are affected by MDS) but that do
|
||||
enumerate support for both L1D_FLUSH and MD_CLEAR implicitly enumerate
|
||||
FB_CLEAR as part of their MD_CLEAR support.
|
||||
Bit 18 - FB_CLEAR_CTRL - Processor supports read and write to MSR
|
||||
IA32_MCU_OPT_CTRL[FB_CLEAR_DIS]. On such processors, the FB_CLEAR_DIS
|
||||
bit can be set to cause the VERW instruction to not perform the
|
||||
FB_CLEAR action. Not all processors that support FB_CLEAR will support
|
||||
FB_CLEAR_CTRL.
|
||||
|
||||
MSR IA32_MCU_OPT_CTRL
|
||||
---------------------
|
||||
Bit 3 - FB_CLEAR_DIS - When set, VERW instruction does not perform the FB_CLEAR
|
||||
action. This may be useful to reduce the performance impact of FB_CLEAR in
|
||||
cases where system software deems it warranted (for example, when performance
|
||||
is more critical, or the untrusted software has no MMIO access). Note that
|
||||
FB_CLEAR_DIS has no impact on enumeration (for example, it does not change
|
||||
FB_CLEAR or MD_CLEAR enumeration) and it may not be supported on all processors
|
||||
that enumerate FB_CLEAR.
|
||||
|
||||
Mitigation
|
||||
==========
|
||||
Like MDS, all variants of Processor MMIO Stale Data vulnerabilities have the
|
||||
same mitigation strategy to force the CPU to clear the affected buffers before
|
||||
an attacker can extract the secrets.
|
||||
|
||||
This is achieved by using the otherwise unused and obsolete VERW instruction in
|
||||
combination with a microcode update. The microcode clears the affected CPU
|
||||
buffers when the VERW instruction is executed.
|
||||
|
||||
The kernel reuses the MDS function to invoke the buffer clearing:
|
||||
|
||||
mds_clear_cpu_buffers()
|
||||
|
||||
On MDS affected CPUs, the kernel already invokes CPU buffer clear on
|
||||
kernel/userspace, hypervisor/guest and C-state (idle) transitions. No
|
||||
additional mitigation is needed on such CPUs.
|
||||
|
||||
For CPUs not affected by MDS or TAA, mitigation is needed only for the attacker
|
||||
with MMIO capability. Therefore, VERW is not required for kernel/userspace. For
|
||||
the virtualization case, VERW is only needed at VMENTER for a guest with MMIO
|
||||
capability.
|
||||
|
||||
Mitigation points
|
||||
-----------------
|
||||
Return to user space
|
||||
^^^^^^^^^^^^^^^^^^^^
|
||||
Same mitigation as MDS when affected by MDS/TAA, otherwise no mitigation
|
||||
needed.
|
||||
|
||||
C-State transition
|
||||
^^^^^^^^^^^^^^^^^^
|
||||
Control register writes by CPU during C-state transition can propagate data
|
||||
from fill buffer to uncore buffers. Execute VERW before C-state transition to
|
||||
clear CPU fill buffers.
|
||||
|
||||
Guest entry point
|
||||
^^^^^^^^^^^^^^^^^
|
||||
Same mitigation as MDS when processor is also affected by MDS/TAA, otherwise
|
||||
execute VERW at VMENTER only for MMIO capable guests. On CPUs not affected by
|
||||
MDS/TAA, a guest without MMIO access cannot extract secrets using Processor MMIO
|
||||
Stale Data vulnerabilities, so there is no need to execute VERW for such guests.
|
||||
|
||||
Mitigation control on the kernel command line
|
||||
---------------------------------------------
|
||||
The kernel command line allows controlling the Processor MMIO Stale Data
|
||||
mitigations at boot time with the option "mmio_stale_data=". The valid
|
||||
arguments for this option are:
|
||||
|
||||
========== =================================================================
|
||||
full If the CPU is vulnerable, enable mitigation; CPU buffer clearing
|
||||
on exit to userspace and when entering a VM. Idle transitions are
|
||||
protected as well. It does not automatically disable SMT.
|
||||
full,nosmt Same as full, with SMT disabled on vulnerable CPUs. This is the
|
||||
complete mitigation.
|
||||
off Disables mitigation completely.
|
||||
========== =================================================================
|
||||
|
||||
If the CPU is affected and mmio_stale_data=off is not supplied on the kernel
|
||||
command line, then the kernel selects the appropriate mitigation.
|
||||
|
||||
Mitigation status information
|
||||
-----------------------------
|
||||
The Linux kernel provides a sysfs interface to enumerate the current
|
||||
vulnerability status of the system: whether the system is vulnerable, and
|
||||
which mitigations are active. The relevant sysfs file is:
|
||||
|
||||
/sys/devices/system/cpu/vulnerabilities/mmio_stale_data
|
||||
|
||||
The possible values in this file are:
|
||||
|
||||
.. list-table::
|
||||
|
||||
* - 'Not affected'
|
||||
- The processor is not vulnerable
|
||||
* - 'Vulnerable'
|
||||
- The processor is vulnerable, but no mitigation is enabled
|
||||
* - 'Vulnerable: Clear CPU buffers attempted, no microcode'
|
||||
- The processor is vulnerable, but microcode is not updated. The
|
||||
mitigation is enabled on a best effort basis.
|
||||
* - 'Mitigation: Clear CPU buffers'
|
||||
- The processor is vulnerable and the CPU buffer clearing mitigation is
|
||||
enabled.
|
||||
|
||||
If the processor is vulnerable then the following information is appended to
|
||||
the above information:
|
||||
|
||||
======================== ===========================================
|
||||
'SMT vulnerable' SMT is enabled
|
||||
'SMT disabled' SMT is disabled
|
||||
'SMT Host state unknown' Kernel runs in a VM, Host SMT state unknown
|
||||
======================== ===========================================
|
||||
|
||||
References
|
||||
----------
|
||||
.. [#f1] Affected Processors
|
||||
https://www.intel.com/content/www/us/en/developer/topic-technology/software-security-guidance/processors-affected-consolidated-product-cpu-model.html
|
@ -2469,7 +2469,6 @@
|
||||
|
||||
protected: nVHE-based mode with support for guests whose
|
||||
state is kept private from the host.
|
||||
Not valid if the kernel is running in EL2.
|
||||
|
||||
Defaults to VHE/nVHE based on hardware support. Setting
|
||||
mode to "protected" will disable kexec and hibernation
|
||||
@ -3176,6 +3175,7 @@
|
||||
srbds=off [X86,INTEL]
|
||||
no_entry_flush [PPC]
|
||||
no_uaccess_flush [PPC]
|
||||
mmio_stale_data=off [X86]
|
||||
|
||||
Exceptions:
|
||||
This does not have any effect on
|
||||
@ -3197,6 +3197,7 @@
|
||||
Equivalent to: l1tf=flush,nosmt [X86]
|
||||
mds=full,nosmt [X86]
|
||||
tsx_async_abort=full,nosmt [X86]
|
||||
mmio_stale_data=full,nosmt [X86]
|
||||
|
||||
mminit_loglevel=
|
||||
[KNL] When CONFIG_DEBUG_MEMORY_INIT is set, this
|
||||
@ -3206,6 +3207,40 @@
|
||||
log everything. Information is printed at KERN_DEBUG
|
||||
so loglevel=8 may also need to be specified.
|
||||
|
||||
mmio_stale_data=
|
||||
[X86,INTEL] Control mitigation for the Processor
|
||||
MMIO Stale Data vulnerabilities.
|
||||
|
||||
Processor MMIO Stale Data is a class of
|
||||
vulnerabilities that may expose data after an MMIO
|
||||
operation. Exposed data could originate or end in
|
||||
the same CPU buffers as affected by MDS and TAA.
|
||||
Therefore, similar to MDS and TAA, the mitigation
|
||||
is to clear the affected CPU buffers.
|
||||
|
||||
This parameter controls the mitigation. The
|
||||
options are:
|
||||
|
||||
full - Enable mitigation on vulnerable CPUs
|
||||
|
||||
full,nosmt - Enable mitigation and disable SMT on
|
||||
vulnerable CPUs.
|
||||
|
||||
off - Unconditionally disable mitigation
|
||||
|
||||
On MDS or TAA affected machines,
|
||||
mmio_stale_data=off can be prevented by an active
|
||||
MDS or TAA mitigation as these vulnerabilities are
|
||||
mitigated with the same mechanism. In order to
|
||||
disable this mitigation, you need to specify
|
||||
mds=off and tsx_async_abort=off too.
|
||||
|
||||
Not specifying this option is equivalent to
|
||||
mmio_stale_data=full.
|
||||
|
||||
For details see:
|
||||
Documentation/admin-guide/hw-vuln/processor_mmio_stale_data.rst
|
||||
|
||||
module.sig_enforce
|
||||
[KNL] When CONFIG_MODULE_SIG is set, this means that
|
||||
modules without (valid) signatures will fail to load.
|
||||
|
@ -40,7 +40,6 @@ properties:
|
||||
value to be used for converting remote channel measurements to
|
||||
temperature.
|
||||
$ref: /schemas/types.yaml#/definitions/int32
|
||||
items:
|
||||
minimum: -128
|
||||
maximum: 127
|
||||
|
||||
|
@ -30,6 +30,7 @@ properties:
|
||||
- socionext,uniphier-ld11-aidet
|
||||
- socionext,uniphier-ld20-aidet
|
||||
- socionext,uniphier-pxs3-aidet
|
||||
- socionext,uniphier-nx1-aidet
|
||||
|
||||
reg:
|
||||
maxItems: 1
|
||||
|
@ -47,6 +47,5 @@ examples:
|
||||
clocks = <&clkcfg CLK_SPI0>;
|
||||
interrupt-parent = <&plic>;
|
||||
interrupts = <54>;
|
||||
spi-max-frequency = <25000000>;
|
||||
};
|
||||
...
|
||||
|
@ -110,7 +110,6 @@ examples:
|
||||
pinctrl-names = "default";
|
||||
pinctrl-0 = <&qup_spi1_default>;
|
||||
interrupts = <GIC_SPI 602 IRQ_TYPE_LEVEL_HIGH>;
|
||||
spi-max-frequency = <50000000>;
|
||||
#address-cells = <1>;
|
||||
#size-cells = <0>;
|
||||
};
|
||||
|
@ -136,7 +136,8 @@ properties:
|
||||
Phandle of a companion.
|
||||
|
||||
phys:
|
||||
maxItems: 1
|
||||
minItems: 1
|
||||
maxItems: 3
|
||||
|
||||
phy-names:
|
||||
const: usb
|
||||
|
@ -103,7 +103,8 @@ properties:
|
||||
Overrides the detected port count
|
||||
|
||||
phys:
|
||||
maxItems: 1
|
||||
minItems: 1
|
||||
maxItems: 3
|
||||
|
||||
phy-names:
|
||||
const: usb
|
||||
|
@ -6,7 +6,7 @@ This document explains how GPIOs can be assigned to given devices and functions.
|
||||
|
||||
Note that it only applies to the new descriptor-based interface. For a
|
||||
description of the deprecated integer-based GPIO interface please refer to
|
||||
gpio-legacy.txt (actually, there is no real mapping possible with the old
|
||||
legacy.rst (actually, there is no real mapping possible with the old
|
||||
interface; you just fetch an integer from somewhere and request the
|
||||
corresponding GPIO).
|
||||
|
||||
|
@ -4,7 +4,7 @@ GPIO Descriptor Consumer Interface
|
||||
|
||||
This document describes the consumer interface of the GPIO framework. Note that
|
||||
it describes the new descriptor-based interface. For a description of the
|
||||
deprecated integer-based GPIO interface please refer to gpio-legacy.txt.
|
||||
deprecated integer-based GPIO interface please refer to legacy.rst.
|
||||
|
||||
|
||||
Guidelines for GPIOs consumers
|
||||
@ -78,7 +78,7 @@ whether the line is configured active high or active low (see
|
||||
|
||||
The last two flags are used for use cases where open drain is mandatory, such
|
||||
as I2C: if the line is not already configured as open drain in the mappings
|
||||
(see board.txt), then open drain will be enforced anyway and a warning will be
|
||||
(see board.rst), then open drain will be enforced anyway and a warning will be
|
||||
printed that the board configuration needs to be updated to match the use case.
|
||||
|
||||
Both functions return either a valid GPIO descriptor, or an error code checkable
|
||||
@ -270,7 +270,7 @@ driven.
|
||||
The same is applicable for open drain or open source output lines: those do not
|
||||
actively drive their output high (open drain) or low (open source), they just
|
||||
switch their output to a high impedance value. The consumer should not need to
|
||||
care. (For details read about open drain in driver.txt.)
|
||||
care. (For details read about open drain in driver.rst.)
|
||||
|
||||
With this, all the gpiod_set_(array)_value_xxx() functions interpret the
|
||||
parameter "value" as "asserted" ("1") or "de-asserted" ("0"). The physical line
|
||||
|
@ -14,12 +14,12 @@ Due to the history of GPIO interfaces in the kernel, there are two different
|
||||
ways to obtain and use GPIOs:
|
||||
|
||||
- The descriptor-based interface is the preferred way to manipulate GPIOs,
|
||||
and is described by all the files in this directory excepted gpio-legacy.txt.
|
||||
and is described by all the files in this directory except legacy.rst.
|
||||
- The legacy integer-based interface which is considered deprecated (but still
|
||||
usable for compatibility reasons) is documented in gpio-legacy.txt.
|
||||
usable for compatibility reasons) is documented in legacy.rst.
|
||||
|
||||
The remainder of this document applies to the new descriptor-based interface.
|
||||
gpio-legacy.txt contains the same information applied to the legacy
|
||||
legacy.rst contains the same information applied to the legacy
|
||||
integer-based interface.
|
||||
|
||||
|
||||
|
@ -19,13 +19,23 @@ The main Btrfs features include:
|
||||
* Subvolumes (separate internal filesystem roots)
|
||||
* Object level mirroring and striping
|
||||
* Checksums on data and metadata (multiple algorithms available)
|
||||
* Compression
|
||||
* Compression (multiple algorithms available)
|
||||
* Reflink, deduplication
|
||||
* Scrub (on-line checksum verification)
|
||||
* Hierarchical quota groups (subvolume and snapshot support)
|
||||
* Integrated multiple device support, with several raid algorithms
|
||||
* Offline filesystem check
|
||||
* Efficient incremental backup and FS mirroring
|
||||
* Efficient incremental backup and FS mirroring (send/receive)
|
||||
* Trim/discard
|
||||
* Online filesystem defragmentation
|
||||
* Swapfile support
|
||||
* Zoned mode
|
||||
* Read/write metadata verification
|
||||
* Online resize (shrink, grow)
|
||||
|
||||
For more information please refer to the wiki
|
||||
For more information please refer to the documentation site or wiki
|
||||
|
||||
https://btrfs.readthedocs.io
|
||||
|
||||
https://btrfs.wiki.kernel.org
|
||||
|
||||
|
@ -13,8 +13,8 @@ disappeared as of Linux 3.0.
|
||||
|
||||
There are two places where extended attributes can be found. The first
|
||||
place is between the end of each inode entry and the beginning of the
|
||||
next inode entry. For example, if inode.i\_extra\_isize = 28 and
|
||||
sb.inode\_size = 256, then there are 256 - (128 + 28) = 100 bytes
|
||||
next inode entry. For example, if inode.i_extra_isize = 28 and
|
||||
sb.inode_size = 256, then there are 256 - (128 + 28) = 100 bytes
|
||||
available for in-inode extended attribute storage. The second place
|
||||
where extended attributes can be found is in the block pointed to by
|
||||
``inode.i_file_acl``. As of Linux 3.11, it is not possible for this
|
||||
@ -38,8 +38,8 @@ Extended attributes, when stored after the inode, have a header
|
||||
- Name
|
||||
- Description
|
||||
* - 0x0
|
||||
- \_\_le32
|
||||
- h\_magic
|
||||
- __le32
|
||||
- h_magic
|
||||
- Magic number for identification, 0xEA020000. This value is set by the
|
||||
Linux driver, though e2fsprogs doesn't seem to check it(?)
|
||||
|
||||
@ -55,28 +55,28 @@ The beginning of an extended attribute block is in
|
||||
- Name
|
||||
- Description
|
||||
* - 0x0
|
||||
- \_\_le32
|
||||
- h\_magic
|
||||
- __le32
|
||||
- h_magic
|
||||
- Magic number for identification, 0xEA020000.
|
||||
* - 0x4
|
||||
- \_\_le32
|
||||
- h\_refcount
|
||||
- __le32
|
||||
- h_refcount
|
||||
- Reference count.
|
||||
* - 0x8
|
||||
- \_\_le32
|
||||
- h\_blocks
|
||||
- __le32
|
||||
- h_blocks
|
||||
- Number of disk blocks used.
|
||||
* - 0xC
|
||||
- \_\_le32
|
||||
- h\_hash
|
||||
- __le32
|
||||
- h_hash
|
||||
- Hash value of all attributes.
|
||||
* - 0x10
|
||||
- \_\_le32
|
||||
- h\_checksum
|
||||
- __le32
|
||||
- h_checksum
|
||||
- Checksum of the extended attribute block.
|
||||
* - 0x14
|
||||
- \_\_u32
|
||||
- h\_reserved[3]
|
||||
- __u32
|
||||
- h_reserved[3]
|
||||
- Zero.
|
||||
|
||||
The checksum is calculated against the FS UUID, the 64-bit block number
|
||||
@ -100,46 +100,46 @@ Attributes stored inside an inode do not need be stored in sorted order.
|
||||
- Name
|
||||
- Description
|
||||
* - 0x0
|
||||
- \_\_u8
|
||||
- e\_name\_len
|
||||
- __u8
|
||||
- e_name_len
|
||||
- Length of name.
|
||||
* - 0x1
|
||||
- \_\_u8
|
||||
- e\_name\_index
|
||||
- __u8
|
||||
- e_name_index
|
||||
- Attribute name index. There is a discussion of this below.
|
||||
* - 0x2
|
||||
- \_\_le16
|
||||
- e\_value\_offs
|
||||
- __le16
|
||||
- e_value_offs
|
||||
- Location of this attribute's value on the disk block where it is stored.
|
||||
Multiple attributes can share the same value. For an inode attribute
|
||||
this value is relative to the start of the first entry; for a block this
|
||||
value is relative to the start of the block (i.e. the header).
|
||||
* - 0x4
|
||||
- \_\_le32
|
||||
- e\_value\_inum
|
||||
- __le32
|
||||
- e_value_inum
|
||||
- The inode where the value is stored. Zero indicates the value is in the
|
||||
same block as this entry. This field is only used if the
|
||||
INCOMPAT\_EA\_INODE feature is enabled.
|
||||
INCOMPAT_EA_INODE feature is enabled.
|
||||
* - 0x8
|
||||
- \_\_le32
|
||||
- e\_value\_size
|
||||
- __le32
|
||||
- e_value_size
|
||||
- Length of attribute value.
|
||||
* - 0xC
|
||||
- \_\_le32
|
||||
- e\_hash
|
||||
- __le32
|
||||
- e_hash
|
||||
- Hash value of attribute name and attribute value. The kernel doesn't
|
||||
update the hash for in-inode attributes, so for that case this value
|
||||
must be zero, because e2fsck validates any non-zero hash regardless of
|
||||
where the xattr lives.
|
||||
* - 0x10
|
||||
- char
|
||||
- e\_name[e\_name\_len]
|
||||
- e_name[e_name_len]
|
||||
- Attribute name. Does not include trailing NULL.
|
||||
|
||||
Attribute values can follow the end of the entry table. There appears to
|
||||
be a requirement that they be aligned to 4-byte boundaries. The values
|
||||
are stored starting at the end of the block and grow towards the
|
||||
xattr\_header/xattr\_entry table. When the two collide, the overflow is
|
||||
xattr_header/xattr_entry table. When the two collide, the overflow is
|
||||
put into a separate disk block. If the disk block fills up, the
|
||||
filesystem returns -ENOSPC.
|
||||
|
||||
@ -167,15 +167,15 @@ the key name. Here is a map of name index values to key prefixes:
|
||||
* - 1
|
||||
- “user.”
|
||||
* - 2
|
||||
- “system.posix\_acl\_access”
|
||||
- “system.posix_acl_access”
|
||||
* - 3
|
||||
- “system.posix\_acl\_default”
|
||||
- “system.posix_acl_default”
|
||||
* - 4
|
||||
- “trusted.”
|
||||
* - 6
|
||||
- “security.”
|
||||
* - 7
|
||||
- “system.” (inline\_data only?)
|
||||
- “system.” (inline_data only?)
|
||||
* - 8
|
||||
- “system.richacl” (SuSE kernels only?)
|
||||
|
||||
|
@ -23,7 +23,7 @@ means that a block group addresses 32 gigabytes instead of 128 megabytes,
|
||||
also shrinking the amount of file system overhead for metadata.
|
||||
|
||||
The administrator can set a block cluster size at mkfs time (which is
|
||||
stored in the s\_log\_cluster\_size field in the superblock); from then
|
||||
stored in the s_log_cluster_size field in the superblock); from then
|
||||
on, the block bitmaps track clusters, not individual blocks. This means
|
||||
that block groups can be several gigabytes in size (instead of just
|
||||
128MiB); however, the minimum allocation unit becomes a cluster, not a
|
||||
|
@ -9,15 +9,15 @@ group.
|
||||
The inode bitmap records which entries in the inode table are in use.
|
||||
|
||||
As with most bitmaps, one bit represents the usage status of one data
|
||||
block or inode table entry. This implies a block group size of 8 \*
|
||||
number\_of\_bytes\_in\_a\_logical\_block.
|
||||
block or inode table entry. This implies a block group size of 8 *
|
||||
number_of_bytes_in_a_logical_block.
|
||||
|
||||
NOTE: If ``BLOCK_UNINIT`` is set for a given block group, various parts
|
||||
of the kernel and e2fsprogs code pretends that the block bitmap contains
|
||||
zeros (i.e. all blocks in the group are free). However, it is not
|
||||
necessarily the case that no blocks are in use -- if ``meta_bg`` is set,
|
||||
the bitmaps and group descriptor live inside the group. Unfortunately,
|
||||
ext2fs\_test\_block\_bitmap2() will return '0' for those locations,
|
||||
ext2fs_test_block_bitmap2() will return '0' for those locations,
|
||||
which produces confusing debugfs output.
|
||||
|
||||
Inode Table
|
||||
|
@ -56,39 +56,39 @@ established that the super block and the group descriptor table, if
|
||||
present, will be at the beginning of the block group. The bitmaps and
|
||||
the inode table can be anywhere, and it is quite possible for the
|
||||
bitmaps to come after the inode table, or for both to be in different
|
||||
groups (flex\_bg). Leftover space is used for file data blocks, indirect
|
||||
groups (flex_bg). Leftover space is used for file data blocks, indirect
|
||||
block maps, extent tree blocks, and extended attributes.
|
||||
|
||||
Flexible Block Groups
|
||||
---------------------
|
||||
|
||||
Starting in ext4, there is a new feature called flexible block groups
|
||||
(flex\_bg). In a flex\_bg, several block groups are tied together as one
|
||||
(flex_bg). In a flex_bg, several block groups are tied together as one
|
||||
logical block group; the bitmap spaces and the inode table space in the
|
||||
first block group of the flex\_bg are expanded to include the bitmaps
|
||||
and inode tables of all other block groups in the flex\_bg. For example,
|
||||
if the flex\_bg size is 4, then group 0 will contain (in order) the
|
||||
first block group of the flex_bg are expanded to include the bitmaps
|
||||
and inode tables of all other block groups in the flex_bg. For example,
|
||||
if the flex_bg size is 4, then group 0 will contain (in order) the
|
||||
superblock, group descriptors, data block bitmaps for groups 0-3, inode
|
||||
bitmaps for groups 0-3, inode tables for groups 0-3, and the remaining
|
||||
space in group 0 is for file data. The effect of this is to group the
|
||||
block group metadata close together for faster loading, and to enable
|
||||
large files to be continuous on disk. Backup copies of the superblock
|
||||
and group descriptors are always at the beginning of block groups, even
|
||||
if flex\_bg is enabled. The number of block groups that make up a
|
||||
flex\_bg is given by 2 ^ ``sb.s_log_groups_per_flex``.
|
||||
if flex_bg is enabled. The number of block groups that make up a
|
||||
flex_bg is given by 2 ^ ``sb.s_log_groups_per_flex``.
|
||||
|
||||
Meta Block Groups
|
||||
-----------------
|
||||
|
||||
Without the option META\_BG, for safety concerns, all block group
|
||||
Without the option META_BG, for safety concerns, all block group
|
||||
descriptors copies are kept in the first block group. Given the default
|
||||
128MiB(2^27 bytes) block group size and 64-byte group descriptors, ext4
|
||||
can have at most 2^27/64 = 2^21 block groups. This limits the entire
|
||||
filesystem size to 2^21 * 2^27 = 2^48 bytes or 256TiB.
|
||||
|
||||
The solution to this problem is to use the metablock group feature
|
||||
(META\_BG), which is already in ext3 for all 2.6 releases. With the
|
||||
META\_BG feature, ext4 filesystems are partitioned into many metablock
|
||||
(META_BG), which is already in ext3 for all 2.6 releases. With the
|
||||
META_BG feature, ext4 filesystems are partitioned into many metablock
|
||||
groups. Each metablock group is a cluster of block groups whose group
|
||||
descriptor structures can be stored in a single disk block. For ext4
|
||||
filesystems with 4 KB block size, a single metablock group partition
|
||||
@ -110,7 +110,7 @@ bytes, a meta-block group contains 32 block groups for filesystems with
|
||||
a 1KB block size, and 128 block groups for filesystems with a 4KB
|
||||
blocksize. Filesystems can either be created using this new block group
|
||||
descriptor layout, or existing filesystems can be resized on-line, and
|
||||
the field s\_first\_meta\_bg in the superblock will indicate the first
|
||||
the field s_first_meta_bg in the superblock will indicate the first
|
||||
block group using this new layout.
|
||||
|
||||
Please see an important note about ``BLOCK_UNINIT`` in the section about
|
||||
@ -121,15 +121,15 @@ Lazy Block Group Initialization
|
||||
|
||||
New in ext4 are three block group descriptor flags that
|
||||
enable mkfs to skip initializing other parts of the block group
|
||||
metadata. Specifically, the INODE\_UNINIT and BLOCK\_UNINIT flags mean
|
||||
metadata. Specifically, the INODE_UNINIT and BLOCK_UNINIT flags mean
|
||||
that the inode and block bitmaps for that group can be calculated and
|
||||
therefore the on-disk bitmap blocks are not initialized. This is
|
||||
generally the case for an empty block group or a block group containing
|
||||
only fixed-location block group metadata. The INODE\_ZEROED flag means
|
||||
only fixed-location block group metadata. The INODE_ZEROED flag means
|
||||
that the inode table has been initialized; mkfs will unset this flag and
|
||||
rely on the kernel to initialize the inode tables in the background.
|
||||
|
||||
By not writing zeroes to the bitmaps and inode table, mkfs time is
|
||||
reduced considerably. Note the feature flag is RO\_COMPAT\_GDT\_CSUM,
|
||||
but the dumpe2fs output prints this as “uninit\_bg”. They are the same
|
||||
reduced considerably. Note the feature flag is RO_COMPAT_GDT_CSUM,
|
||||
but the dumpe2fs output prints this as “uninit_bg”. They are the same
|
||||
thing.
|
||||
|
@ -1,7 +1,7 @@
|
||||
.. SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
+---------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
|
||||
| i.i\_block Offset | Where It Points |
|
||||
| i.i_block Offset | Where It Points |
|
||||
+=====================+==============================================================================================================================================================================================================================+
|
||||
| 0 to 11 | Direct map to file blocks 0 to 11. |
|
||||
+---------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
|
||||
|
@ -4,7 +4,7 @@ Checksums
|
||||
---------
|
||||
|
||||
Starting in early 2012, metadata checksums were added to all major ext4
|
||||
and jbd2 data structures. The associated feature flag is metadata\_csum.
|
||||
and jbd2 data structures. The associated feature flag is metadata_csum.
|
||||
The desired checksum algorithm is indicated in the superblock, though as
|
||||
of October 2012 the only supported algorithm is crc32c. Some data
|
||||
structures did not have space to fit a full 32-bit checksum, so only the
|
||||
@ -20,7 +20,7 @@ encounters directory blocks that lack sufficient empty space to add a
|
||||
checksum, it will request that you run ``e2fsck -D`` to have the
|
||||
directories rebuilt with checksums. This has the added benefit of
|
||||
removing slack space from the directory files and rebalancing the htree
|
||||
indexes. If you \_ignore\_ this step, your directories will not be
|
||||
indexes. If you _ignore_ this step, your directories will not be
|
||||
protected by a checksum!
|
||||
|
||||
The following table describes the data elements that go into each type
|
||||
@ -35,39 +35,39 @@ of checksum. The checksum function is whatever the superblock describes
|
||||
- Length
|
||||
- Ingredients
|
||||
* - Superblock
|
||||
- \_\_le32
|
||||
- __le32
|
||||
- The entire superblock up to the checksum field. The UUID lives inside
|
||||
the superblock.
|
||||
* - MMP
|
||||
- \_\_le32
|
||||
- __le32
|
||||
- UUID + the entire MMP block up to the checksum field.
|
||||
* - Extended Attributes
|
||||
- \_\_le32
|
||||
- __le32
|
||||
- UUID + the entire extended attribute block. The checksum field is set to
|
||||
zero.
|
||||
* - Directory Entries
|
||||
- \_\_le32
|
||||
- __le32
|
||||
- UUID + inode number + inode generation + the directory block up to the
|
||||
fake entry enclosing the checksum field.
|
||||
* - HTREE Nodes
|
||||
- \_\_le32
|
||||
- __le32
|
||||
- UUID + inode number + inode generation + all valid extents + HTREE tail.
|
||||
The checksum field is set to zero.
|
||||
* - Extents
|
||||
- \_\_le32
|
||||
- __le32
|
||||
- UUID + inode number + inode generation + the entire extent block up to
|
||||
the checksum field.
|
||||
* - Bitmaps
|
||||
- \_\_le32 or \_\_le16
|
||||
- __le32 or __le16
|
||||
- UUID + the entire bitmap. Checksums are stored in the group descriptor,
|
||||
and truncated if the group descriptor size is 32 bytes (i.e. ^64bit)
|
||||
* - Inodes
|
||||
- \_\_le32
|
||||
- __le32
|
||||
- UUID + inode number + inode generation + the entire inode. The checksum
|
||||
field is set to zero. Each inode has its own checksum.
|
||||
* - Group Descriptors
|
||||
- \_\_le16
|
||||
- If metadata\_csum, then UUID + group number + the entire descriptor;
|
||||
else if gdt\_csum, then crc16(UUID + group number + the entire
|
||||
- __le16
|
||||
- If metadata_csum, then UUID + group number + the entire descriptor;
|
||||
else if gdt_csum, then crc16(UUID + group number + the entire
|
||||
descriptor). In all cases, only the lower 16 bits are stored.
|
||||
|
||||
|
@ -42,24 +42,24 @@ is at most 263 bytes long, though on disk you'll need to reference
|
||||
- Name
|
||||
- Description
|
||||
* - 0x0
|
||||
- \_\_le32
|
||||
- __le32
|
||||
- inode
|
||||
- Number of the inode that this directory entry points to.
|
||||
* - 0x4
|
||||
- \_\_le16
|
||||
- rec\_len
|
||||
- __le16
|
||||
- rec_len
|
||||
- Length of this directory entry. Must be a multiple of 4.
|
||||
* - 0x6
|
||||
- \_\_le16
|
||||
- name\_len
|
||||
- __le16
|
||||
- name_len
|
||||
- Length of the file name.
|
||||
* - 0x8
|
||||
- char
|
||||
- name[EXT4\_NAME\_LEN]
|
||||
- name[EXT4_NAME_LEN]
|
||||
- File name.
|
||||
|
||||
Since file names cannot be longer than 255 bytes, the new directory
|
||||
entry format shortens the name\_len field and uses the space for a file
|
||||
entry format shortens the name_len field and uses the space for a file
|
||||
type flag, probably to avoid having to load every inode during directory
|
||||
tree traversal. This format is ``ext4_dir_entry_2``, which is at most
|
||||
263 bytes long, though on disk you'll need to reference
|
||||
@ -74,24 +74,24 @@ tree traversal. This format is ``ext4_dir_entry_2``, which is at most
|
||||
- Name
|
||||
- Description
|
||||
* - 0x0
|
||||
- \_\_le32
|
||||
- __le32
|
||||
- inode
|
||||
- Number of the inode that this directory entry points to.
|
||||
* - 0x4
|
||||
- \_\_le16
|
||||
- rec\_len
|
||||
- __le16
|
||||
- rec_len
|
||||
- Length of this directory entry.
|
||||
* - 0x6
|
||||
- \_\_u8
|
||||
- name\_len
|
||||
- __u8
|
||||
- name_len
|
||||
- Length of the file name.
|
||||
* - 0x7
|
||||
- \_\_u8
|
||||
- file\_type
|
||||
- __u8
|
||||
- file_type
|
||||
- File type code, see ftype_ table below.
|
||||
* - 0x8
|
||||
- char
|
||||
- name[EXT4\_NAME\_LEN]
|
||||
- name[EXT4_NAME_LEN]
|
||||
- File name.
|
||||
|
||||
.. _ftype:
|
||||
@ -137,19 +137,19 @@ entry uses this extension, it may be up to 271 bytes.
|
||||
- Name
|
||||
- Description
|
||||
* - 0x0
|
||||
- \_\_le32
|
||||
- __le32
|
||||
- hash
|
||||
- The hash of the directory name
|
||||
* - 0x4
|
||||
- \_\_le32
|
||||
- minor\_hash
|
||||
- __le32
|
||||
- minor_hash
|
||||
- The minor hash of the directory name
|
||||
|
||||
|
||||
In order to add checksums to these classic directory blocks, a phony
|
||||
``struct ext4_dir_entry`` is placed at the end of each leaf block to
|
||||
hold the checksum. The directory entry is 12 bytes long. The inode
|
||||
number and name\_len fields are set to zero to fool old software into
|
||||
number and name_len fields are set to zero to fool old software into
|
||||
ignoring an apparently empty directory entry, and the checksum is stored
|
||||
in the place where the name normally goes. The structure is
|
||||
``struct ext4_dir_entry_tail``:
|
||||
@ -163,24 +163,24 @@ in the place where the name normally goes. The structure is
|
||||
- Name
|
||||
- Description
|
||||
* - 0x0
|
||||
- \_\_le32
|
||||
- det\_reserved\_zero1
|
||||
- __le32
|
||||
- det_reserved_zero1
|
||||
- Inode number, which must be zero.
|
||||
* - 0x4
|
||||
- \_\_le16
|
||||
- det\_rec\_len
|
||||
- __le16
|
||||
- det_rec_len
|
||||
- Length of this directory entry, which must be 12.
|
||||
* - 0x6
|
||||
- \_\_u8
|
||||
- det\_reserved\_zero2
|
||||
- __u8
|
||||
- det_reserved_zero2
|
||||
- Length of the file name, which must be zero.
|
||||
* - 0x7
|
||||
- \_\_u8
|
||||
- det\_reserved\_ft
|
||||
- __u8
|
||||
- det_reserved_ft
|
||||
- File type, which must be 0xDE.
|
||||
* - 0x8
|
||||
- \_\_le32
|
||||
- det\_checksum
|
||||
- __le32
|
||||
- det_checksum
|
||||
- Directory leaf block checksum.
|
||||
|
||||
The leaf directory block checksum is calculated against the FS UUID, the
|
||||
@ -194,7 +194,7 @@ Hash Tree Directories
|
||||
A linear array of directory entries isn't great for performance, so a
|
||||
new feature was added to ext3 to provide a faster (but peculiar)
|
||||
balanced tree keyed off a hash of the directory entry name. If the
|
||||
EXT4\_INDEX\_FL (0x1000) flag is set in the inode, this directory uses a
|
||||
EXT4_INDEX_FL (0x1000) flag is set in the inode, this directory uses a
|
||||
hashed btree (htree) to organize and find directory entries. For
|
||||
backwards read-only compatibility with ext2, this tree is actually
|
||||
hidden inside the directory file, masquerading as “empty” directory data
|
||||
@ -206,14 +206,14 @@ rest of the directory block is empty so that it moves on.
|
||||
The root of the tree always lives in the first data block of the
|
||||
directory. By ext2 custom, the '.' and '..' entries must appear at the
|
||||
beginning of this first block, so they are put here as two
|
||||
``struct ext4_dir_entry_2``\ s and not stored in the tree. The rest of
|
||||
``struct ext4_dir_entry_2`` s and not stored in the tree. The rest of
|
||||
the root node contains metadata about the tree and finally a hash->block
|
||||
map to find nodes that are lower in the htree. If
|
||||
``dx_root.info.indirect_levels`` is non-zero then the htree has two
|
||||
levels; the data block pointed to by the root node's map is an interior
|
||||
node, which is indexed by a minor hash. Interior nodes in this tree
|
||||
contain a zeroed out ``struct ext4_dir_entry_2`` followed by a
|
||||
minor\_hash->block map to find leafe nodes. Leaf nodes contain a linear
|
||||
minor_hash->block map to find leaf nodes. Leaf nodes contain a linear
|
||||
array of all ``struct ext4_dir_entry_2``; all of these entries
|
||||
(presumably) hash to the same value. If there is an overflow, the
|
||||
entries simply overflow into the next leaf node, and the
|
||||
@ -245,83 +245,83 @@ of a data block:
|
||||
- Name
|
||||
- Description
|
||||
* - 0x0
|
||||
- \_\_le32
|
||||
- __le32
|
||||
- dot.inode
|
||||
- inode number of this directory.
|
||||
* - 0x4
|
||||
- \_\_le16
|
||||
- dot.rec\_len
|
||||
- __le16
|
||||
- dot.rec_len
|
||||
- Length of this record, 12.
|
||||
* - 0x6
|
||||
- u8
|
||||
- dot.name\_len
|
||||
- dot.name_len
|
||||
- Length of the name, 1.
|
||||
* - 0x7
|
||||
- u8
|
||||
- dot.file\_type
|
||||
- dot.file_type
|
||||
- File type of this entry, 0x2 (directory) (if the feature flag is set).
|
||||
* - 0x8
|
||||
- char
|
||||
- dot.name[4]
|
||||
- “.\\0\\0\\0”
|
||||
- “.\0\0\0”
|
||||
* - 0xC
|
||||
- \_\_le32
|
||||
- __le32
|
||||
- dotdot.inode
|
||||
- inode number of parent directory.
|
||||
* - 0x10
|
||||
- \_\_le16
|
||||
- dotdot.rec\_len
|
||||
- block\_size - 12. The record length is long enough to cover all htree
|
||||
- __le16
|
||||
- dotdot.rec_len
|
||||
- block_size - 12. The record length is long enough to cover all htree
|
||||
data.
|
||||
* - 0x12
|
||||
- u8
|
||||
- dotdot.name\_len
|
||||
- dotdot.name_len
|
||||
- Length of the name, 2.
|
||||
* - 0x13
|
||||
- u8
|
||||
- dotdot.file\_type
|
||||
- dotdot.file_type
|
||||
- File type of this entry, 0x2 (directory) (if the feature flag is set).
|
||||
* - 0x14
|
||||
- char
|
||||
- dotdot\_name[4]
|
||||
- “..\\0\\0”
|
||||
- dotdot_name[4]
|
||||
- “..\0\0”
|
||||
* - 0x18
|
||||
- \_\_le32
|
||||
- struct dx\_root\_info.reserved\_zero
|
||||
- __le32
|
||||
- struct dx_root_info.reserved_zero
|
||||
- Zero.
|
||||
* - 0x1C
|
||||
- u8
|
||||
- struct dx\_root\_info.hash\_version
|
||||
- struct dx_root_info.hash_version
|
||||
- Hash type, see dirhash_ table below.
|
||||
* - 0x1D
|
||||
- u8
|
||||
- struct dx\_root\_info.info\_length
|
||||
- struct dx_root_info.info_length
|
||||
- Length of the tree information, 0x8.
|
||||
* - 0x1E
|
||||
- u8
|
||||
- struct dx\_root\_info.indirect\_levels
|
||||
- Depth of the htree. Cannot be larger than 3 if the INCOMPAT\_LARGEDIR
|
||||
- struct dx_root_info.indirect_levels
|
||||
- Depth of the htree. Cannot be larger than 3 if the INCOMPAT_LARGEDIR
|
||||
feature is set; cannot be larger than 2 otherwise.
|
||||
* - 0x1F
|
||||
- u8
|
||||
- struct dx\_root\_info.unused\_flags
|
||||
- struct dx_root_info.unused_flags
|
||||
-
|
||||
* - 0x20
|
||||
- \_\_le16
|
||||
- __le16
|
||||
- limit
|
||||
- Maximum number of dx\_entries that can follow this header, plus 1 for
|
||||
- Maximum number of dx_entries that can follow this header, plus 1 for
|
||||
the header itself.
|
||||
* - 0x22
|
||||
- \_\_le16
|
||||
- __le16
|
||||
- count
|
||||
- Actual number of dx\_entries that follow this header, plus 1 for the
|
||||
- Actual number of dx_entries that follow this header, plus 1 for the
|
||||
header itself.
|
||||
* - 0x24
|
||||
- \_\_le32
|
||||
- __le32
|
||||
- block
|
||||
- The block number (within the directory file) that goes with hash=0.
|
||||
* - 0x28
|
||||
- struct dx\_entry
|
||||
- struct dx_entry
|
||||
- entries[0]
|
||||
- As many 8-byte ``struct dx_entry`` as fits in the rest of the data block.
|
||||
|
||||
@ -362,38 +362,38 @@ also the full length of a data block:
|
||||
- Name
|
||||
- Description
|
||||
* - 0x0
|
||||
- \_\_le32
|
||||
- __le32
|
||||
- fake.inode
|
||||
- Zero, to make it look like this entry is not in use.
|
||||
* - 0x4
|
||||
- \_\_le16
|
||||
- fake.rec\_len
|
||||
- The size of the block, in order to hide all of the dx\_node data.
|
||||
- __le16
|
||||
- fake.rec_len
|
||||
- The size of the block, in order to hide all of the dx_node data.
|
||||
* - 0x6
|
||||
- u8
|
||||
- name\_len
|
||||
- name_len
|
||||
- Zero. There is no name for this “unused” directory entry.
|
||||
* - 0x7
|
||||
- u8
|
||||
- file\_type
|
||||
- file_type
|
||||
- Zero. There is no file type for this “unused” directory entry.
|
||||
* - 0x8
|
||||
- \_\_le16
|
||||
- __le16
|
||||
- limit
|
||||
- Maximum number of dx\_entries that can follow this header, plus 1 for
|
||||
- Maximum number of dx_entries that can follow this header, plus 1 for
|
||||
the header itself.
|
||||
* - 0xA
|
||||
- \_\_le16
|
||||
- __le16
|
||||
- count
|
||||
- Actual number of dx\_entries that follow this header, plus 1 for the
|
||||
- Actual number of dx_entries that follow this header, plus 1 for the
|
||||
header itself.
|
||||
* - 0xE
|
||||
- \_\_le32
|
||||
- __le32
|
||||
- block
|
||||
- The block number (within the directory file) that goes with the lowest
|
||||
hash value of this block. This value is stored in the parent block.
|
||||
* - 0x12
|
||||
- struct dx\_entry
|
||||
- struct dx_entry
|
||||
- entries[0]
|
||||
- As many 8-byte ``struct dx_entry`` as fits in the rest of the data block.
|
||||
|
||||
@ -410,11 +410,11 @@ long:
|
||||
- Name
|
||||
- Description
|
||||
* - 0x0
|
||||
- \_\_le32
|
||||
- __le32
|
||||
- hash
|
||||
- Hash code.
|
||||
* - 0x4
|
||||
- \_\_le32
|
||||
- __le32
|
||||
- block
|
||||
- Block number (within the directory file, not filesystem blocks) of the
|
||||
next node in the htree.
|
||||
@ -423,13 +423,13 @@ long:
|
||||
author.)
|
||||
|
||||
If metadata checksums are enabled, the last 8 bytes of the directory
|
||||
block (precisely the length of one dx\_entry) are used to store a
|
||||
block (precisely the length of one dx_entry) are used to store a
|
||||
``struct dx_tail``, which contains the checksum. The ``limit`` and
|
||||
``count`` entries in the dx\_root/dx\_node structures are adjusted as
|
||||
necessary to fit the dx\_tail into the block. If there is no space for
|
||||
the dx\_tail, the user is notified to run e2fsck -D to rebuild the
|
||||
``count`` entries in the dx_root/dx_node structures are adjusted as
|
||||
necessary to fit the dx_tail into the block. If there is no space for
|
||||
the dx_tail, the user is notified to run e2fsck -D to rebuild the
|
||||
directory index (which will ensure that there's space for the checksum).
|
||||
The dx\_tail structure is 8 bytes long and looks like this:
|
||||
The dx_tail structure is 8 bytes long and looks like this:
|
||||
|
||||
.. list-table::
|
||||
:widths: 8 8 24 40
|
||||
@ -441,13 +441,13 @@ The dx\_tail structure is 8 bytes long and looks like this:
|
||||
- Description
|
||||
* - 0x0
|
||||
- u32
|
||||
- dt\_reserved
|
||||
- dt_reserved
|
||||
- Zero.
|
||||
* - 0x4
|
||||
- \_\_le32
|
||||
- dt\_checksum
|
||||
- __le32
|
||||
- dt_checksum
|
||||
- Checksum of the htree directory block.
|
||||
|
||||
The checksum is calculated against the FS UUID, the htree index header
|
||||
(dx\_root or dx\_node), all of the htree indices (dx\_entry) that are in
|
||||
use, and the tail block (dx\_tail).
|
||||
(dx_root or dx_node), all of the htree indices (dx_entry) that are in
|
||||
use, and the tail block (dx_tail).
|
||||
|
@ -5,14 +5,14 @@ Large Extended Attribute Values
|
||||
|
||||
To enable ext4 to store extended attribute values that do not fit in the
|
||||
inode or in the single extended attribute block attached to an inode,
|
||||
the EA\_INODE feature allows us to store the value in the data blocks of
|
||||
the EA_INODE feature allows us to store the value in the data blocks of
|
||||
a regular file inode. This “EA inode” is linked only from the extended
|
||||
attribute name index and must not appear in a directory entry. The
|
||||
inode's i\_atime field is used to store a checksum of the xattr value;
|
||||
and i\_ctime/i\_version store a 64-bit reference count, which enables
|
||||
inode's i_atime field is used to store a checksum of the xattr value;
|
||||
and i_ctime/i_version store a 64-bit reference count, which enables
|
||||
sharing of large xattr values between multiple owning inodes. For
|
||||
backward compatibility with older versions of this feature, the
|
||||
i\_mtime/i\_generation *may* store a back-reference to the inode number
|
||||
and i\_generation of the **one** owning inode (in cases where the EA
|
||||
i_mtime/i_generation *may* store a back-reference to the inode number
|
||||
and i_generation of the **one** owning inode (in cases where the EA
|
||||
inode is not referenced by multiple inodes) to verify that the EA inode
|
||||
is the correct one being accessed.
|
||||
|
@ -7,34 +7,34 @@ Each block group on the filesystem has one of these descriptors
|
||||
associated with it. As noted in the Layout section above, the group
|
||||
descriptors (if present) are the second item in the block group. The
|
||||
standard configuration is for each block group to contain a full copy of
|
||||
the block group descriptor table unless the sparse\_super feature flag
|
||||
the block group descriptor table unless the sparse_super feature flag
|
||||
is set.
|
||||
|
||||
Notice how the group descriptor records the location of both bitmaps and
|
||||
the inode table (i.e. they can float). This means that within a block
|
||||
group, the only data structures with fixed locations are the superblock
|
||||
and the group descriptor table. The flex\_bg mechanism uses this
|
||||
and the group descriptor table. The flex_bg mechanism uses this
|
||||
property to group several block groups into a flex group and lay out all
|
||||
of the groups' bitmaps and inode tables into one long run in the first
|
||||
group of the flex group.
|
||||
|
||||
If the meta\_bg feature flag is set, then several block groups are
|
||||
grouped together into a meta group. Note that in the meta\_bg case,
|
||||
If the meta_bg feature flag is set, then several block groups are
|
||||
grouped together into a meta group. Note that in the meta_bg case,
|
||||
however, the first and last two block groups within the larger meta
|
||||
group contain only group descriptors for the groups inside the meta
|
||||
group.
|
||||
|
||||
flex\_bg and meta\_bg do not appear to be mutually exclusive features.
|
||||
flex_bg and meta_bg do not appear to be mutually exclusive features.
|
||||
|
||||
In ext2, ext3, and ext4 (when the 64bit feature is not enabled), the
|
||||
block group descriptor was only 32 bytes long and therefore ends at
|
||||
bg\_checksum. On an ext4 filesystem with the 64bit feature enabled, the
|
||||
bg_checksum. On an ext4 filesystem with the 64bit feature enabled, the
|
||||
block group descriptor expands to at least the 64 bytes described below;
|
||||
the size is stored in the superblock.
|
||||
|
||||
If gdt\_csum is set and metadata\_csum is not set, the block group
|
||||
If gdt_csum is set and metadata_csum is not set, the block group
|
||||
checksum is the crc16 of the FS UUID, the group number, and the group
|
||||
descriptor structure. If metadata\_csum is set, then the block group
|
||||
descriptor structure. If metadata_csum is set, then the block group
|
||||
checksum is the lower 16 bits of the checksum of the FS UUID, the group
|
||||
number, and the group descriptor structure. Both block and inode bitmap
|
||||
checksums are calculated against the FS UUID, the group number, and the
|
||||
@ -51,59 +51,59 @@ The block group descriptor is laid out in ``struct ext4_group_desc``.
|
||||
- Name
|
||||
- Description
|
||||
* - 0x0
|
||||
- \_\_le32
|
||||
- bg\_block\_bitmap\_lo
|
||||
- __le32
|
||||
- bg_block_bitmap_lo
|
||||
- Lower 32-bits of location of block bitmap.
|
||||
* - 0x4
|
||||
- \_\_le32
|
||||
- bg\_inode\_bitmap\_lo
|
||||
- __le32
|
||||
- bg_inode_bitmap_lo
|
||||
- Lower 32-bits of location of inode bitmap.
|
||||
* - 0x8
|
||||
- \_\_le32
|
||||
- bg\_inode\_table\_lo
|
||||
- __le32
|
||||
- bg_inode_table_lo
|
||||
- Lower 32-bits of location of inode table.
|
||||
* - 0xC
|
||||
- \_\_le16
|
||||
- bg\_free\_blocks\_count\_lo
|
||||
- __le16
|
||||
- bg_free_blocks_count_lo
|
||||
- Lower 16-bits of free block count.
|
||||
* - 0xE
|
||||
- \_\_le16
|
||||
- bg\_free\_inodes\_count\_lo
|
||||
- __le16
|
||||
- bg_free_inodes_count_lo
|
||||
- Lower 16-bits of free inode count.
|
||||
* - 0x10
|
||||
- \_\_le16
|
||||
- bg\_used\_dirs\_count\_lo
|
||||
- __le16
|
||||
- bg_used_dirs_count_lo
|
||||
- Lower 16-bits of directory count.
|
||||
* - 0x12
|
||||
- \_\_le16
|
||||
- bg\_flags
|
||||
- __le16
|
||||
- bg_flags
|
||||
- Block group flags. See the bgflags_ table below.
|
||||
* - 0x14
|
||||
- \_\_le32
|
||||
- bg\_exclude\_bitmap\_lo
|
||||
- __le32
|
||||
- bg_exclude_bitmap_lo
|
||||
- Lower 32-bits of location of snapshot exclusion bitmap.
|
||||
* - 0x18
|
||||
- \_\_le16
|
||||
- bg\_block\_bitmap\_csum\_lo
|
||||
- __le16
|
||||
- bg_block_bitmap_csum_lo
|
||||
- Lower 16-bits of the block bitmap checksum.
|
||||
* - 0x1A
|
||||
- \_\_le16
|
||||
- bg\_inode\_bitmap\_csum\_lo
|
||||
- __le16
|
||||
- bg_inode_bitmap_csum_lo
|
||||
- Lower 16-bits of the inode bitmap checksum.
|
||||
* - 0x1C
|
||||
- \_\_le16
|
||||
- bg\_itable\_unused\_lo
|
||||
- __le16
|
||||
- bg_itable_unused_lo
|
||||
- Lower 16-bits of unused inode count. If set, we needn't scan past the
|
||||
``(sb.s_inodes_per_group - gdt.bg_itable_unused)``\ th entry in the
|
||||
``(sb.s_inodes_per_group - gdt.bg_itable_unused)`` th entry in the
|
||||
inode table for this group.
|
||||
* - 0x1E
|
||||
- \_\_le16
|
||||
- bg\_checksum
|
||||
- Group descriptor checksum; crc16(sb\_uuid+group\_num+bg\_desc) if the
|
||||
RO\_COMPAT\_GDT\_CSUM feature is set, or
|
||||
crc32c(sb\_uuid+group\_num+bg\_desc) & 0xFFFF if the
|
||||
RO\_COMPAT\_METADATA\_CSUM feature is set. The bg\_checksum
|
||||
field in bg\_desc is skipped when calculating crc16 checksum,
|
||||
- __le16
|
||||
- bg_checksum
|
||||
- Group descriptor checksum; crc16(sb_uuid+group_num+bg_desc) if the
|
||||
RO_COMPAT_GDT_CSUM feature is set, or
|
||||
crc32c(sb_uuid+group_num+bg_desc) & 0xFFFF if the
|
||||
RO_COMPAT_METADATA_CSUM feature is set. The bg_checksum
|
||||
field in bg_desc is skipped when calculating crc16 checksum,
|
||||
and set to zero if crc32c checksum is used.
|
||||
* -
|
||||
-
|
||||
@ -111,48 +111,48 @@ The block group descriptor is laid out in ``struct ext4_group_desc``.
|
||||
- These fields only exist if the 64bit feature is enabled and s_desc_size
|
||||
> 32.
|
||||
* - 0x20
|
||||
- \_\_le32
|
||||
- bg\_block\_bitmap\_hi
|
||||
- __le32
|
||||
- bg_block_bitmap_hi
|
||||
- Upper 32-bits of location of block bitmap.
|
||||
* - 0x24
|
||||
- \_\_le32
|
||||
- bg\_inode\_bitmap\_hi
|
||||
- __le32
|
||||
- bg_inode_bitmap_hi
|
||||
- Upper 32-bits of location of inode bitmap.
|
||||
* - 0x28
|
||||
- \_\_le32
|
||||
- bg\_inode\_table\_hi
|
||||
- __le32
|
||||
- bg_inode_table_hi
|
||||
- Upper 32-bits of location of inode table.
|
||||
* - 0x2C
|
||||
- \_\_le16
|
||||
- bg\_free\_blocks\_count\_hi
|
||||
- __le16
|
||||
- bg_free_blocks_count_hi
|
||||
- Upper 16-bits of free block count.
|
||||
* - 0x2E
|
||||
- \_\_le16
|
||||
- bg\_free\_inodes\_count\_hi
|
||||
- __le16
|
||||
- bg_free_inodes_count_hi
|
||||
- Upper 16-bits of free inode count.
|
||||
* - 0x30
|
||||
- \_\_le16
|
||||
- bg\_used\_dirs\_count\_hi
|
||||
- __le16
|
||||
- bg_used_dirs_count_hi
|
||||
- Upper 16-bits of directory count.
|
||||
* - 0x32
|
||||
- \_\_le16
|
||||
- bg\_itable\_unused\_hi
|
||||
- __le16
|
||||
- bg_itable_unused_hi
|
||||
- Upper 16-bits of unused inode count.
|
||||
* - 0x34
|
||||
- \_\_le32
|
||||
- bg\_exclude\_bitmap\_hi
|
||||
- __le32
|
||||
- bg_exclude_bitmap_hi
|
||||
- Upper 32-bits of location of snapshot exclusion bitmap.
|
||||
* - 0x38
|
||||
- \_\_le16
|
||||
- bg\_block\_bitmap\_csum\_hi
|
||||
- __le16
|
||||
- bg_block_bitmap_csum_hi
|
||||
- Upper 16-bits of the block bitmap checksum.
|
||||
* - 0x3A
|
||||
- \_\_le16
|
||||
- bg\_inode\_bitmap\_csum\_hi
|
||||
- __le16
|
||||
- bg_inode_bitmap_csum_hi
|
||||
- Upper 16-bits of the inode bitmap checksum.
|
||||
* - 0x3C
|
||||
- \_\_u32
|
||||
- bg\_reserved
|
||||
- __u32
|
||||
- bg_reserved
|
||||
- Padding to 64 bytes.
|
||||
|
||||
.. _bgflags:
|
||||
@ -166,8 +166,8 @@ Block group flags can be any combination of the following:
|
||||
* - Value
|
||||
- Description
|
||||
* - 0x1
|
||||
- inode table and bitmap are not initialized (EXT4\_BG\_INODE\_UNINIT).
|
||||
- inode table and bitmap are not initialized (EXT4_BG_INODE_UNINIT).
|
||||
* - 0x2
|
||||
- block bitmap is not initialized (EXT4\_BG\_BLOCK\_UNINIT).
|
||||
- block bitmap is not initialized (EXT4_BG_BLOCK_UNINIT).
|
||||
* - 0x4
|
||||
- inode table is zeroed (EXT4\_BG\_INODE\_ZEROED).
|
||||
- inode table is zeroed (EXT4_BG_INODE_ZEROED).
|
||||
|
@ -1,6 +1,6 @@
|
||||
.. SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
The Contents of inode.i\_block
|
||||
The Contents of inode.i_block
|
||||
------------------------------
|
||||
|
||||
Depending on the type of file an inode describes, the 60 bytes of
|
||||
@ -47,7 +47,7 @@ In ext4, the file to logical block map has been replaced with an extent
|
||||
tree. Under the old scheme, allocating a contiguous run of 1,000 blocks
|
||||
requires an indirect block to map all 1,000 entries; with extents, the
|
||||
mapping is reduced to a single ``struct ext4_extent`` with
|
||||
``ee_len = 1000``. If flex\_bg is enabled, it is possible to allocate
|
||||
``ee_len = 1000``. If flex_bg is enabled, it is possible to allocate
|
||||
very large files with a single extent, at a considerable reduction in
|
||||
metadata block use, and some improvement in disk efficiency. The inode
|
||||
must have the extents flag (0x80000) set for this feature to be in
|
||||
@ -76,28 +76,28 @@ which is 12 bytes long:
|
||||
- Name
|
||||
- Description
|
||||
* - 0x0
|
||||
- \_\_le16
|
||||
- eh\_magic
|
||||
- __le16
|
||||
- eh_magic
|
||||
- Magic number, 0xF30A.
|
||||
* - 0x2
|
||||
- \_\_le16
|
||||
- eh\_entries
|
||||
- __le16
|
||||
- eh_entries
|
||||
- Number of valid entries following the header.
|
||||
* - 0x4
|
||||
- \_\_le16
|
||||
- eh\_max
|
||||
- __le16
|
||||
- eh_max
|
||||
- Maximum number of entries that could follow the header.
|
||||
* - 0x6
|
||||
- \_\_le16
|
||||
- eh\_depth
|
||||
- __le16
|
||||
- eh_depth
|
||||
- Depth of this extent node in the extent tree. 0 = this extent node
|
||||
points to data blocks; otherwise, this extent node points to other
|
||||
extent nodes. The extent tree can be at most 5 levels deep: a logical
|
||||
block number can be at most ``2^32``, and the smallest ``n`` that
|
||||
satisfies ``4*(((blocksize - 12)/12)^n) >= 2^32`` is 5.
|
||||
* - 0x8
|
||||
- \_\_le32
|
||||
- eh\_generation
|
||||
- __le32
|
||||
- eh_generation
|
||||
- Generation of the tree. (Used by Lustre, but not standard ext4).
|
||||
|
||||
Internal nodes of the extent tree, also known as index nodes, are
|
||||
@ -112,22 +112,22 @@ recorded as ``struct ext4_extent_idx``, and are 12 bytes long:
|
||||
- Name
|
||||
- Description
|
||||
* - 0x0
|
||||
- \_\_le32
|
||||
- ei\_block
|
||||
- __le32
|
||||
- ei_block
|
||||
- This index node covers file blocks from 'block' onward.
|
||||
* - 0x4
|
||||
- \_\_le32
|
||||
- ei\_leaf\_lo
|
||||
- __le32
|
||||
- ei_leaf_lo
|
||||
- Lower 32-bits of the block number of the extent node that is the next
|
||||
level lower in the tree. The tree node pointed to can be either another
|
||||
internal node or a leaf node, described below.
|
||||
* - 0x8
|
||||
- \_\_le16
|
||||
- ei\_leaf\_hi
|
||||
- __le16
|
||||
- ei_leaf_hi
|
||||
- Upper 16-bits of the previous field.
|
||||
* - 0xA
|
||||
- \_\_u16
|
||||
- ei\_unused
|
||||
- __u16
|
||||
- ei_unused
|
||||
-
|
||||
|
||||
Leaf nodes of the extent tree are recorded as ``struct ext4_extent``,
|
||||
@ -142,24 +142,24 @@ and are also 12 bytes long:
|
||||
- Name
|
||||
- Description
|
||||
* - 0x0
|
||||
- \_\_le32
|
||||
- ee\_block
|
||||
- __le32
|
||||
- ee_block
|
||||
- First file block number that this extent covers.
|
||||
* - 0x4
|
||||
- \_\_le16
|
||||
- ee\_len
|
||||
- __le16
|
||||
- ee_len
|
||||
- Number of blocks covered by extent. If the value of this field is <=
|
||||
32768, the extent is initialized. If the value of the field is > 32768,
|
||||
the extent is uninitialized and the actual extent length is ``ee_len`` -
|
||||
32768. Therefore, the maximum length of an initialized extent is 32768
|
||||
blocks, and the maximum length of an uninitialized extent is 32767.
|
||||
* - 0x6
|
||||
- \_\_le16
|
||||
- ee\_start\_hi
|
||||
- __le16
|
||||
- ee_start_hi
|
||||
- Upper 16-bits of the block number to which this extent points.
|
||||
* - 0x8
|
||||
- \_\_le32
|
||||
- ee\_start\_lo
|
||||
- __le32
|
||||
- ee_start_lo
|
||||
- Lower 32-bits of the block number to which this extent points.
|
||||
|
||||
Prior to the introduction of metadata checksums, the extent header +
|
||||
@ -182,8 +182,8 @@ including) the checksum itself.
|
||||
- Name
|
||||
- Description
|
||||
* - 0x0
|
||||
- \_\_le32
|
||||
- eb\_checksum
|
||||
- __le32
|
||||
- eb_checksum
|
||||
- Checksum of the extent block, crc32c(uuid+inum+igeneration+extentblock)
|
||||
|
||||
Inline Data
|
||||
|
@ -11,12 +11,12 @@ file is smaller than 60 bytes, then the data are stored inline in
|
||||
attribute space, then it might be found as an extended attribute
|
||||
“system.data” within the inode body (“ibody EA”). This of course
|
||||
constrains the amount of extended attributes one can attach to an inode.
|
||||
If the data size increases beyond i\_block + ibody EA, a regular block
|
||||
If the data size increases beyond i_block + ibody EA, a regular block
|
||||
is allocated and the contents moved to that block.
|
||||
|
||||
Pending a change to compact the extended attribute key used to store
|
||||
inline data, one ought to be able to store 160 bytes of data in a
|
||||
256-byte inode (as of June 2015, when i\_extra\_isize is 28). Prior to
|
||||
256-byte inode (as of June 2015, when i_extra_isize is 28). Prior to
|
||||
that, the limit was 156 bytes due to inefficient use of inode space.
|
||||
|
||||
The inline data feature requires the presence of an extended attribute
|
||||
@ -25,12 +25,12 @@ for “system.data”, even if the attribute value is zero length.
|
||||
Inline Directories
|
||||
~~~~~~~~~~~~~~~~~~
|
||||
|
||||
The first four bytes of i\_block are the inode number of the parent
|
||||
The first four bytes of i_block are the inode number of the parent
|
||||
directory. Following that is a 56-byte space for an array of directory
|
||||
entries; see ``struct ext4_dir_entry``. If there is a “system.data”
|
||||
attribute in the inode body, the EA value is an array of
|
||||
``struct ext4_dir_entry`` as well. Note that for inline directories, the
|
||||
i\_block and EA space are treated as separate dirent blocks; directory
|
||||
i_block and EA space are treated as separate dirent blocks; directory
|
||||
entries cannot span the two.
|
||||
|
||||
Inline directory entries are not checksummed, as the inode checksum
|
||||
|
@ -38,138 +38,138 @@ The inode table entry is laid out in ``struct ext4_inode``.
|
||||
- Name
|
||||
- Description
|
||||
* - 0x0
|
||||
- \_\_le16
|
||||
- i\_mode
|
||||
- __le16
|
||||
- i_mode
|
||||
- File mode. See the table i_mode_ below.
|
||||
* - 0x2
|
||||
- \_\_le16
|
||||
- i\_uid
|
||||
- __le16
|
||||
- i_uid
|
||||
- Lower 16-bits of Owner UID.
|
||||
* - 0x4
|
||||
- \_\_le32
|
||||
- i\_size\_lo
|
||||
- __le32
|
||||
- i_size_lo
|
||||
- Lower 32-bits of size in bytes.
|
||||
* - 0x8
|
||||
- \_\_le32
|
||||
- i\_atime
|
||||
- Last access time, in seconds since the epoch. However, if the EA\_INODE
|
||||
- __le32
|
||||
- i_atime
|
||||
- Last access time, in seconds since the epoch. However, if the EA_INODE
|
||||
inode flag is set, this inode stores an extended attribute value and
|
||||
this field contains the checksum of the value.
|
||||
* - 0xC
|
||||
- \_\_le32
|
||||
- i\_ctime
|
||||
- __le32
|
||||
- i_ctime
|
||||
- Last inode change time, in seconds since the epoch. However, if the
|
||||
EA\_INODE inode flag is set, this inode stores an extended attribute
|
||||
EA_INODE inode flag is set, this inode stores an extended attribute
|
||||
value and this field contains the lower 32 bits of the attribute value's
|
||||
reference count.
|
||||
* - 0x10
|
||||
- \_\_le32
|
||||
- i\_mtime
|
||||
- __le32
|
||||
- i_mtime
|
||||
- Last data modification time, in seconds since the epoch. However, if the
|
||||
EA\_INODE inode flag is set, this inode stores an extended attribute
|
||||
EA_INODE inode flag is set, this inode stores an extended attribute
|
||||
value and this field contains the number of the inode that owns the
|
||||
extended attribute.
|
||||
* - 0x14
|
||||
- \_\_le32
|
||||
- i\_dtime
|
||||
- __le32
|
||||
- i_dtime
|
||||
- Deletion Time, in seconds since the epoch.
|
||||
* - 0x18
|
||||
- \_\_le16
|
||||
- i\_gid
|
||||
- __le16
|
||||
- i_gid
|
||||
- Lower 16-bits of GID.
|
||||
* - 0x1A
|
||||
- \_\_le16
|
||||
- i\_links\_count
|
||||
- __le16
|
||||
- i_links_count
|
||||
- Hard link count. Normally, ext4 does not permit an inode to have more
|
||||
than 65,000 hard links. This applies to files as well as directories,
|
||||
which means that there cannot be more than 64,998 subdirectories in a
|
||||
directory (each subdirectory's '..' entry counts as a hard link, as does
|
||||
the '.' entry in the directory itself). With the DIR\_NLINK feature
|
||||
the '.' entry in the directory itself). With the DIR_NLINK feature
|
||||
enabled, ext4 supports more than 64,998 subdirectories by setting this
|
||||
field to 1 to indicate that the number of hard links is not known.
|
||||
* - 0x1C
|
||||
- \_\_le32
|
||||
- i\_blocks\_lo
|
||||
- Lower 32-bits of “block” count. If the huge\_file feature flag is not
|
||||
- __le32
|
||||
- i_blocks_lo
|
||||
- Lower 32-bits of “block” count. If the huge_file feature flag is not
|
||||
set on the filesystem, the file consumes ``i_blocks_lo`` 512-byte blocks
|
||||
on disk. If huge\_file is set and EXT4\_HUGE\_FILE\_FL is NOT set in
|
||||
on disk. If huge_file is set and EXT4_HUGE_FILE_FL is NOT set in
|
||||
``inode.i_flags``, then the file consumes ``i_blocks_lo + (i_blocks_hi
|
||||
<< 32)`` 512-byte blocks on disk. If huge\_file is set and
|
||||
EXT4\_HUGE\_FILE\_FL IS set in ``inode.i_flags``, then this file
|
||||
<< 32)`` 512-byte blocks on disk. If huge_file is set and
|
||||
EXT4_HUGE_FILE_FL IS set in ``inode.i_flags``, then this file
|
||||
consumes ``i_blocks_lo + (i_blocks_hi << 32)`` filesystem blocks on
|
||||
disk.
|
||||
* - 0x20
|
||||
- \_\_le32
|
||||
- i\_flags
|
||||
- __le32
|
||||
- i_flags
|
||||
- Inode flags. See the table i_flags_ below.
|
||||
* - 0x24
|
||||
- 4 bytes
|
||||
- i\_osd1
|
||||
- i_osd1
|
||||
- See the table i_osd1_ for more details.
|
||||
* - 0x28
|
||||
- 60 bytes
|
||||
- i\_block[EXT4\_N\_BLOCKS=15]
|
||||
- Block map or extent tree. See the section “The Contents of inode.i\_block”.
|
||||
- i_block[EXT4_N_BLOCKS=15]
|
||||
- Block map or extent tree. See the section “The Contents of inode.i_block”.
|
||||
* - 0x64
|
||||
- \_\_le32
|
||||
- i\_generation
|
||||
- __le32
|
||||
- i_generation
|
||||
- File version (for NFS).
|
||||
* - 0x68
|
||||
- \_\_le32
|
||||
- i\_file\_acl\_lo
|
||||
- __le32
|
||||
- i_file_acl_lo
|
||||
- Lower 32-bits of extended attribute block. ACLs are of course one of
|
||||
many possible extended attributes; I think the name of this field is a
|
||||
result of the first use of extended attributes being for ACLs.
|
||||
* - 0x6C
|
||||
- \_\_le32
|
||||
- i\_size\_high / i\_dir\_acl
|
||||
- __le32
|
||||
- i_size_high / i_dir_acl
|
||||
- Upper 32-bits of file/directory size. In ext2/3 this field was named
|
||||
i\_dir\_acl, though it was usually set to zero and never used.
|
||||
i_dir_acl, though it was usually set to zero and never used.
|
||||
* - 0x70
|
||||
- \_\_le32
|
||||
- i\_obso\_faddr
|
||||
- __le32
|
||||
- i_obso_faddr
|
||||
- (Obsolete) fragment address.
|
||||
* - 0x74
|
||||
- 12 bytes
|
||||
- i\_osd2
|
||||
- i_osd2
|
||||
- See the table i_osd2_ for more details.
|
||||
* - 0x80
|
||||
- \_\_le16
|
||||
- i\_extra\_isize
|
||||
- __le16
|
||||
- i_extra_isize
|
||||
- Size of this inode - 128. Alternately, the size of the extended inode
|
||||
fields beyond the original ext2 inode, including this field.
|
||||
* - 0x82
|
||||
- \_\_le16
|
||||
- i\_checksum\_hi
|
||||
- __le16
|
||||
- i_checksum_hi
|
||||
- Upper 16-bits of the inode checksum.
|
||||
* - 0x84
|
||||
- \_\_le32
|
||||
- i\_ctime\_extra
|
||||
- __le32
|
||||
- i_ctime_extra
|
||||
- Extra change time bits. This provides sub-second precision. See Inode
|
||||
Timestamps section.
|
||||
* - 0x88
|
||||
- \_\_le32
|
||||
- i\_mtime\_extra
|
||||
- __le32
|
||||
- i_mtime_extra
|
||||
- Extra modification time bits. This provides sub-second precision.
|
||||
* - 0x8C
|
||||
- \_\_le32
|
||||
- i\_atime\_extra
|
||||
- __le32
|
||||
- i_atime_extra
|
||||
- Extra access time bits. This provides sub-second precision.
|
||||
* - 0x90
|
||||
- \_\_le32
|
||||
- i\_crtime
|
||||
- __le32
|
||||
- i_crtime
|
||||
- File creation time, in seconds since the epoch.
|
||||
* - 0x94
|
||||
- \_\_le32
|
||||
- i\_crtime\_extra
|
||||
- __le32
|
||||
- i_crtime_extra
|
||||
- Extra file creation time bits. This provides sub-second precision.
|
||||
* - 0x98
|
||||
- \_\_le32
|
||||
- i\_version\_hi
|
||||
- __le32
|
||||
- i_version_hi
|
||||
- Upper 32-bits for version number.
|
||||
* - 0x9C
|
||||
- \_\_le32
|
||||
- i\_projid
|
||||
- __le32
|
||||
- i_projid
|
||||
- Project ID.
|
||||
|
||||
.. _i_mode:
|
||||
@ -183,45 +183,45 @@ The ``i_mode`` value is a combination of the following flags:
|
||||
* - Value
|
||||
- Description
|
||||
* - 0x1
|
||||
- S\_IXOTH (Others may execute)
|
||||
- S_IXOTH (Others may execute)
|
||||
* - 0x2
|
||||
- S\_IWOTH (Others may write)
|
||||
- S_IWOTH (Others may write)
|
||||
* - 0x4
|
||||
- S\_IROTH (Others may read)
|
||||
- S_IROTH (Others may read)
|
||||
* - 0x8
|
||||
- S\_IXGRP (Group members may execute)
|
||||
- S_IXGRP (Group members may execute)
|
||||
* - 0x10
|
||||
- S\_IWGRP (Group members may write)
|
||||
- S_IWGRP (Group members may write)
|
||||
* - 0x20
|
||||
- S\_IRGRP (Group members may read)
|
||||
- S_IRGRP (Group members may read)
|
||||
* - 0x40
|
||||
- S\_IXUSR (Owner may execute)
|
||||
- S_IXUSR (Owner may execute)
|
||||
* - 0x80
|
||||
- S\_IWUSR (Owner may write)
|
||||
- S_IWUSR (Owner may write)
|
||||
* - 0x100
|
||||
- S\_IRUSR (Owner may read)
|
||||
- S_IRUSR (Owner may read)
|
||||
* - 0x200
|
||||
- S\_ISVTX (Sticky bit)
|
||||
- S_ISVTX (Sticky bit)
|
||||
* - 0x400
|
||||
- S\_ISGID (Set GID)
|
||||
- S_ISGID (Set GID)
|
||||
* - 0x800
|
||||
- S\_ISUID (Set UID)
|
||||
- S_ISUID (Set UID)
|
||||
* -
|
||||
- These are mutually-exclusive file types:
|
||||
* - 0x1000
|
||||
- S\_IFIFO (FIFO)
|
||||
- S_IFIFO (FIFO)
|
||||
* - 0x2000
|
||||
- S\_IFCHR (Character device)
|
||||
- S_IFCHR (Character device)
|
||||
* - 0x4000
|
||||
- S\_IFDIR (Directory)
|
||||
- S_IFDIR (Directory)
|
||||
* - 0x6000
|
||||
- S\_IFBLK (Block device)
|
||||
- S_IFBLK (Block device)
|
||||
* - 0x8000
|
||||
- S\_IFREG (Regular file)
|
||||
- S_IFREG (Regular file)
|
||||
* - 0xA000
|
||||
- S\_IFLNK (Symbolic link)
|
||||
- S_IFLNK (Symbolic link)
|
||||
* - 0xC000
|
||||
- S\_IFSOCK (Socket)
|
||||
- S_IFSOCK (Socket)
|
||||
|
||||
.. _i_flags:
|
||||
|
||||
@ -234,56 +234,56 @@ The ``i_flags`` field is a combination of these values:
|
||||
* - Value
|
||||
- Description
|
||||
* - 0x1
|
||||
- This file requires secure deletion (EXT4\_SECRM\_FL). (not implemented)
|
||||
- This file requires secure deletion (EXT4_SECRM_FL). (not implemented)
|
||||
* - 0x2
|
||||
- This file should be preserved, should undeletion be desired
|
||||
(EXT4\_UNRM\_FL). (not implemented)
|
||||
(EXT4_UNRM_FL). (not implemented)
|
||||
* - 0x4
|
||||
- File is compressed (EXT4\_COMPR\_FL). (not really implemented)
|
||||
- File is compressed (EXT4_COMPR_FL). (not really implemented)
|
||||
* - 0x8
|
||||
- All writes to the file must be synchronous (EXT4\_SYNC\_FL).
|
||||
- All writes to the file must be synchronous (EXT4_SYNC_FL).
|
||||
* - 0x10
|
||||
- File is immutable (EXT4\_IMMUTABLE\_FL).
|
||||
- File is immutable (EXT4_IMMUTABLE_FL).
|
||||
* - 0x20
|
||||
- File can only be appended (EXT4\_APPEND\_FL).
|
||||
- File can only be appended (EXT4_APPEND_FL).
|
||||
* - 0x40
|
||||
- The dump(1) utility should not dump this file (EXT4\_NODUMP\_FL).
|
||||
- The dump(1) utility should not dump this file (EXT4_NODUMP_FL).
|
||||
* - 0x80
|
||||
- Do not update access time (EXT4\_NOATIME\_FL).
|
||||
- Do not update access time (EXT4_NOATIME_FL).
|
||||
* - 0x100
|
||||
- Dirty compressed file (EXT4\_DIRTY\_FL). (not used)
|
||||
- Dirty compressed file (EXT4_DIRTY_FL). (not used)
|
||||
* - 0x200
|
||||
- File has one or more compressed clusters (EXT4\_COMPRBLK\_FL). (not used)
|
||||
- File has one or more compressed clusters (EXT4_COMPRBLK_FL). (not used)
|
||||
* - 0x400
|
||||
- Do not compress file (EXT4\_NOCOMPR\_FL). (not used)
|
||||
- Do not compress file (EXT4_NOCOMPR_FL). (not used)
|
||||
* - 0x800
|
||||
- Encrypted inode (EXT4\_ENCRYPT\_FL). This bit value previously was
|
||||
EXT4\_ECOMPR\_FL (compression error), which was never used.
|
||||
- Encrypted inode (EXT4_ENCRYPT_FL). This bit value previously was
|
||||
EXT4_ECOMPR_FL (compression error), which was never used.
|
||||
* - 0x1000
|
||||
- Directory has hashed indexes (EXT4\_INDEX\_FL).
|
||||
- Directory has hashed indexes (EXT4_INDEX_FL).
|
||||
* - 0x2000
|
||||
- AFS magic directory (EXT4\_IMAGIC\_FL).
|
||||
- AFS magic directory (EXT4_IMAGIC_FL).
|
||||
* - 0x4000
|
||||
- File data must always be written through the journal
|
||||
(EXT4\_JOURNAL\_DATA\_FL).
|
||||
(EXT4_JOURNAL_DATA_FL).
|
||||
* - 0x8000
|
||||
- File tail should not be merged (EXT4\_NOTAIL\_FL). (not used by ext4)
|
||||
- File tail should not be merged (EXT4_NOTAIL_FL). (not used by ext4)
|
||||
* - 0x10000
|
||||
- All directory entry data should be written synchronously (see
|
||||
``dirsync``) (EXT4\_DIRSYNC\_FL).
|
||||
``dirsync``) (EXT4_DIRSYNC_FL).
|
||||
* - 0x20000
|
||||
- Top of directory hierarchy (EXT4\_TOPDIR\_FL).
|
||||
- Top of directory hierarchy (EXT4_TOPDIR_FL).
|
||||
* - 0x40000
|
||||
- This is a huge file (EXT4\_HUGE\_FILE\_FL).
|
||||
- This is a huge file (EXT4_HUGE_FILE_FL).
|
||||
* - 0x80000
|
||||
- Inode uses extents (EXT4\_EXTENTS\_FL).
|
||||
- Inode uses extents (EXT4_EXTENTS_FL).
|
||||
* - 0x100000
|
||||
- Verity protected file (EXT4\_VERITY\_FL).
|
||||
- Verity protected file (EXT4_VERITY_FL).
|
||||
* - 0x200000
|
||||
- Inode stores a large extended attribute value in its data blocks
|
||||
(EXT4\_EA\_INODE\_FL).
|
||||
(EXT4_EA_INODE_FL).
|
||||
* - 0x400000
|
||||
- This file has blocks allocated past EOF (EXT4\_EOFBLOCKS\_FL).
|
||||
- This file has blocks allocated past EOF (EXT4_EOFBLOCKS_FL).
|
||||
(deprecated)
|
||||
* - 0x01000000
|
||||
- Inode is a snapshot (``EXT4_SNAPFILE_FL``). (not in mainline)
|
||||
@ -294,21 +294,21 @@ The ``i_flags`` field is a combination of these values:
|
||||
- Snapshot shrink has completed (``EXT4_SNAPFILE_SHRUNK_FL``). (not in
|
||||
mainline)
|
||||
* - 0x10000000
|
||||
- Inode has inline data (EXT4\_INLINE\_DATA\_FL).
|
||||
- Inode has inline data (EXT4_INLINE_DATA_FL).
|
||||
* - 0x20000000
|
||||
- Create children with the same project ID (EXT4\_PROJINHERIT\_FL).
|
||||
- Create children with the same project ID (EXT4_PROJINHERIT_FL).
|
||||
* - 0x80000000
|
||||
- Reserved for ext4 library (EXT4\_RESERVED\_FL).
|
||||
- Reserved for ext4 library (EXT4_RESERVED_FL).
|
||||
* -
|
||||
- Aggregate flags:
|
||||
* - 0x705BDFFF
|
||||
- User-visible flags.
|
||||
* - 0x604BC0FF
|
||||
- User-modifiable flags. Note that while EXT4\_JOURNAL\_DATA\_FL and
|
||||
EXT4\_EXTENTS\_FL can be set with setattr, they are not in the kernel's
|
||||
EXT4\_FL\_USER\_MODIFIABLE mask, since it needs to handle the setting of
|
||||
- User-modifiable flags. Note that while EXT4_JOURNAL_DATA_FL and
|
||||
EXT4_EXTENTS_FL can be set with setattr, they are not in the kernel's
|
||||
EXT4_FL_USER_MODIFIABLE mask, since it needs to handle the setting of
|
||||
these flags in a special manner and they are masked out of the set of
|
||||
flags that are saved directly to i\_flags.
|
||||
flags that are saved directly to i_flags.
|
||||
|
||||
.. _i_osd1:
|
||||
|
||||
@ -325,9 +325,9 @@ Linux:
|
||||
- Name
|
||||
- Description
|
||||
* - 0x0
|
||||
- \_\_le32
|
||||
- l\_i\_version
|
||||
- Inode version. However, if the EA\_INODE inode flag is set, this inode
|
||||
- __le32
|
||||
- l_i_version
|
||||
- Inode version. However, if the EA_INODE inode flag is set, this inode
|
||||
stores an extended attribute value and this field contains the upper 32
|
||||
bits of the attribute value's reference count.
|
||||
|
||||
@ -342,8 +342,8 @@ Hurd:
|
||||
- Name
|
||||
- Description
|
||||
* - 0x0
|
||||
- \_\_le32
|
||||
- h\_i\_translator
|
||||
- __le32
|
||||
- h_i_translator
|
||||
- ??
|
||||
|
||||
Masix:
|
||||
@ -357,8 +357,8 @@ Masix:
|
||||
- Name
|
||||
- Description
|
||||
* - 0x0
|
||||
- \_\_le32
|
||||
- m\_i\_reserved
|
||||
- __le32
|
||||
- m_i_reserved
|
||||
- ??
|
||||
|
||||
.. _i_osd2:
|
||||
@ -376,30 +376,30 @@ Linux:
|
||||
- Name
|
||||
- Description
|
||||
* - 0x0
|
||||
- \_\_le16
|
||||
- l\_i\_blocks\_high
|
||||
- __le16
|
||||
- l_i_blocks_high
|
||||
- Upper 16-bits of the block count. Please see the note attached to
|
||||
i\_blocks\_lo.
|
||||
i_blocks_lo.
|
||||
* - 0x2
|
||||
- \_\_le16
|
||||
- l\_i\_file\_acl\_high
|
||||
- __le16
|
||||
- l_i_file_acl_high
|
||||
- Upper 16-bits of the extended attribute block (historically, the file
|
||||
ACL location). See the Extended Attributes section below.
|
||||
* - 0x4
|
||||
- \_\_le16
|
||||
- l\_i\_uid\_high
|
||||
- __le16
|
||||
- l_i_uid_high
|
||||
- Upper 16-bits of the Owner UID.
|
||||
* - 0x6
|
||||
- \_\_le16
|
||||
- l\_i\_gid\_high
|
||||
- __le16
|
||||
- l_i_gid_high
|
||||
- Upper 16-bits of the GID.
|
||||
* - 0x8
|
||||
- \_\_le16
|
||||
- l\_i\_checksum\_lo
|
||||
- __le16
|
||||
- l_i_checksum_lo
|
||||
- Lower 16-bits of the inode checksum.
|
||||
* - 0xA
|
||||
- \_\_le16
|
||||
- l\_i\_reserved
|
||||
- __le16
|
||||
- l_i_reserved
|
||||
- Unused.
|
||||
|
||||
Hurd:
|
||||
@ -413,24 +413,24 @@ Hurd:
|
||||
- Name
|
||||
- Description
|
||||
* - 0x0
|
||||
- \_\_le16
|
||||
- h\_i\_reserved1
|
||||
- __le16
|
||||
- h_i_reserved1
|
||||
- ??
|
||||
* - 0x2
|
||||
- \_\_u16
|
||||
- h\_i\_mode\_high
|
||||
- __u16
|
||||
- h_i_mode_high
|
||||
- Upper 16-bits of the file mode.
|
||||
* - 0x4
|
||||
- \_\_le16
|
||||
- h\_i\_uid\_high
|
||||
- __le16
|
||||
- h_i_uid_high
|
||||
- Upper 16-bits of the Owner UID.
|
||||
* - 0x6
|
||||
- \_\_le16
|
||||
- h\_i\_gid\_high
|
||||
- __le16
|
||||
- h_i_gid_high
|
||||
- Upper 16-bits of the GID.
|
||||
* - 0x8
|
||||
- \_\_u32
|
||||
- h\_i\_author
|
||||
- __u32
|
||||
- h_i_author
|
||||
- Author code?
|
||||
|
||||
Masix:
|
||||
@ -444,17 +444,17 @@ Masix:
|
||||
- Name
|
||||
- Description
|
||||
* - 0x0
|
||||
- \_\_le16
|
||||
- h\_i\_reserved1
|
||||
- __le16
|
||||
- h_i_reserved1
|
||||
- ??
|
||||
* - 0x2
|
||||
- \_\_u16
|
||||
- m\_i\_file\_acl\_high
|
||||
- __u16
|
||||
- m_i_file_acl_high
|
||||
- Upper 16-bits of the extended attribute block (historically, the file
|
||||
ACL location).
|
||||
* - 0x4
|
||||
- \_\_u32
|
||||
- m\_i\_reserved2[2]
|
||||
- __u32
|
||||
- m_i_reserved2[2]
|
||||
- ??
|
||||
|
||||
Inode Size
|
||||
@ -466,11 +466,11 @@ In ext2 and ext3, the inode structure size was fixed at 128 bytes
|
||||
on-disk inode at format time for all inodes in the filesystem to provide
|
||||
space beyond the end of the original ext2 inode. The on-disk inode
|
||||
record size is recorded in the superblock as ``s_inode_size``. The
|
||||
number of bytes actually used by struct ext4\_inode beyond the original
|
||||
number of bytes actually used by struct ext4_inode beyond the original
|
||||
128-byte ext2 inode is recorded in the ``i_extra_isize`` field for each
|
||||
inode, which allows struct ext4\_inode to grow for a new kernel without
|
||||
inode, which allows struct ext4_inode to grow for a new kernel without
|
||||
having to upgrade all of the on-disk inodes. Access to fields beyond
|
||||
EXT2\_GOOD\_OLD\_INODE\_SIZE should be verified to be within
|
||||
EXT2_GOOD_OLD_INODE_SIZE should be verified to be within
|
||||
``i_extra_isize``. By default, ext4 inode records are 256 bytes, and (as
|
||||
of August 2019) the inode structure is 160 bytes
|
||||
(``i_extra_isize = 32``). The extra space between the end of the inode
|
||||
@ -516,7 +516,7 @@ creation time (crtime); this field is 64-bits wide and decoded in the
|
||||
same manner as 64-bit [cma]time. Neither crtime nor dtime is accessible
|
||||
through the regular stat() interface, though debugfs will report them.
|
||||
|
||||
We use the 32-bit signed time value plus (2^32 \* (extra epoch bits)).
|
||||
We use the 32-bit signed time value plus (2^32 * (extra epoch bits)).
|
||||
In other words:
|
||||
|
||||
.. list-table::
|
||||
@ -525,8 +525,8 @@ In other words:
|
||||
|
||||
* - Extra epoch bits
|
||||
- MSB of 32-bit time
|
||||
- Adjustment for signed 32-bit to 64-bit tv\_sec
|
||||
- Decoded 64-bit tv\_sec
|
||||
- Adjustment for signed 32-bit to 64-bit tv_sec
|
||||
- Decoded 64-bit tv_sec
|
||||
- valid time range
|
||||
* - 0 0
|
||||
- 1
|
||||
|
@ -63,8 +63,8 @@ Generally speaking, the journal has this format:
|
||||
:header-rows: 1
|
||||
|
||||
* - Superblock
|
||||
- descriptor\_block (data\_blocks or revocation\_block) [more data or
|
||||
revocations] commmit\_block
|
||||
- descriptor_block (data_blocks or revocation_block) [more data or
|
||||
revocations] commit_block
|
||||
- [more transactions...]
|
||||
* -
|
||||
- One transaction
|
||||
@ -93,8 +93,8 @@ superblock.
|
||||
* - 1024 bytes of padding
|
||||
- ext4 Superblock
|
||||
- Journal Superblock
|
||||
- descriptor\_block (data\_blocks or revocation\_block) [more data or
|
||||
revocations] commmit\_block
|
||||
- descriptor_block (data_blocks or revocation_block) [more data or
|
||||
revocations] commit_block
|
||||
- [more transactions...]
|
||||
* -
|
||||
-
|
||||
@ -117,17 +117,17 @@ Every block in the journal starts with a common 12-byte header
|
||||
- Name
|
||||
- Description
|
||||
* - 0x0
|
||||
- \_\_be32
|
||||
- h\_magic
|
||||
- __be32
|
||||
- h_magic
|
||||
- jbd2 magic number, 0xC03B3998.
|
||||
* - 0x4
|
||||
- \_\_be32
|
||||
- h\_blocktype
|
||||
- __be32
|
||||
- h_blocktype
|
||||
- Description of what this block contains. See the jbd2_blocktype_ table
|
||||
below.
|
||||
* - 0x8
|
||||
- \_\_be32
|
||||
- h\_sequence
|
||||
- __be32
|
||||
- h_sequence
|
||||
- The transaction ID that goes with this block.
|
||||
|
||||
.. _jbd2_blocktype:
|
||||
@ -177,99 +177,99 @@ which is 1024 bytes long:
|
||||
-
|
||||
- Static information describing the journal.
|
||||
* - 0x0
|
||||
- journal\_header\_t (12 bytes)
|
||||
- s\_header
|
||||
- journal_header_t (12 bytes)
|
||||
- s_header
|
||||
- Common header identifying this as a superblock.
|
||||
* - 0xC
|
||||
- \_\_be32
|
||||
- s\_blocksize
|
||||
- __be32
|
||||
- s_blocksize
|
||||
- Journal device block size.
|
||||
* - 0x10
|
||||
- \_\_be32
|
||||
- s\_maxlen
|
||||
- __be32
|
||||
- s_maxlen
|
||||
- Total number of blocks in this journal.
|
||||
* - 0x14
|
||||
- \_\_be32
|
||||
- s\_first
|
||||
- __be32
|
||||
- s_first
|
||||
- First block of log information.
|
||||
* -
|
||||
-
|
||||
-
|
||||
- Dynamic information describing the current state of the log.
|
||||
* - 0x18
|
||||
- \_\_be32
|
||||
- s\_sequence
|
||||
- __be32
|
||||
- s_sequence
|
||||
- First commit ID expected in log.
|
||||
* - 0x1C
|
||||
- \_\_be32
|
||||
- s\_start
|
||||
- __be32
|
||||
- s_start
|
||||
- Block number of the start of log. Contrary to the comments, this field
|
||||
being zero does not imply that the journal is clean!
|
||||
* - 0x20
|
||||
- \_\_be32
|
||||
- s\_errno
|
||||
- Error value, as set by jbd2\_journal\_abort().
|
||||
- __be32
|
||||
- s_errno
|
||||
- Error value, as set by jbd2_journal_abort().
|
||||
* -
|
||||
-
|
||||
-
|
||||
- The remaining fields are only valid in a v2 superblock.
|
||||
* - 0x24
|
||||
- \_\_be32
|
||||
- s\_feature\_compat;
|
||||
- __be32
|
||||
- s_feature_compat;
|
||||
- Compatible feature set. See the table jbd2_compat_ below.
|
||||
* - 0x28
|
||||
- \_\_be32
|
||||
- s\_feature\_incompat
|
||||
- __be32
|
||||
- s_feature_incompat
|
||||
- Incompatible feature set. See the table jbd2_incompat_ below.
|
||||
* - 0x2C
|
||||
- \_\_be32
|
||||
- s\_feature\_ro\_compat
|
||||
- __be32
|
||||
- s_feature_ro_compat
|
||||
- Read-only compatible feature set. There aren't any of these currently.
|
||||
* - 0x30
|
||||
- \_\_u8
|
||||
- s\_uuid[16]
|
||||
- __u8
|
||||
- s_uuid[16]
|
||||
- 128-bit uuid for journal. This is compared against the copy in the ext4
|
||||
super block at mount time.
|
||||
* - 0x40
|
||||
- \_\_be32
|
||||
- s\_nr\_users
|
||||
- __be32
|
||||
- s_nr_users
|
||||
- Number of file systems sharing this journal.
|
||||
* - 0x44
|
||||
- \_\_be32
|
||||
- s\_dynsuper
|
||||
- __be32
|
||||
- s_dynsuper
|
||||
- Location of dynamic super block copy. (Not used?)
|
||||
* - 0x48
|
||||
- \_\_be32
|
||||
- s\_max\_transaction
|
||||
- __be32
|
||||
- s_max_transaction
|
||||
- Limit of journal blocks per transaction. (Not used?)
|
||||
* - 0x4C
|
||||
- \_\_be32
|
||||
- s\_max\_trans\_data
|
||||
- __be32
|
||||
- s_max_trans_data
|
||||
- Limit of data blocks per transaction. (Not used?)
|
||||
* - 0x50
|
||||
- \_\_u8
|
||||
- s\_checksum\_type
|
||||
- __u8
|
||||
- s_checksum_type
|
||||
- Checksum algorithm used for the journal. See jbd2_checksum_type_ for
|
||||
more info.
|
||||
* - 0x51
|
||||
- \_\_u8[3]
|
||||
- s\_padding2
|
||||
- __u8[3]
|
||||
- s_padding2
|
||||
-
|
||||
* - 0x54
|
||||
- \_\_be32
|
||||
- s\_num\_fc\_blocks
|
||||
- __be32
|
||||
- s_num_fc_blocks
|
||||
- Number of fast commit blocks in the journal.
|
||||
* - 0x58
|
||||
- \_\_u32
|
||||
- s\_padding[42]
|
||||
- __u32
|
||||
- s_padding[42]
|
||||
-
|
||||
* - 0xFC
|
||||
- \_\_be32
|
||||
- s\_checksum
|
||||
- __be32
|
||||
- s_checksum
|
||||
- Checksum of the entire superblock, with this field set to zero.
|
||||
* - 0x100
|
||||
- \_\_u8
|
||||
- s\_users[16\*48]
|
||||
- __u8
|
||||
- s_users[16*48]
|
||||
- ids of all file systems sharing the log. e2fsprogs/Linux don't allow
|
||||
shared external journals, but I imagine Lustre (or ocfs2?), which use
|
||||
the jbd2 code, might.
|
||||
@ -286,7 +286,7 @@ The journal compat features are any combination of the following:
|
||||
- Description
|
||||
* - 0x1
|
||||
- Journal maintains checksums on the data blocks.
|
||||
(JBD2\_FEATURE\_COMPAT\_CHECKSUM)
|
||||
(JBD2_FEATURE_COMPAT_CHECKSUM)
|
||||
|
||||
.. _jbd2_incompat:
|
||||
|
||||
@ -299,23 +299,23 @@ The journal incompat features are any combination of the following:
|
||||
* - Value
|
||||
- Description
|
||||
* - 0x1
|
||||
- Journal has block revocation records. (JBD2\_FEATURE\_INCOMPAT\_REVOKE)
|
||||
- Journal has block revocation records. (JBD2_FEATURE_INCOMPAT_REVOKE)
|
||||
* - 0x2
|
||||
- Journal can deal with 64-bit block numbers.
|
||||
(JBD2\_FEATURE\_INCOMPAT\_64BIT)
|
||||
(JBD2_FEATURE_INCOMPAT_64BIT)
|
||||
* - 0x4
|
||||
- Journal commits asynchronously. (JBD2\_FEATURE\_INCOMPAT\_ASYNC\_COMMIT)
|
||||
- Journal commits asynchronously. (JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)
|
||||
* - 0x8
|
||||
- This journal uses v2 of the checksum on-disk format. Each journal
|
||||
metadata block gets its own checksum, and the block tags in the
|
||||
descriptor table contain checksums for each of the data blocks in the
|
||||
journal. (JBD2\_FEATURE\_INCOMPAT\_CSUM\_V2)
|
||||
journal. (JBD2_FEATURE_INCOMPAT_CSUM_V2)
|
||||
* - 0x10
|
||||
- This journal uses v3 of the checksum on-disk format. This is the same as
|
||||
v2, but the journal block tag size is fixed regardless of the size of
|
||||
block numbers. (JBD2\_FEATURE\_INCOMPAT\_CSUM\_V3)
|
||||
block numbers. (JBD2_FEATURE_INCOMPAT_CSUM_V3)
|
||||
* - 0x20
|
||||
- Journal has fast commit blocks. (JBD2\_FEATURE\_INCOMPAT\_FAST\_COMMIT)
|
||||
- Journal has fast commit blocks. (JBD2_FEATURE_INCOMPAT_FAST_COMMIT)
|
||||
|
||||
.. _jbd2_checksum_type:
|
||||
|
||||
@ -355,11 +355,11 @@ Descriptor blocks consume at least 36 bytes, but use a full block:
|
||||
- Name
|
||||
- Descriptor
|
||||
* - 0x0
|
||||
- journal\_header\_t
|
||||
- journal_header_t
|
||||
- (open coded)
|
||||
- Common block header.
|
||||
* - 0xC
|
||||
- struct journal\_block\_tag\_s
|
||||
- struct journal_block_tag_s
|
||||
- open coded array[]
|
||||
- Enough tags either to fill up the block or to describe all the data
|
||||
blocks that follow this descriptor block.
|
||||
@ -367,7 +367,7 @@ Descriptor blocks consume at least 36 bytes, but use a full block:
|
||||
Journal block tags have any of the following formats, depending on which
|
||||
journal feature and block tag flags are set.
|
||||
|
||||
If JBD2\_FEATURE\_INCOMPAT\_CSUM\_V3 is set, the journal block tag is
|
||||
If JBD2_FEATURE_INCOMPAT_CSUM_V3 is set, the journal block tag is
|
||||
defined as ``struct journal_block_tag3_s``, which looks like the
|
||||
following. The size is 16 or 32 bytes.
|
||||
|
||||
@ -380,24 +380,24 @@ following. The size is 16 or 32 bytes.
|
||||
- Name
|
||||
- Descriptor
|
||||
* - 0x0
|
||||
- \_\_be32
|
||||
- t\_blocknr
|
||||
- __be32
|
||||
- t_blocknr
|
||||
- Lower 32-bits of the location of where the corresponding data block
|
||||
should end up on disk.
|
||||
* - 0x4
|
||||
- \_\_be32
|
||||
- t\_flags
|
||||
- __be32
|
||||
- t_flags
|
||||
- Flags that go with the descriptor. See the table jbd2_tag_flags_ for
|
||||
more info.
|
||||
* - 0x8
|
||||
- \_\_be32
|
||||
- t\_blocknr\_high
|
||||
- __be32
|
||||
- t_blocknr_high
|
||||
- Upper 32-bits of the location of where the corresponding data block
|
||||
should end up on disk. This is zero if JBD2\_FEATURE\_INCOMPAT\_64BIT is
|
||||
should end up on disk. This is zero if JBD2_FEATURE_INCOMPAT_64BIT is
|
||||
not enabled.
|
||||
* - 0xC
|
||||
- \_\_be32
|
||||
- t\_checksum
|
||||
- __be32
|
||||
- t_checksum
|
||||
- Checksum of the journal UUID, the sequence number, and the data block.
|
||||
* -
|
||||
-
|
||||
@ -433,7 +433,7 @@ The journal tag flags are any combination of the following:
|
||||
* - 0x8
|
||||
- This is the last tag in this descriptor block.
|
||||
|
||||
If JBD2\_FEATURE\_INCOMPAT\_CSUM\_V3 is NOT set, the journal block tag
|
||||
If JBD2_FEATURE_INCOMPAT_CSUM_V3 is NOT set, the journal block tag
|
||||
is defined as ``struct journal_block_tag_s``, which looks like the
|
||||
following. The size is 8, 12, 24, or 28 bytes:
|
||||
|
||||
@ -446,18 +446,18 @@ following. The size is 8, 12, 24, or 28 bytes:
|
||||
- Name
|
||||
- Descriptor
|
||||
* - 0x0
|
||||
- \_\_be32
|
||||
- t\_blocknr
|
||||
- __be32
|
||||
- t_blocknr
|
||||
- Lower 32-bits of the location of where the corresponding data block
|
||||
should end up on disk.
|
||||
* - 0x4
|
||||
- \_\_be16
|
||||
- t\_checksum
|
||||
- __be16
|
||||
- t_checksum
|
||||
- Checksum of the journal UUID, the sequence number, and the data block.
|
||||
Note that only the lower 16 bits are stored.
|
||||
* - 0x6
|
||||
- \_\_be16
|
||||
- t\_flags
|
||||
- __be16
|
||||
- t_flags
|
||||
- Flags that go with the descriptor. See the table jbd2_tag_flags_ for
|
||||
more info.
|
||||
* -
|
||||
@ -466,8 +466,8 @@ following. The size is 8, 12, 24, or 28 bytes:
|
||||
- This next field is only present if the super block indicates support for
|
||||
64-bit block numbers.
|
||||
* - 0x8
|
||||
- \_\_be32
|
||||
- t\_blocknr\_high
|
||||
- __be32
|
||||
- t_blocknr_high
|
||||
- Upper 32-bits of the location of where the corresponding data block
|
||||
should end up on disk.
|
||||
* -
|
||||
@ -483,8 +483,8 @@ following. The size is 8, 12, 24, or 28 bytes:
|
||||
``j_uuid`` field in ``struct journal_s``, but only tune2fs touches that
|
||||
field.
|
||||
|
||||
If JBD2\_FEATURE\_INCOMPAT\_CSUM\_V2 or
|
||||
JBD2\_FEATURE\_INCOMPAT\_CSUM\_V3 are set, the end of the block is a
|
||||
If JBD2_FEATURE_INCOMPAT_CSUM_V2 or
|
||||
JBD2_FEATURE_INCOMPAT_CSUM_V3 are set, the end of the block is a
|
||||
``struct jbd2_journal_block_tail``, which looks like this:
|
||||
|
||||
.. list-table::
|
||||
@ -496,8 +496,8 @@ JBD2\_FEATURE\_INCOMPAT\_CSUM\_V3 are set, the end of the block is a
|
||||
- Name
|
||||
- Descriptor
|
||||
* - 0x0
|
||||
- \_\_be32
|
||||
- t\_checksum
|
||||
- __be32
|
||||
- t_checksum
|
||||
- Checksum of the journal UUID + the descriptor block, with this field set
|
||||
to zero.
|
||||
|
||||
@ -538,25 +538,25 @@ length, but use a full block:
|
||||
- Name
|
||||
- Description
|
||||
* - 0x0
|
||||
- journal\_header\_t
|
||||
- r\_header
|
||||
- journal_header_t
|
||||
- r_header
|
||||
- Common block header.
|
||||
* - 0xC
|
||||
- \_\_be32
|
||||
- r\_count
|
||||
- __be32
|
||||
- r_count
|
||||
- Number of bytes used in this block.
|
||||
* - 0x10
|
||||
- \_\_be32 or \_\_be64
|
||||
- __be32 or __be64
|
||||
- blocks[0]
|
||||
- Blocks to revoke.
|
||||
|
||||
After r\_count is a linear array of block numbers that are effectively
|
||||
After r_count is a linear array of block numbers that are effectively
|
||||
revoked by this transaction. The size of each block number is 8 bytes if
|
||||
the superblock advertises 64-bit block number support, or 4 bytes
|
||||
otherwise.
|
||||
|
||||
If JBD2\_FEATURE\_INCOMPAT\_CSUM\_V2 or
|
||||
JBD2\_FEATURE\_INCOMPAT\_CSUM\_V3 are set, the end of the revocation
|
||||
If JBD2_FEATURE_INCOMPAT_CSUM_V2 or
|
||||
JBD2_FEATURE_INCOMPAT_CSUM_V3 are set, the end of the revocation
|
||||
block is a ``struct jbd2_journal_revoke_tail``, which has this format:
|
||||
|
||||
.. list-table::
|
||||
@ -568,8 +568,8 @@ block is a ``struct jbd2_journal_revoke_tail``, which has this format:
|
||||
- Name
|
||||
- Description
|
||||
* - 0x0
|
||||
- \_\_be32
|
||||
- r\_checksum
|
||||
- __be32
|
||||
- r_checksum
|
||||
- Checksum of the journal UUID + revocation block
|
||||
|
||||
Commit Block
|
||||
@ -592,38 +592,38 @@ bytes long (but uses a full block):
|
||||
- Name
|
||||
- Descriptor
|
||||
* - 0x0
|
||||
- journal\_header\_s
|
||||
- journal_header_s
|
||||
- (open coded)
|
||||
- Common block header.
|
||||
* - 0xC
|
||||
- unsigned char
|
||||
- h\_chksum\_type
|
||||
- h_chksum_type
|
||||
- The type of checksum to use to verify the integrity of the data blocks
|
||||
in the transaction. See jbd2_checksum_type_ for more info.
|
||||
* - 0xD
|
||||
- unsigned char
|
||||
- h\_chksum\_size
|
||||
- h_chksum_size
|
||||
- The number of bytes used by the checksum. Most likely 4.
|
||||
* - 0xE
|
||||
- unsigned char
|
||||
- h\_padding[2]
|
||||
- h_padding[2]
|
||||
-
|
||||
* - 0x10
|
||||
- \_\_be32
|
||||
- h\_chksum[JBD2\_CHECKSUM\_BYTES]
|
||||
- __be32
|
||||
- h_chksum[JBD2_CHECKSUM_BYTES]
|
||||
- 32 bytes of space to store checksums. If
|
||||
JBD2\_FEATURE\_INCOMPAT\_CSUM\_V2 or JBD2\_FEATURE\_INCOMPAT\_CSUM\_V3
|
||||
JBD2_FEATURE_INCOMPAT_CSUM_V2 or JBD2_FEATURE_INCOMPAT_CSUM_V3
|
||||
are set, the first ``__be32`` is the checksum of the journal UUID and
|
||||
the entire commit block, with this field zeroed. If
|
||||
JBD2\_FEATURE\_COMPAT\_CHECKSUM is set, the first ``__be32`` is the
|
||||
JBD2_FEATURE_COMPAT_CHECKSUM is set, the first ``__be32`` is the
|
||||
crc32 of all the blocks already written to the transaction.
|
||||
* - 0x30
|
||||
- \_\_be64
|
||||
- h\_commit\_sec
|
||||
- __be64
|
||||
- h_commit_sec
|
||||
- The time that the transaction was committed, in seconds since the epoch.
|
||||
* - 0x38
|
||||
- \_\_be32
|
||||
- h\_commit\_nsec
|
||||
- __be32
|
||||
- h_commit_nsec
|
||||
- Nanoseconds component of the above timestamp.
|
||||
|
||||
Fast commits
|
||||
|
@ -7,8 +7,8 @@ Multiple mount protection (MMP) is a feature that protects the
|
||||
filesystem against multiple hosts trying to use the filesystem
|
||||
simultaneously. When a filesystem is opened (for mounting, or fsck,
|
||||
etc.), the MMP code running on the node (call it node A) checks a
|
||||
sequence number. If the sequence number is EXT4\_MMP\_SEQ\_CLEAN, the
|
||||
open continues. If the sequence number is EXT4\_MMP\_SEQ\_FSCK, then
|
||||
sequence number. If the sequence number is EXT4_MMP_SEQ_CLEAN, the
|
||||
open continues. If the sequence number is EXT4_MMP_SEQ_FSCK, then
|
||||
fsck is (hopefully) running, and open fails immediately. Otherwise, the
|
||||
open code will wait for twice the specified MMP check interval and check
|
||||
the sequence number again. If the sequence number has changed, then the
|
||||
@ -40,38 +40,38 @@ The MMP structure (``struct mmp_struct``) is as follows:
|
||||
- Name
|
||||
- Description
|
||||
* - 0x0
|
||||
- \_\_le32
|
||||
- mmp\_magic
|
||||
- __le32
|
||||
- mmp_magic
|
||||
- Magic number for MMP, 0x004D4D50 (“MMP”).
|
||||
* - 0x4
|
||||
- \_\_le32
|
||||
- mmp\_seq
|
||||
- __le32
|
||||
- mmp_seq
|
||||
- Sequence number, updated periodically.
|
||||
* - 0x8
|
||||
- \_\_le64
|
||||
- mmp\_time
|
||||
- __le64
|
||||
- mmp_time
|
||||
- Time that the MMP block was last updated.
|
||||
* - 0x10
|
||||
- char[64]
|
||||
- mmp\_nodename
|
||||
- mmp_nodename
|
||||
- Hostname of the node that opened the filesystem.
|
||||
* - 0x50
|
||||
- char[32]
|
||||
- mmp\_bdevname
|
||||
- mmp_bdevname
|
||||
- Block device name of the filesystem.
|
||||
* - 0x70
|
||||
- \_\_le16
|
||||
- mmp\_check\_interval
|
||||
- __le16
|
||||
- mmp_check_interval
|
||||
- The MMP re-check interval, in seconds.
|
||||
* - 0x72
|
||||
- \_\_le16
|
||||
- mmp\_pad1
|
||||
- __le16
|
||||
- mmp_pad1
|
||||
- Zero.
|
||||
* - 0x74
|
||||
- \_\_le32[226]
|
||||
- mmp\_pad2
|
||||
- __le32[226]
|
||||
- mmp_pad2
|
||||
- Zero.
|
||||
* - 0x3FC
|
||||
- \_\_le32
|
||||
- mmp\_checksum
|
||||
- __le32
|
||||
- mmp_checksum
|
||||
- Checksum of the MMP block.
|
||||
|
@ -7,7 +7,7 @@ An ext4 file system is split into a series of block groups. To reduce
|
||||
performance difficulties due to fragmentation, the block allocator tries
|
||||
very hard to keep each file's blocks within the same group, thereby
|
||||
reducing seek times. The size of a block group is specified in
|
||||
``sb.s_blocks_per_group`` blocks, though it can also calculated as 8 \*
|
||||
``sb.s_blocks_per_group`` blocks, though it can also be calculated as 8 *
|
||||
``block_size_in_bytes``. With the default block size of 4KiB, each group
|
||||
will contain 32,768 blocks, for a length of 128MiB. The number of block
|
||||
groups is the size of the device divided by the size of a block group.
|
||||
|
@ -34,7 +34,7 @@ ext4 reserves some inode for special features, as follows:
|
||||
* - 10
|
||||
- Replica inode, used for some non-upstream feature?
|
||||
* - 11
|
||||
- Traditional first non-reserved inode. Usually this is the lost+found directory. See s\_first\_ino in the superblock.
|
||||
- Traditional first non-reserved inode. Usually this is the lost+found directory. See s_first_ino in the superblock.
|
||||
|
||||
Note that there are also some inodes allocated from non-reserved inode numbers
|
||||
for other filesystem features which are not referenced from standard directory
|
||||
@ -47,9 +47,9 @@ hierarchy. These are generally reference from the superblock. They are:
|
||||
* - Superblock field
|
||||
- Description
|
||||
|
||||
* - s\_lpf\_ino
|
||||
* - s_lpf_ino
|
||||
- Inode number of lost+found directory.
|
||||
* - s\_prj\_quota\_inum
|
||||
* - s_prj_quota_inum
|
||||
- Inode number of quota file tracking project quotas
|
||||
* - s\_orphan\_file\_inum
|
||||
* - s_orphan_file_inum
|
||||
- Inode number of file tracking orphan inodes.
|
||||
|
@ -7,7 +7,7 @@ The superblock records various information about the enclosing
|
||||
filesystem, such as block counts, inode counts, supported features,
|
||||
maintenance information, and more.
|
||||
|
||||
If the sparse\_super feature flag is set, redundant copies of the
|
||||
If the sparse_super feature flag is set, redundant copies of the
|
||||
superblock and group descriptors are kept only in the groups whose group
|
||||
number is either 0 or a power of 3, 5, or 7. If the flag is not set,
|
||||
redundant copies are kept in all groups.
|
||||
@ -27,107 +27,107 @@ The ext4 superblock is laid out as follows in
|
||||
- Name
|
||||
- Description
|
||||
* - 0x0
|
||||
- \_\_le32
|
||||
- s\_inodes\_count
|
||||
- __le32
|
||||
- s_inodes_count
|
||||
- Total inode count.
|
||||
* - 0x4
|
||||
- \_\_le32
|
||||
- s\_blocks\_count\_lo
|
||||
- __le32
|
||||
- s_blocks_count_lo
|
||||
- Total block count.
|
||||
* - 0x8
|
||||
- \_\_le32
|
||||
- s\_r\_blocks\_count\_lo
|
||||
- __le32
|
||||
- s_r_blocks_count_lo
|
||||
- This number of blocks can only be allocated by the super-user.
|
||||
* - 0xC
|
||||
- \_\_le32
|
||||
- s\_free\_blocks\_count\_lo
|
||||
- __le32
|
||||
- s_free_blocks_count_lo
|
||||
- Free block count.
|
||||
* - 0x10
|
||||
- \_\_le32
|
||||
- s\_free\_inodes\_count
|
||||
- __le32
|
||||
- s_free_inodes_count
|
||||
- Free inode count.
|
||||
* - 0x14
|
||||
- \_\_le32
|
||||
- s\_first\_data\_block
|
||||
- __le32
|
||||
- s_first_data_block
|
||||
- First data block. This must be at least 1 for 1k-block filesystems and
|
||||
is typically 0 for all other block sizes.
|
||||
* - 0x18
|
||||
- \_\_le32
|
||||
- s\_log\_block\_size
|
||||
- Block size is 2 ^ (10 + s\_log\_block\_size).
|
||||
- __le32
|
||||
- s_log_block_size
|
||||
- Block size is 2 ^ (10 + s_log_block_size).
|
||||
* - 0x1C
|
||||
- \_\_le32
|
||||
- s\_log\_cluster\_size
|
||||
- Cluster size is 2 ^ (10 + s\_log\_cluster\_size) blocks if bigalloc is
|
||||
enabled. Otherwise s\_log\_cluster\_size must equal s\_log\_block\_size.
|
||||
- __le32
|
||||
- s_log_cluster_size
|
||||
- Cluster size is 2 ^ (10 + s_log_cluster_size) blocks if bigalloc is
|
||||
enabled. Otherwise s_log_cluster_size must equal s_log_block_size.
|
||||
* - 0x20
|
||||
- \_\_le32
|
||||
- s\_blocks\_per\_group
|
||||
- __le32
|
||||
- s_blocks_per_group
|
||||
- Blocks per group.
|
||||
* - 0x24
|
||||
- \_\_le32
|
||||
- s\_clusters\_per\_group
|
||||
- __le32
|
||||
- s_clusters_per_group
|
||||
- Clusters per group, if bigalloc is enabled. Otherwise
|
||||
s\_clusters\_per\_group must equal s\_blocks\_per\_group.
|
||||
s_clusters_per_group must equal s_blocks_per_group.
|
||||
* - 0x28
|
||||
- \_\_le32
|
||||
- s\_inodes\_per\_group
|
||||
- __le32
|
||||
- s_inodes_per_group
|
||||
- Inodes per group.
|
||||
* - 0x2C
|
||||
- \_\_le32
|
||||
- s\_mtime
|
||||
- __le32
|
||||
- s_mtime
|
||||
- Mount time, in seconds since the epoch.
|
||||
* - 0x30
|
||||
- \_\_le32
|
||||
- s\_wtime
|
||||
- __le32
|
||||
- s_wtime
|
||||
- Write time, in seconds since the epoch.
|
||||
* - 0x34
|
||||
- \_\_le16
|
||||
- s\_mnt\_count
|
||||
- __le16
|
||||
- s_mnt_count
|
||||
- Number of mounts since the last fsck.
|
||||
* - 0x36
|
||||
- \_\_le16
|
||||
- s\_max\_mnt\_count
|
||||
- __le16
|
||||
- s_max_mnt_count
|
||||
- Number of mounts beyond which a fsck is needed.
|
||||
* - 0x38
|
||||
- \_\_le16
|
||||
- s\_magic
|
||||
- __le16
|
||||
- s_magic
|
||||
- Magic signature, 0xEF53
|
||||
* - 0x3A
|
||||
- \_\_le16
|
||||
- s\_state
|
||||
- __le16
|
||||
- s_state
|
||||
- File system state. See super_state_ for more info.
|
||||
* - 0x3C
|
||||
- \_\_le16
|
||||
- s\_errors
|
||||
- __le16
|
||||
- s_errors
|
||||
- Behaviour when detecting errors. See super_errors_ for more info.
|
||||
* - 0x3E
|
||||
- \_\_le16
|
||||
- s\_minor\_rev\_level
|
||||
- __le16
|
||||
- s_minor_rev_level
|
||||
- Minor revision level.
|
||||
* - 0x40
|
||||
- \_\_le32
|
||||
- s\_lastcheck
|
||||
- __le32
|
||||
- s_lastcheck
|
||||
- Time of last check, in seconds since the epoch.
|
||||
* - 0x44
|
||||
- \_\_le32
|
||||
- s\_checkinterval
|
||||
- __le32
|
||||
- s_checkinterval
|
||||
- Maximum time between checks, in seconds.
|
||||
* - 0x48
|
||||
- \_\_le32
|
||||
- s\_creator\_os
|
||||
- __le32
|
||||
- s_creator_os
|
||||
- Creator OS. See the table super_creator_ for more info.
|
||||
* - 0x4C
|
||||
- \_\_le32
|
||||
- s\_rev\_level
|
||||
- __le32
|
||||
- s_rev_level
|
||||
- Revision level. See the table super_revision_ for more info.
|
||||
* - 0x50
|
||||
- \_\_le16
|
||||
- s\_def\_resuid
|
||||
- __le16
|
||||
- s_def_resuid
|
||||
- Default uid for reserved blocks.
|
||||
* - 0x52
|
||||
- \_\_le16
|
||||
- s\_def\_resgid
|
||||
- __le16
|
||||
- s_def_resgid
|
||||
- Default gid for reserved blocks.
|
||||
* -
|
||||
-
|
||||
@ -143,50 +143,50 @@ The ext4 superblock is laid out as follows in
|
||||
about a feature in either the compatible or incompatible feature set, it
|
||||
must abort and not try to meddle with things it doesn't understand...
|
||||
* - 0x54
|
||||
- \_\_le32
|
||||
- s\_first\_ino
|
||||
- __le32
|
||||
- s_first_ino
|
||||
- First non-reserved inode.
|
||||
* - 0x58
|
||||
- \_\_le16
|
||||
- s\_inode\_size
|
||||
- __le16
|
||||
- s_inode_size
|
||||
- Size of inode structure, in bytes.
|
||||
* - 0x5A
|
||||
- \_\_le16
|
||||
- s\_block\_group\_nr
|
||||
- __le16
|
||||
- s_block_group_nr
|
||||
- Block group # of this superblock.
|
||||
* - 0x5C
|
||||
- \_\_le32
|
||||
- s\_feature\_compat
|
||||
- __le32
|
||||
- s_feature_compat
|
||||
- Compatible feature set flags. Kernel can still read/write this fs even
|
||||
if it doesn't understand a flag; fsck should not do that. See the
|
||||
super_compat_ table for more info.
|
||||
* - 0x60
|
||||
- \_\_le32
|
||||
- s\_feature\_incompat
|
||||
- __le32
|
||||
- s_feature_incompat
|
||||
- Incompatible feature set. If the kernel or fsck doesn't understand one
|
||||
of these bits, it should stop. See the super_incompat_ table for more
|
||||
info.
|
||||
* - 0x64
|
||||
- \_\_le32
|
||||
- s\_feature\_ro\_compat
|
||||
- __le32
|
||||
- s_feature_ro_compat
|
||||
- Readonly-compatible feature set. If the kernel doesn't understand one of
|
||||
these bits, it can still mount read-only. See the super_rocompat_ table
|
||||
for more info.
|
||||
* - 0x68
|
||||
- \_\_u8
|
||||
- s\_uuid[16]
|
||||
- __u8
|
||||
- s_uuid[16]
|
||||
- 128-bit UUID for volume.
|
||||
* - 0x78
|
||||
- char
|
||||
- s\_volume\_name[16]
|
||||
- s_volume_name[16]
|
||||
- Volume label.
|
||||
* - 0x88
|
||||
- char
|
||||
- s\_last\_mounted[64]
|
||||
- s_last_mounted[64]
|
||||
- Directory where filesystem was last mounted.
|
||||
* - 0xC8
|
||||
- \_\_le32
|
||||
- s\_algorithm\_usage\_bitmap
|
||||
- __le32
|
||||
- s_algorithm_usage_bitmap
|
||||
- For compression (Not used in e2fsprogs/Linux)
|
||||
* -
|
||||
-
|
||||
@ -194,18 +194,18 @@ The ext4 superblock is laid out as follows in
|
||||
- Performance hints. Directory preallocation should only happen if the
|
||||
EXT4_FEATURE_COMPAT_DIR_PREALLOC flag is on.
|
||||
* - 0xCC
|
||||
- \_\_u8
|
||||
- s\_prealloc\_blocks
|
||||
- __u8
|
||||
- s_prealloc_blocks
|
||||
- #. of blocks to try to preallocate for ... files? (Not used in
|
||||
e2fsprogs/Linux)
|
||||
* - 0xCD
|
||||
- \_\_u8
|
||||
- s\_prealloc\_dir\_blocks
|
||||
- __u8
|
||||
- s_prealloc_dir_blocks
|
||||
- #. of blocks to preallocate for directories. (Not used in
|
||||
e2fsprogs/Linux)
|
||||
* - 0xCE
|
||||
- \_\_le16
|
||||
- s\_reserved\_gdt\_blocks
|
||||
- __le16
|
||||
- s_reserved_gdt_blocks
|
||||
- Number of reserved GDT entries for future filesystem expansion.
|
||||
* -
|
||||
-
|
||||
@ -213,281 +213,281 @@ The ext4 superblock is laid out as follows in
|
||||
- Journalling support is valid only if EXT4_FEATURE_COMPAT_HAS_JOURNAL is
|
||||
set.
|
||||
* - 0xD0
|
||||
- \_\_u8
|
||||
- s\_journal\_uuid[16]
|
||||
- __u8
|
||||
- s_journal_uuid[16]
|
||||
- UUID of journal superblock
|
||||
* - 0xE0
|
||||
- \_\_le32
|
||||
- s\_journal\_inum
|
||||
- __le32
|
||||
- s_journal_inum
|
||||
- inode number of journal file.
|
||||
* - 0xE4
|
||||
- \_\_le32
|
||||
- s\_journal\_dev
|
||||
- __le32
|
||||
- s_journal_dev
|
||||
- Device number of journal file, if the external journal feature flag is
|
||||
set.
|
||||
* - 0xE8
|
||||
- \_\_le32
|
||||
- s\_last\_orphan
|
||||
- __le32
|
||||
- s_last_orphan
|
||||
- Start of list of orphaned inodes to delete.
|
||||
* - 0xEC
|
||||
- \_\_le32
|
||||
- s\_hash\_seed[4]
|
||||
- __le32
|
||||
- s_hash_seed[4]
|
||||
- HTREE hash seed.
|
||||
* - 0xFC
|
||||
- \_\_u8
|
||||
- s\_def\_hash\_version
|
||||
- __u8
|
||||
- s_def_hash_version
|
||||
- Default hash algorithm to use for directory hashes. See super_def_hash_
|
||||
for more info.
|
||||
* - 0xFD
|
||||
- \_\_u8
|
||||
- s\_jnl\_backup\_type
|
||||
- If this value is 0 or EXT3\_JNL\_BACKUP\_BLOCKS (1), then the
|
||||
- __u8
|
||||
- s_jnl_backup_type
|
||||
- If this value is 0 or EXT3_JNL_BACKUP_BLOCKS (1), then the
|
||||
``s_jnl_blocks`` field contains a duplicate copy of the inode's
|
||||
``i_block[]`` array and ``i_size``.
|
||||
* - 0xFE
|
||||
- \_\_le16
|
||||
- s\_desc\_size
|
||||
- __le16
|
||||
- s_desc_size
|
||||
- Size of group descriptors, in bytes, if the 64bit incompat feature flag
|
||||
is set.
|
||||
* - 0x100
|
||||
- \_\_le32
|
||||
- s\_default\_mount\_opts
|
||||
- __le32
|
||||
- s_default_mount_opts
|
||||
- Default mount options. See the super_mountopts_ table for more info.
|
||||
* - 0x104
|
||||
- \_\_le32
|
||||
- s\_first\_meta\_bg
|
||||
- First metablock block group, if the meta\_bg feature is enabled.
|
||||
- __le32
|
||||
- s_first_meta_bg
|
||||
- First metablock block group, if the meta_bg feature is enabled.
|
||||
* - 0x108
|
||||
- \_\_le32
|
||||
- s\_mkfs\_time
|
||||
- __le32
|
||||
- s_mkfs_time
|
||||
- When the filesystem was created, in seconds since the epoch.
|
||||
* - 0x10C
|
||||
- \_\_le32
|
||||
- s\_jnl\_blocks[17]
|
||||
- __le32
|
||||
- s_jnl_blocks[17]
|
||||
- Backup copy of the journal inode's ``i_block[]`` array in the first 15
|
||||
elements and i\_size\_high and i\_size in the 16th and 17th elements,
|
||||
elements and i_size_high and i_size in the 16th and 17th elements,
|
||||
respectively.
|
||||
* -
|
||||
-
|
||||
-
|
||||
- 64bit support is valid only if EXT4_FEATURE_INCOMPAT_64BIT is set.
|
||||
* - 0x150
|
||||
- \_\_le32
|
||||
- s\_blocks\_count\_hi
|
||||
- __le32
|
||||
- s_blocks_count_hi
|
||||
- High 32-bits of the block count.
|
||||
* - 0x154
|
||||
- \_\_le32
|
||||
- s\_r\_blocks\_count\_hi
|
||||
- __le32
|
||||
- s_r_blocks_count_hi
|
||||
- High 32-bits of the reserved block count.
|
||||
* - 0x158
|
||||
- \_\_le32
|
||||
- s\_free\_blocks\_count\_hi
|
||||
- __le32
|
||||
- s_free_blocks_count_hi
|
||||
- High 32-bits of the free block count.
|
||||
* - 0x15C
|
||||
- \_\_le16
|
||||
- s\_min\_extra\_isize
|
||||
- __le16
|
||||
- s_min_extra_isize
|
||||
- All inodes have at least # bytes.
|
||||
* - 0x15E
|
||||
- \_\_le16
|
||||
- s\_want\_extra\_isize
|
||||
- __le16
|
||||
- s_want_extra_isize
|
||||
- New inodes should reserve # bytes.
|
||||
* - 0x160
|
||||
- \_\_le32
|
||||
- s\_flags
|
||||
- __le32
|
||||
- s_flags
|
||||
- Miscellaneous flags. See the super_flags_ table for more info.
|
||||
* - 0x164
|
||||
- \_\_le16
|
||||
- s\_raid\_stride
|
||||
- __le16
|
||||
- s_raid_stride
|
||||
- RAID stride. This is the number of logical blocks read from or written
|
||||
to the disk before moving to the next disk. This affects the placement
|
||||
of filesystem metadata, which will hopefully make RAID storage faster.
|
||||
* - 0x166
|
||||
- \_\_le16
|
||||
- s\_mmp\_interval
|
||||
- __le16
|
||||
- s_mmp_interval
|
||||
- #. seconds to wait in multi-mount prevention (MMP) checking. In theory,
|
||||
MMP is a mechanism to record in the superblock which host and device
|
||||
have mounted the filesystem, in order to prevent multiple mounts. This
|
||||
feature does not seem to be implemented...
|
||||
* - 0x168
|
||||
- \_\_le64
|
||||
- s\_mmp\_block
|
||||
- __le64
|
||||
- s_mmp_block
|
||||
- Block # for multi-mount protection data.
|
||||
* - 0x170
|
||||
- \_\_le32
|
||||
- s\_raid\_stripe\_width
|
||||
- __le32
|
||||
- s_raid_stripe_width
|
||||
- RAID stripe width. This is the number of logical blocks read from or
|
||||
written to the disk before coming back to the current disk. This is used
|
||||
by the block allocator to try to reduce the number of read-modify-write
|
||||
operations in a RAID5/6.
|
||||
* - 0x174
|
||||
- \_\_u8
|
||||
- s\_log\_groups\_per\_flex
|
||||
- __u8
|
||||
- s_log_groups_per_flex
|
||||
- Size of a flexible block group is 2 ^ ``s_log_groups_per_flex``.
|
||||
* - 0x175
|
||||
- \_\_u8
|
||||
- s\_checksum\_type
|
||||
- __u8
|
||||
- s_checksum_type
|
||||
- Metadata checksum algorithm type. The only valid value is 1 (crc32c).
|
||||
* - 0x176
|
||||
- \_\_le16
|
||||
- s\_reserved\_pad
|
||||
- __le16
|
||||
- s_reserved_pad
|
||||
-
|
||||
* - 0x178
|
||||
- \_\_le64
|
||||
- s\_kbytes\_written
|
||||
- __le64
|
||||
- s_kbytes_written
|
||||
- Number of KiB written to this filesystem over its lifetime.
|
||||
* - 0x180
|
||||
- \_\_le32
|
||||
- s\_snapshot\_inum
|
||||
- __le32
|
||||
- s_snapshot_inum
|
||||
- inode number of active snapshot. (Not used in e2fsprogs/Linux.)
|
||||
* - 0x184
|
||||
- \_\_le32
|
||||
- s\_snapshot\_id
|
||||
- __le32
|
||||
- s_snapshot_id
|
||||
- Sequential ID of active snapshot. (Not used in e2fsprogs/Linux.)
|
||||
* - 0x188
|
||||
- \_\_le64
|
||||
- s\_snapshot\_r\_blocks\_count
|
||||
- __le64
|
||||
- s_snapshot_r_blocks_count
|
||||
- Number of blocks reserved for active snapshot's future use. (Not used in
|
||||
e2fsprogs/Linux.)
|
||||
* - 0x190
|
||||
- \_\_le32
|
||||
- s\_snapshot\_list
|
||||
- __le32
|
||||
- s_snapshot_list
|
||||
- inode number of the head of the on-disk snapshot list. (Not used in
|
||||
e2fsprogs/Linux.)
|
||||
* - 0x194
|
||||
- \_\_le32
|
||||
- s\_error\_count
|
||||
- __le32
|
||||
- s_error_count
|
||||
- Number of errors seen.
|
||||
* - 0x198
|
||||
- \_\_le32
|
||||
- s\_first\_error\_time
|
||||
- __le32
|
||||
- s_first_error_time
|
||||
- First time an error happened, in seconds since the epoch.
|
||||
* - 0x19C
|
||||
- \_\_le32
|
||||
- s\_first\_error\_ino
|
||||
- __le32
|
||||
- s_first_error_ino
|
||||
- inode involved in first error.
|
||||
* - 0x1A0
|
||||
- \_\_le64
|
||||
- s\_first\_error\_block
|
||||
- __le64
|
||||
- s_first_error_block
|
||||
- Number of the block involved in the first error.
|
||||
* - 0x1A8
|
||||
- \_\_u8
|
||||
- s\_first\_error\_func[32]
|
||||
- __u8
|
||||
- s_first_error_func[32]
|
||||
- Name of function where the error happened.
|
||||
* - 0x1C8
|
||||
- \_\_le32
|
||||
- s\_first\_error\_line
|
||||
- __le32
|
||||
- s_first_error_line
|
||||
- Line number where error happened.
|
||||
* - 0x1CC
|
||||
- \_\_le32
|
||||
- s\_last\_error\_time
|
||||
- __le32
|
||||
- s_last_error_time
|
||||
- Time of most recent error, in seconds since the epoch.
|
||||
* - 0x1D0
|
||||
- \_\_le32
|
||||
- s\_last\_error\_ino
|
||||
- __le32
|
||||
- s_last_error_ino
|
||||
- inode involved in most recent error.
|
||||
* - 0x1D4
|
||||
- \_\_le32
|
||||
- s\_last\_error\_line
|
||||
- __le32
|
||||
- s_last_error_line
|
||||
- Line number where most recent error happened.
|
||||
* - 0x1D8
|
||||
- \_\_le64
|
||||
- s\_last\_error\_block
|
||||
- __le64
|
||||
- s_last_error_block
|
||||
- Number of the block involved in the most recent error.
|
||||
* - 0x1E0
|
||||
- \_\_u8
|
||||
- s\_last\_error\_func[32]
|
||||
- __u8
|
||||
- s_last_error_func[32]
|
||||
- Name of function where the most recent error happened.
|
||||
* - 0x200
|
||||
- \_\_u8
|
||||
- s\_mount\_opts[64]
|
||||
- __u8
|
||||
- s_mount_opts[64]
|
||||
- ASCIIZ string of mount options.
|
||||
* - 0x240
|
||||
- \_\_le32
|
||||
- s\_usr\_quota\_inum
|
||||
- __le32
|
||||
- s_usr_quota_inum
|
||||
- Inode number of user `quota <quota>`__ file.
|
||||
* - 0x244
|
||||
- \_\_le32
|
||||
- s\_grp\_quota\_inum
|
||||
- __le32
|
||||
- s_grp_quota_inum
|
||||
- Inode number of group `quota <quota>`__ file.
|
||||
* - 0x248
|
||||
- \_\_le32
|
||||
- s\_overhead\_blocks
|
||||
- __le32
|
||||
- s_overhead_blocks
|
||||
- Overhead blocks/clusters in fs. (Huh? This field is always zero, which
|
||||
means that the kernel calculates it dynamically.)
|
||||
* - 0x24C
|
||||
- \_\_le32
|
||||
- s\_backup\_bgs[2]
|
||||
- Block groups containing superblock backups (if sparse\_super2)
|
||||
- __le32
|
||||
- s_backup_bgs[2]
|
||||
- Block groups containing superblock backups (if sparse_super2)
|
||||
* - 0x254
|
||||
- \_\_u8
|
||||
- s\_encrypt\_algos[4]
|
||||
- __u8
|
||||
- s_encrypt_algos[4]
|
||||
- Encryption algorithms in use. There can be up to four algorithms in use
|
||||
at any time; valid algorithm codes are given in the super_encrypt_ table
|
||||
below.
|
||||
* - 0x258
|
||||
- \_\_u8
|
||||
- s\_encrypt\_pw\_salt[16]
|
||||
- __u8
|
||||
- s_encrypt_pw_salt[16]
|
||||
- Salt for the string2key algorithm for encryption.
|
||||
* - 0x268
|
||||
- \_\_le32
|
||||
- s\_lpf\_ino
|
||||
- __le32
|
||||
- s_lpf_ino
|
||||
- Inode number of lost+found
|
||||
* - 0x26C
|
||||
- \_\_le32
|
||||
- s\_prj\_quota\_inum
|
||||
- __le32
|
||||
- s_prj_quota_inum
|
||||
- Inode that tracks project quotas.
|
||||
* - 0x270
|
||||
- \_\_le32
|
||||
- s\_checksum\_seed
|
||||
- Checksum seed used for metadata\_csum calculations. This value is
|
||||
crc32c(~0, $orig\_fs\_uuid).
|
||||
- __le32
|
||||
- s_checksum_seed
|
||||
- Checksum seed used for metadata_csum calculations. This value is
|
||||
crc32c(~0, $orig_fs_uuid).
|
||||
* - 0x274
|
||||
- \_\_u8
|
||||
- s\_wtime_hi
|
||||
- __u8
|
||||
- s_wtime_hi
|
||||
- Upper 8 bits of the s_wtime field.
|
||||
* - 0x275
|
||||
- \_\_u8
|
||||
- s\_mtime_hi
|
||||
- __u8
|
||||
- s_mtime_hi
|
||||
- Upper 8 bits of the s_mtime field.
|
||||
* - 0x276
|
||||
- \_\_u8
|
||||
- s\_mkfs_time_hi
|
||||
- __u8
|
||||
- s_mkfs_time_hi
|
||||
- Upper 8 bits of the s_mkfs_time field.
|
||||
* - 0x277
|
||||
- \_\_u8
|
||||
- s\_lastcheck_hi
|
||||
- __u8
|
||||
- s_lastcheck_hi
|
||||
- Upper 8 bits of the s_lastcheck field.
|
||||
* - 0x278
|
||||
- \_\_u8
|
||||
- s\_first_error_time_hi
|
||||
- __u8
|
||||
- s_first_error_time_hi
|
||||
- Upper 8 bits of the s_first_error_time field.
|
||||
* - 0x279
|
||||
- \_\_u8
|
||||
- s\_last_error_time_hi
|
||||
- __u8
|
||||
- s_last_error_time_hi
|
||||
- Upper 8 bits of the s_last_error_time field.
|
||||
* - 0x27A
|
||||
- \_\_u8
|
||||
- s\_pad[2]
|
||||
- __u8
|
||||
- s_pad[2]
|
||||
- Zero padding.
|
||||
* - 0x27C
|
||||
- \_\_le16
|
||||
- s\_encoding
|
||||
- __le16
|
||||
- s_encoding
|
||||
- Filename charset encoding.
|
||||
* - 0x27E
|
||||
- \_\_le16
|
||||
- s\_encoding_flags
|
||||
- __le16
|
||||
- s_encoding_flags
|
||||
- Filename charset encoding flags.
|
||||
* - 0x280
|
||||
- \_\_le32
|
||||
- s\_orphan\_file\_inum
|
||||
- __le32
|
||||
- s_orphan_file_inum
|
||||
- Orphan file inode number.
|
||||
* - 0x284
|
||||
- \_\_le32
|
||||
- s\_reserved[94]
|
||||
- __le32
|
||||
- s_reserved[94]
|
||||
- Padding to the end of the block.
|
||||
* - 0x3FC
|
||||
- \_\_le32
|
||||
- s\_checksum
|
||||
- __le32
|
||||
- s_checksum
|
||||
- Superblock checksum.
|
||||
|
||||
.. _super_state:
|
||||
@ -574,44 +574,44 @@ following:
|
||||
* - Value
|
||||
- Description
|
||||
* - 0x1
|
||||
- Directory preallocation (COMPAT\_DIR\_PREALLOC).
|
||||
- Directory preallocation (COMPAT_DIR_PREALLOC).
|
||||
* - 0x2
|
||||
- “imagic inodes”. Not clear from the code what this does
|
||||
(COMPAT\_IMAGIC\_INODES).
|
||||
(COMPAT_IMAGIC_INODES).
|
||||
* - 0x4
|
||||
- Has a journal (COMPAT\_HAS\_JOURNAL).
|
||||
- Has a journal (COMPAT_HAS_JOURNAL).
|
||||
* - 0x8
|
||||
- Supports extended attributes (COMPAT\_EXT\_ATTR).
|
||||
- Supports extended attributes (COMPAT_EXT_ATTR).
|
||||
* - 0x10
|
||||
- Has reserved GDT blocks for filesystem expansion
|
||||
(COMPAT\_RESIZE\_INODE). Requires RO\_COMPAT\_SPARSE\_SUPER.
|
||||
(COMPAT_RESIZE_INODE). Requires RO_COMPAT_SPARSE_SUPER.
|
||||
* - 0x20
|
||||
- Has directory indices (COMPAT\_DIR\_INDEX).
|
||||
- Has directory indices (COMPAT_DIR_INDEX).
|
||||
* - 0x40
|
||||
- “Lazy BG”. Not in Linux kernel, seems to have been for uninitialized
|
||||
block groups? (COMPAT\_LAZY\_BG)
|
||||
block groups? (COMPAT_LAZY_BG)
|
||||
* - 0x80
|
||||
- “Exclude inode”. Not used. (COMPAT\_EXCLUDE\_INODE).
|
||||
- “Exclude inode”. Not used. (COMPAT_EXCLUDE_INODE).
|
||||
* - 0x100
|
||||
- “Exclude bitmap”. Seems to be used to indicate the presence of
|
||||
snapshot-related exclude bitmaps? Not defined in kernel or used in
|
||||
e2fsprogs (COMPAT\_EXCLUDE\_BITMAP).
|
||||
e2fsprogs (COMPAT_EXCLUDE_BITMAP).
|
||||
* - 0x200
|
||||
- Sparse Super Block, v2. If this flag is set, the SB field s\_backup\_bgs
|
||||
- Sparse Super Block, v2. If this flag is set, the SB field s_backup_bgs
|
||||
points to the two block groups that contain backup superblocks
|
||||
(COMPAT\_SPARSE\_SUPER2).
|
||||
(COMPAT_SPARSE_SUPER2).
|
||||
* - 0x400
|
||||
- Fast commits supported. Although fast commit blocks are
|
||||
backward incompatible, fast commit blocks are not always
|
||||
present in the journal. If fast commit blocks are present in
|
||||
the journal, JBD2 incompat feature
|
||||
(JBD2\_FEATURE\_INCOMPAT\_FAST\_COMMIT) gets
|
||||
set (COMPAT\_FAST\_COMMIT).
|
||||
(JBD2_FEATURE_INCOMPAT_FAST_COMMIT) gets
|
||||
set (COMPAT_FAST_COMMIT).
|
||||
* - 0x1000
|
||||
- Orphan file allocated. This is the special file for more efficient
|
||||
tracking of unlinked but still open inodes. When there may be any
|
||||
entries in the file, we additionally set proper rocompat feature
|
||||
(RO\_COMPAT\_ORPHAN\_PRESENT).
|
||||
(RO_COMPAT_ORPHAN_PRESENT).
|
||||
|
||||
.. _super_incompat:
|
||||
|
||||
@ -625,45 +625,45 @@ following:
|
||||
* - Value
|
||||
- Description
|
||||
* - 0x1
|
||||
- Compression (INCOMPAT\_COMPRESSION).
|
||||
- Compression (INCOMPAT_COMPRESSION).
|
||||
* - 0x2
|
||||
- Directory entries record the file type. See ext4\_dir\_entry\_2 below
|
||||
(INCOMPAT\_FILETYPE).
|
||||
- Directory entries record the file type. See ext4_dir_entry_2 below
|
||||
(INCOMPAT_FILETYPE).
|
||||
* - 0x4
|
||||
- Filesystem needs recovery (INCOMPAT\_RECOVER).
|
||||
- Filesystem needs recovery (INCOMPAT_RECOVER).
|
||||
* - 0x8
|
||||
- Filesystem has a separate journal device (INCOMPAT\_JOURNAL\_DEV).
|
||||
- Filesystem has a separate journal device (INCOMPAT_JOURNAL_DEV).
|
||||
* - 0x10
|
||||
- Meta block groups. See the earlier discussion of this feature
|
||||
(INCOMPAT\_META\_BG).
|
||||
(INCOMPAT_META_BG).
|
||||
* - 0x40
|
||||
- Files in this filesystem use extents (INCOMPAT\_EXTENTS).
|
||||
- Files in this filesystem use extents (INCOMPAT_EXTENTS).
|
||||
* - 0x80
|
||||
- Enable a filesystem size of 2^64 blocks (INCOMPAT\_64BIT).
|
||||
- Enable a filesystem size of 2^64 blocks (INCOMPAT_64BIT).
|
||||
* - 0x100
|
||||
- Multiple mount protection (INCOMPAT\_MMP).
|
||||
- Multiple mount protection (INCOMPAT_MMP).
|
||||
* - 0x200
|
||||
- Flexible block groups. See the earlier discussion of this feature
|
||||
(INCOMPAT\_FLEX\_BG).
|
||||
(INCOMPAT_FLEX_BG).
|
||||
* - 0x400
|
||||
- Inodes can be used to store large extended attribute values
|
||||
(INCOMPAT\_EA\_INODE).
|
||||
(INCOMPAT_EA_INODE).
|
||||
* - 0x1000
|
||||
- Data in directory entry (INCOMPAT\_DIRDATA). (Not implemented?)
|
||||
- Data in directory entry (INCOMPAT_DIRDATA). (Not implemented?)
|
||||
* - 0x2000
|
||||
- Metadata checksum seed is stored in the superblock. This feature enables
|
||||
the administrator to change the UUID of a metadata\_csum filesystem
|
||||
the administrator to change the UUID of a metadata_csum filesystem
|
||||
while the filesystem is mounted; without it, the checksum definition
|
||||
requires all metadata blocks to be rewritten (INCOMPAT\_CSUM\_SEED).
|
||||
requires all metadata blocks to be rewritten (INCOMPAT_CSUM_SEED).
|
||||
* - 0x4000
|
||||
- Large directory >2GB or 3-level htree (INCOMPAT\_LARGEDIR). Prior to
|
||||
- Large directory >2GB or 3-level htree (INCOMPAT_LARGEDIR). Prior to
|
||||
this feature, directories could not be larger than 4GiB and could not
|
||||
have an htree more than 2 levels deep. If this feature is enabled,
|
||||
directories can be larger than 4GiB and have a maximum htree depth of 3.
|
||||
* - 0x8000
|
||||
- Data in inode (INCOMPAT\_INLINE\_DATA).
|
||||
- Data in inode (INCOMPAT_INLINE_DATA).
|
||||
* - 0x10000
|
||||
- Encrypted inodes are present on the filesystem. (INCOMPAT\_ENCRYPT).
|
||||
- Encrypted inodes are present on the filesystem. (INCOMPAT_ENCRYPT).
|
||||
|
||||
.. _super_rocompat:
|
||||
|
||||
@ -678,54 +678,54 @@ the following:
|
||||
- Description
|
||||
* - 0x1
|
||||
- Sparse superblocks. See the earlier discussion of this feature
|
||||
(RO\_COMPAT\_SPARSE\_SUPER).
|
||||
(RO_COMPAT_SPARSE_SUPER).
|
||||
* - 0x2
|
||||
- This filesystem has been used to store a file greater than 2GiB
|
||||
(RO\_COMPAT\_LARGE\_FILE).
|
||||
(RO_COMPAT_LARGE_FILE).
|
||||
* - 0x4
|
||||
- Not used in kernel or e2fsprogs (RO\_COMPAT\_BTREE\_DIR).
|
||||
- Not used in kernel or e2fsprogs (RO_COMPAT_BTREE_DIR).
|
||||
* - 0x8
|
||||
- This filesystem has files whose sizes are represented in units of
|
||||
logical blocks, not 512-byte sectors. This implies a very large file
|
||||
indeed! (RO\_COMPAT\_HUGE\_FILE)
|
||||
indeed! (RO_COMPAT_HUGE_FILE)
|
||||
* - 0x10
|
||||
- Group descriptors have checksums. In addition to detecting corruption,
|
||||
this is useful for lazy formatting with uninitialized groups
|
||||
(RO\_COMPAT\_GDT\_CSUM).
|
||||
(RO_COMPAT_GDT_CSUM).
|
||||
* - 0x20
|
||||
- Indicates that the old ext3 32,000 subdirectory limit no longer applies
|
||||
(RO\_COMPAT\_DIR\_NLINK). A directory's i\_links\_count will be set to 1
|
||||
(RO_COMPAT_DIR_NLINK). A directory's i_links_count will be set to 1
|
||||
if it is incremented past 64,999.
|
||||
* - 0x40
|
||||
- Indicates that large inodes exist on this filesystem
|
||||
(RO\_COMPAT\_EXTRA\_ISIZE).
|
||||
(RO_COMPAT_EXTRA_ISIZE).
|
||||
* - 0x80
|
||||
- This filesystem has a snapshot (RO\_COMPAT\_HAS\_SNAPSHOT).
|
||||
- This filesystem has a snapshot (RO_COMPAT_HAS_SNAPSHOT).
|
||||
* - 0x100
|
||||
- `Quota <Quota>`__ (RO\_COMPAT\_QUOTA).
|
||||
- `Quota <Quota>`__ (RO_COMPAT_QUOTA).
|
||||
* - 0x200
|
||||
- This filesystem supports “bigalloc”, which means that file extents are
|
||||
tracked in units of clusters (of blocks) instead of blocks
|
||||
(RO\_COMPAT\_BIGALLOC).
|
||||
(RO_COMPAT_BIGALLOC).
|
||||
* - 0x400
|
||||
- This filesystem supports metadata checksumming.
|
||||
(RO\_COMPAT\_METADATA\_CSUM; implies RO\_COMPAT\_GDT\_CSUM, though
|
||||
GDT\_CSUM must not be set)
|
||||
(RO_COMPAT_METADATA_CSUM; implies RO_COMPAT_GDT_CSUM, though
|
||||
GDT_CSUM must not be set)
|
||||
* - 0x800
|
||||
- Filesystem supports replicas. This feature is neither in the kernel nor
|
||||
e2fsprogs. (RO\_COMPAT\_REPLICA)
|
||||
e2fsprogs. (RO_COMPAT_REPLICA)
|
||||
* - 0x1000
|
||||
- Read-only filesystem image; the kernel will not mount this image
|
||||
read-write and most tools will refuse to write to the image.
|
||||
(RO\_COMPAT\_READONLY)
|
||||
(RO_COMPAT_READONLY)
|
||||
* - 0x2000
|
||||
- Filesystem tracks project quotas. (RO\_COMPAT\_PROJECT)
|
||||
- Filesystem tracks project quotas. (RO_COMPAT_PROJECT)
|
||||
* - 0x8000
|
||||
- Verity inodes may be present on the filesystem. (RO\_COMPAT\_VERITY)
|
||||
- Verity inodes may be present on the filesystem. (RO_COMPAT_VERITY)
|
||||
* - 0x10000
|
||||
- Indicates orphan file may have valid orphan entries and thus we need
|
||||
to clean them up when mounting the filesystem
|
||||
(RO\_COMPAT\_ORPHAN\_PRESENT).
|
||||
(RO_COMPAT_ORPHAN_PRESENT).
|
||||
|
||||
.. _super_def_hash:
|
||||
|
||||
@ -761,36 +761,36 @@ The ``s_default_mount_opts`` field is any combination of the following:
|
||||
* - Value
|
||||
- Description
|
||||
* - 0x0001
|
||||
- Print debugging info upon (re)mount. (EXT4\_DEFM\_DEBUG)
|
||||
- Print debugging info upon (re)mount. (EXT4_DEFM_DEBUG)
|
||||
* - 0x0002
|
||||
- New files take the gid of the containing directory (instead of the fsgid
|
||||
of the current process). (EXT4\_DEFM\_BSDGROUPS)
|
||||
of the current process). (EXT4_DEFM_BSDGROUPS)
|
||||
* - 0x0004
|
||||
- Support userspace-provided extended attributes. (EXT4\_DEFM\_XATTR\_USER)
|
||||
- Support userspace-provided extended attributes. (EXT4_DEFM_XATTR_USER)
|
||||
* - 0x0008
|
||||
- Support POSIX access control lists (ACLs). (EXT4\_DEFM\_ACL)
|
||||
- Support POSIX access control lists (ACLs). (EXT4_DEFM_ACL)
|
||||
* - 0x0010
|
||||
- Do not support 32-bit UIDs. (EXT4\_DEFM\_UID16)
|
||||
- Do not support 32-bit UIDs. (EXT4_DEFM_UID16)
|
||||
* - 0x0020
|
||||
- All data and metadata are committed to the journal.
|
||||
(EXT4\_DEFM\_JMODE\_DATA)
|
||||
(EXT4_DEFM_JMODE_DATA)
|
||||
* - 0x0040
|
||||
- All data are flushed to the disk before metadata are committed to the
|
||||
journal. (EXT4\_DEFM\_JMODE\_ORDERED)
|
||||
journal. (EXT4_DEFM_JMODE_ORDERED)
|
||||
* - 0x0060
|
||||
- Data ordering is not preserved; data may be written after the metadata
|
||||
has been written. (EXT4\_DEFM\_JMODE\_WBACK)
|
||||
has been written. (EXT4_DEFM_JMODE_WBACK)
|
||||
* - 0x0100
|
||||
- Disable write flushes. (EXT4\_DEFM\_NOBARRIER)
|
||||
- Disable write flushes. (EXT4_DEFM_NOBARRIER)
|
||||
* - 0x0200
|
||||
- Track which blocks in a filesystem are metadata and therefore should not
|
||||
be used as data blocks. This option will be enabled by default on 3.18,
|
||||
hopefully. (EXT4\_DEFM\_BLOCK\_VALIDITY)
|
||||
hopefully. (EXT4_DEFM_BLOCK_VALIDITY)
|
||||
* - 0x0400
|
||||
- Enable DISCARD support, where the storage device is told about blocks
|
||||
becoming unused. (EXT4\_DEFM\_DISCARD)
|
||||
becoming unused. (EXT4_DEFM_DISCARD)
|
||||
* - 0x0800
|
||||
- Disable delayed allocation. (EXT4\_DEFM\_NODELALLOC)
|
||||
- Disable delayed allocation. (EXT4_DEFM_NODELALLOC)
|
||||
|
||||
.. _super_flags:
|
||||
|
||||
@ -820,12 +820,12 @@ The ``s_encrypt_algos`` list can contain any of the following:
|
||||
* - Value
|
||||
- Description
|
||||
* - 0
|
||||
- Invalid algorithm (ENCRYPTION\_MODE\_INVALID).
|
||||
- Invalid algorithm (ENCRYPTION_MODE_INVALID).
|
||||
* - 1
|
||||
- 256-bit AES in XTS mode (ENCRYPTION\_MODE\_AES\_256\_XTS).
|
||||
- 256-bit AES in XTS mode (ENCRYPTION_MODE_AES_256_XTS).
|
||||
* - 2
|
||||
- 256-bit AES in GCM mode (ENCRYPTION\_MODE\_AES\_256\_GCM).
|
||||
- 256-bit AES in GCM mode (ENCRYPTION_MODE_AES_256_GCM).
|
||||
* - 3
|
||||
- 256-bit AES in CBC mode (ENCRYPTION\_MODE\_AES\_256\_CBC).
|
||||
- 256-bit AES in CBC mode (ENCRYPTION_MODE_AES_256_CBC).
|
||||
|
||||
Total size of the superblock is 1024 bytes.
|
||||
|
@ -129,18 +129,24 @@ yet. Bug reports are always welcome at the issue tracker below!
|
||||
* - arm64
|
||||
- Supported
|
||||
- ``LLVM=1``
|
||||
* - hexagon
|
||||
- Maintained
|
||||
- ``LLVM=1``
|
||||
* - mips
|
||||
- Maintained
|
||||
- ``CC=clang``
|
||||
- ``LLVM=1``
|
||||
* - powerpc
|
||||
- Maintained
|
||||
- ``CC=clang``
|
||||
* - riscv
|
||||
- Maintained
|
||||
- ``CC=clang``
|
||||
- ``LLVM=1``
|
||||
* - s390
|
||||
- Maintained
|
||||
- ``CC=clang``
|
||||
* - um (User Mode)
|
||||
- Maintained
|
||||
- ``LLVM=1``
|
||||
* - x86
|
||||
- Supported
|
||||
- ``LLVM=1``
|
||||
|
@ -45,10 +45,12 @@ Name Alias Usage Preserved
|
||||
``$r23``-``$r31`` ``$s0``-``$s8`` Static registers Yes
|
||||
================= =============== =================== ============
|
||||
|
||||
Note: The register ``$r21`` is reserved in the ELF psABI, but used by the Linux
|
||||
kernel for storing the percpu base address. It normally has no ABI name, but is
|
||||
called ``$u0`` in the kernel. You may also see ``$v0`` or ``$v1`` in some old code,
|
||||
however they are deprecated aliases of ``$a0`` and ``$a1`` respectively.
|
||||
.. Note::
|
||||
The register ``$r21`` is reserved in the ELF psABI, but used by the Linux
|
||||
kernel for storing the percpu base address. It normally has no ABI name,
|
||||
but is called ``$u0`` in the kernel. You may also see ``$v0`` or ``$v1``
|
||||
in some old code, however they are deprecated aliases of ``$a0`` and ``$a1``
|
||||
respectively.
|
||||
|
||||
FPRs
|
||||
----
|
||||
@ -69,8 +71,9 @@ Name Alias Usage Preserved
|
||||
``$f24``-``$f31`` ``$fs0``-``$fs7`` Static registers Yes
|
||||
================= ================== =================== ============
|
||||
|
||||
Note: You may see ``$fv0`` or ``$fv1`` in some old code, however they are deprecated
|
||||
aliases of ``$fa0`` and ``$fa1`` respectively.
|
||||
.. Note::
|
||||
You may see ``$fv0`` or ``$fv1`` in some old code, however they are
|
||||
deprecated aliases of ``$fa0`` and ``$fa1`` respectively.
|
||||
|
||||
VRs
|
||||
----
|
||||
|
@ -145,12 +145,16 @@ Documentation of Loongson's LS7A chipset:
|
||||
|
||||
https://github.com/loongson/LoongArch-Documentation/releases/latest/download/Loongson-7A1000-usermanual-2.00-EN.pdf (in English)
|
||||
|
||||
Note: CPUINTC is CSR.ECFG/CSR.ESTAT and its interrupt controller described
|
||||
in Section 7.4 of "LoongArch Reference Manual, Vol 1"; LIOINTC is "Legacy I/O
|
||||
Interrupts" described in Section 11.1 of "Loongson 3A5000 Processor Reference
|
||||
Manual"; EIOINTC is "Extended I/O Interrupts" described in Section 11.2 of
|
||||
"Loongson 3A5000 Processor Reference Manual"; HTVECINTC is "HyperTransport
|
||||
Interrupts" described in Section 14.3 of "Loongson 3A5000 Processor Reference
|
||||
Manual"; PCH-PIC/PCH-MSI is "Interrupt Controller" described in Section 5 of
|
||||
"Loongson 7A1000 Bridge User Manual"; PCH-LPC is "LPC Interrupts" described in
|
||||
Section 24.3 of "Loongson 7A1000 Bridge User Manual".
|
||||
.. Note::
|
||||
- CPUINTC is CSR.ECFG/CSR.ESTAT and its interrupt controller described
|
||||
in Section 7.4 of "LoongArch Reference Manual, Vol 1";
|
||||
- LIOINTC is "Legacy I/O Interrupts" described in Section 11.1 of
|
||||
"Loongson 3A5000 Processor Reference Manual";
|
||||
- EIOINTC is "Extended I/O Interrupts" described in Section 11.2 of
|
||||
"Loongson 3A5000 Processor Reference Manual";
|
||||
- HTVECINTC is "HyperTransport Interrupts" described in Section 14.3 of
|
||||
"Loongson 3A5000 Processor Reference Manual";
|
||||
- PCH-PIC/PCH-MSI is "Interrupt Controller" described in Section 5 of
|
||||
"Loongson 7A1000 Bridge User Manual";
|
||||
- PCH-LPC is "LPC Interrupts" described in Section 24.3 of
|
||||
"Loongson 7A1000 Bridge User Manual".
|
||||
|
@ -2925,6 +2925,43 @@ plpmtud_probe_interval - INTEGER
|
||||
|
||||
Default: 0
|
||||
|
||||
reconf_enable - BOOLEAN
|
||||
Enable or disable extension of Stream Reconfiguration functionality
|
||||
specified in RFC6525. This extension provides the ability to "reset"
|
||||
a stream, and it includes the Parameters of "Outgoing/Incoming SSN
|
||||
Reset", "SSN/TSN Reset" and "Add Outgoing/Incoming Streams".
|
||||
|
||||
- 1: Enable extension.
|
||||
- 0: Disable extension.
|
||||
|
||||
Default: 0
|
||||
|
||||
intl_enable - BOOLEAN
|
||||
Enable or disable extension of User Message Interleaving functionality
|
||||
specified in RFC8260. This extension allows the interleaving of user
|
||||
messages sent on different streams. With this feature enabled, I-DATA
|
||||
chunk will replace DATA chunk to carry user messages if also supported
|
||||
by the peer. Note that to use this feature, one needs to set this option
|
||||
to 1 and also needs to set socket options SCTP_FRAGMENT_INTERLEAVE to 2
|
||||
and SCTP_INTERLEAVING_SUPPORTED to 1.
|
||||
|
||||
- 1: Enable extension.
|
||||
- 0: Disable extension.
|
||||
|
||||
Default: 0
|
||||
|
||||
ecn_enable - BOOLEAN
|
||||
Control use of Explicit Congestion Notification (ECN) by SCTP.
|
||||
Like in TCP, ECN is used only when both ends of the SCTP connection
|
||||
indicate support for it. This feature is useful in avoiding losses
|
||||
due to congestion by allowing supporting routers to signal congestion
|
||||
before having to drop packets.
|
||||
|
||||
1: Enable ecn.
|
||||
0: Disable ecn.
|
||||
|
||||
Default: 1
|
||||
|
||||
|
||||
``/proc/sys/net/core/*``
|
||||
========================
|
||||
|
@ -104,7 +104,7 @@ Whenever possible, use the PHY side RGMII delay for these reasons:
|
||||
|
||||
* PHY device drivers in PHYLIB being reusable by nature, being able to
|
||||
configure correctly a specified delay enables more designs with similar delay
|
||||
requirements to be operate correctly
|
||||
requirements to be operated correctly
|
||||
|
||||
For cases where the PHY is not capable of providing this delay, but the
|
||||
Ethernet MAC driver is capable of doing so, the correct phy_interface_t value
|
||||
|
@ -46,10 +46,11 @@ LA64中每个寄存器为64位宽。 ``$r0`` 的内容总是固定为0,而其
|
||||
``$r23``-``$r31`` ``$s0``-``$s8`` 静态寄存器 是
|
||||
================= =============== =================== ==========
|
||||
|
||||
注意:``$r21``寄存器在ELF psABI中保留未使用,但是在Linux内核用于保存每CPU
|
||||
变量基地址。该寄存器没有ABI命名,不过在内核中称为``$u0``。在一些遗留代码
|
||||
中有时可能见到``$v0``和``$v1``,它们是``$a0``和``$a1``的别名,属于已经废弃
|
||||
的用法。
|
||||
.. note::
|
||||
注意: ``$r21`` 寄存器在ELF psABI中保留未使用,但是在Linux内核用于保
|
||||
存每CPU变量基地址。该寄存器没有ABI命名,不过在内核中称为 ``$u0`` 。在
|
||||
一些遗留代码中有时可能见到 ``$v0`` 和 ``$v1`` ,它们是 ``$a0`` 和
|
||||
``$a1`` 的别名,属于已经废弃的用法。
|
||||
|
||||
浮点寄存器
|
||||
----------
|
||||
@ -68,8 +69,9 @@ LA64中每个寄存器为64位宽。 ``$r0`` 的内容总是固定为0,而其
|
||||
``$f24``-``$f31`` ``$fs0``-``$fs7`` 静态寄存器 是
|
||||
================= ================== =================== ==========
|
||||
|
||||
注意:在一些遗留代码中有时可能见到 ``$v0`` 和 ``$v1`` ,它们是 ``$a0``
|
||||
和 ``$a1`` 的别名,属于已经废弃的用法。
|
||||
.. note::
|
||||
注意:在一些遗留代码中有时可能见到 ``$fv0`` 和 ``$fv1`` ,它们是
|
||||
``$fa0`` 和 ``$fa1`` 的别名,属于已经废弃的用法。
|
||||
|
||||
|
||||
向量寄存器
|
||||
|
@ -147,9 +147,11 @@ PCH-LPC::
|
||||
|
||||
https://github.com/loongson/LoongArch-Documentation/releases/latest/download/Loongson-7A1000-usermanual-2.00-EN.pdf (英文版)
|
||||
|
||||
注:CPUINTC即《龙芯架构参考手册卷一》第7.4节所描述的CSR.ECFG/CSR.ESTAT寄存器及其中断
|
||||
控制逻辑;LIOINTC即《龙芯3A5000处理器使用手册》第11.1节所描述的“传统I/O中断”;EIOINTC
|
||||
即《龙芯3A5000处理器使用手册》第11.2节所描述的“扩展I/O中断”;HTVECINTC即《龙芯3A5000
|
||||
处理器使用手册》第14.3节所描述的“HyperTransport中断”;PCH-PIC/PCH-MSI即《龙芯7A1000桥
|
||||
片用户手册》第5章所描述的“中断控制器”;PCH-LPC即《龙芯7A1000桥片用户手册》第24.3节所
|
||||
描述的“LPC中断”。
|
||||
.. note::
|
||||
- CPUINTC:即《龙芯架构参考手册卷一》第7.4节所描述的CSR.ECFG/CSR.ESTAT寄存器及其
|
||||
中断控制逻辑;
|
||||
- LIOINTC:即《龙芯3A5000处理器使用手册》第11.1节所描述的“传统I/O中断”;
|
||||
- EIOINTC:即《龙芯3A5000处理器使用手册》第11.2节所描述的“扩展I/O中断”;
|
||||
- HTVECINTC:即《龙芯3A5000处理器使用手册》第14.3节所描述的“HyperTransport中断”;
|
||||
- PCH-PIC/PCH-MSI:即《龙芯7A1000桥片用户手册》第5章所描述的“中断控制器”;
|
||||
- PCH-LPC:即《龙芯7A1000桥片用户手册》第24.3节所描述的“LPC中断”。
|
||||
|
@ -120,7 +120,8 @@ Testing
|
||||
unpoison-pfn
|
||||
Software-unpoison page at PFN echoed into this file. This way
|
||||
a page can be reused again. This only works for Linux
|
||||
injected failures, not for real memory failures.
|
||||
injected failures, not for real memory failures. Once any hardware
|
||||
memory failure happens, this feature is disabled.
|
||||
|
||||
Note these injection interfaces are not stable and might change between
|
||||
kernel versions
|
||||
|
133
MAINTAINERS
133
MAINTAINERS
@ -427,6 +427,7 @@ ACPI VIOT DRIVER
|
||||
M: Jean-Philippe Brucker <jean-philippe@linaro.org>
|
||||
L: linux-acpi@vger.kernel.org
|
||||
L: iommu@lists.linux-foundation.org
|
||||
L: iommu@lists.linux.dev
|
||||
S: Maintained
|
||||
F: drivers/acpi/viot.c
|
||||
F: include/linux/acpi_viot.h
|
||||
@ -960,6 +961,7 @@ AMD IOMMU (AMD-VI)
|
||||
M: Joerg Roedel <joro@8bytes.org>
|
||||
R: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
|
||||
L: iommu@lists.linux-foundation.org
|
||||
L: iommu@lists.linux.dev
|
||||
S: Maintained
|
||||
T: git git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu.git
|
||||
F: drivers/iommu/amd/
|
||||
@ -2467,6 +2469,7 @@ ARM/NXP S32G ARCHITECTURE
|
||||
M: Chester Lin <clin@suse.com>
|
||||
R: Andreas Färber <afaerber@suse.de>
|
||||
R: Matthias Brugger <mbrugger@suse.com>
|
||||
R: NXP S32 Linux Team <s32@nxp.com>
|
||||
L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
|
||||
S: Maintained
|
||||
F: arch/arm64/boot/dts/freescale/s32g*.dts*
|
||||
@ -3662,7 +3665,7 @@ BPF JIT for ARM
|
||||
M: Shubham Bansal <illusionist.neo@gmail.com>
|
||||
L: netdev@vger.kernel.org
|
||||
L: bpf@vger.kernel.org
|
||||
S: Maintained
|
||||
S: Odd Fixes
|
||||
F: arch/arm/net/
|
||||
|
||||
BPF JIT for ARM64
|
||||
@ -3686,14 +3689,15 @@ BPF JIT for NFP NICs
|
||||
M: Jakub Kicinski <kuba@kernel.org>
|
||||
L: netdev@vger.kernel.org
|
||||
L: bpf@vger.kernel.org
|
||||
S: Supported
|
||||
S: Odd Fixes
|
||||
F: drivers/net/ethernet/netronome/nfp/bpf/
|
||||
|
||||
BPF JIT for POWERPC (32-BIT AND 64-BIT)
|
||||
M: Naveen N. Rao <naveen.n.rao@linux.ibm.com>
|
||||
M: Michael Ellerman <mpe@ellerman.id.au>
|
||||
L: netdev@vger.kernel.org
|
||||
L: bpf@vger.kernel.org
|
||||
S: Maintained
|
||||
S: Supported
|
||||
F: arch/powerpc/net/
|
||||
|
||||
BPF JIT for RISC-V (32-bit)
|
||||
@ -3719,7 +3723,7 @@ M: Heiko Carstens <hca@linux.ibm.com>
|
||||
M: Vasily Gorbik <gor@linux.ibm.com>
|
||||
L: netdev@vger.kernel.org
|
||||
L: bpf@vger.kernel.org
|
||||
S: Maintained
|
||||
S: Supported
|
||||
F: arch/s390/net/
|
||||
X: arch/s390/net/pnet.c
|
||||
|
||||
@ -3727,14 +3731,14 @@ BPF JIT for SPARC (32-BIT AND 64-BIT)
|
||||
M: David S. Miller <davem@davemloft.net>
|
||||
L: netdev@vger.kernel.org
|
||||
L: bpf@vger.kernel.org
|
||||
S: Maintained
|
||||
S: Odd Fixes
|
||||
F: arch/sparc/net/
|
||||
|
||||
BPF JIT for X86 32-BIT
|
||||
M: Wang YanQing <udknight@gmail.com>
|
||||
L: netdev@vger.kernel.org
|
||||
L: bpf@vger.kernel.org
|
||||
S: Maintained
|
||||
S: Odd Fixes
|
||||
F: arch/x86/net/bpf_jit_comp32.c
|
||||
|
||||
BPF JIT for X86 64-BIT
|
||||
@ -3757,6 +3761,19 @@ F: include/linux/bpf_lsm.h
|
||||
F: kernel/bpf/bpf_lsm.c
|
||||
F: security/bpf/
|
||||
|
||||
BPF L7 FRAMEWORK
|
||||
M: John Fastabend <john.fastabend@gmail.com>
|
||||
M: Jakub Sitnicki <jakub@cloudflare.com>
|
||||
L: netdev@vger.kernel.org
|
||||
L: bpf@vger.kernel.org
|
||||
S: Maintained
|
||||
F: include/linux/skmsg.h
|
||||
F: net/core/skmsg.c
|
||||
F: net/core/sock_map.c
|
||||
F: net/ipv4/tcp_bpf.c
|
||||
F: net/ipv4/udp_bpf.c
|
||||
F: net/unix/unix_bpf.c
|
||||
|
||||
BPFTOOL
|
||||
M: Quentin Monnet <quentin@isovalent.com>
|
||||
L: bpf@vger.kernel.org
|
||||
@ -3796,12 +3813,12 @@ N: bcmbca
|
||||
N: bcm[9]?47622
|
||||
|
||||
BROADCOM BCM2711/BCM2835 ARM ARCHITECTURE
|
||||
M: Nicolas Saenz Julienne <nsaenz@kernel.org>
|
||||
M: Florian Fainelli <f.fainelli@gmail.com>
|
||||
R: Broadcom internal kernel review list <bcm-kernel-feedback-list@broadcom.com>
|
||||
L: linux-rpi-kernel@lists.infradead.org (moderated for non-subscribers)
|
||||
L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
|
||||
S: Maintained
|
||||
T: git git://git.kernel.org/pub/scm/linux/kernel/git/nsaenz/linux-rpi.git
|
||||
T: git git://github.com/broadcom/stblinux.git
|
||||
F: Documentation/devicetree/bindings/pci/brcm,stb-pcie.yaml
|
||||
F: drivers/pci/controller/pcie-brcmstb.c
|
||||
F: drivers/staging/vc04_services
|
||||
@ -5962,6 +5979,7 @@ M: Christoph Hellwig <hch@lst.de>
|
||||
M: Marek Szyprowski <m.szyprowski@samsung.com>
|
||||
R: Robin Murphy <robin.murphy@arm.com>
|
||||
L: iommu@lists.linux-foundation.org
|
||||
L: iommu@lists.linux.dev
|
||||
S: Supported
|
||||
W: http://git.infradead.org/users/hch/dma-mapping.git
|
||||
T: git git://git.infradead.org/users/hch/dma-mapping.git
|
||||
@ -5974,6 +5992,7 @@ F: kernel/dma/
|
||||
DMA MAPPING BENCHMARK
|
||||
M: Xiang Chen <chenxiang66@hisilicon.com>
|
||||
L: iommu@lists.linux-foundation.org
|
||||
L: iommu@lists.linux.dev
|
||||
F: kernel/dma/map_benchmark.c
|
||||
F: tools/testing/selftests/dma/
|
||||
|
||||
@ -7558,6 +7577,7 @@ F: drivers/gpu/drm/exynos/exynos_dp*
|
||||
EXYNOS SYSMMU (IOMMU) driver
|
||||
M: Marek Szyprowski <m.szyprowski@samsung.com>
|
||||
L: iommu@lists.linux-foundation.org
|
||||
L: iommu@lists.linux.dev
|
||||
S: Maintained
|
||||
F: drivers/iommu/exynos-iommu.c
|
||||
|
||||
@ -8479,6 +8499,7 @@ F: Documentation/devicetree/bindings/gpio/
|
||||
F: Documentation/driver-api/gpio/
|
||||
F: drivers/gpio/
|
||||
F: include/asm-generic/gpio.h
|
||||
F: include/dt-bindings/gpio/
|
||||
F: include/linux/gpio.h
|
||||
F: include/linux/gpio/
|
||||
F: include/linux/of_gpio.h
|
||||
@ -9132,6 +9153,7 @@ F: drivers/media/platform/st/sti/hva
|
||||
|
||||
HWPOISON MEMORY FAILURE HANDLING
|
||||
M: Naoya Horiguchi <naoya.horiguchi@nec.com>
|
||||
R: Miaohe Lin <linmiaohe@huawei.com>
|
||||
L: linux-mm@kvack.org
|
||||
S: Maintained
|
||||
F: mm/hwpoison-inject.c
|
||||
@ -9276,6 +9298,7 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/wsa/linux.git
|
||||
F: Documentation/devicetree/bindings/i2c/i2c.txt
|
||||
F: Documentation/i2c/
|
||||
F: drivers/i2c/*
|
||||
F: include/dt-bindings/i2c/i2c.h
|
||||
F: include/linux/i2c-dev.h
|
||||
F: include/linux/i2c-smbus.h
|
||||
F: include/linux/i2c.h
|
||||
@ -9291,6 +9314,7 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/wsa/linux.git
|
||||
F: Documentation/devicetree/bindings/i2c/
|
||||
F: drivers/i2c/algos/
|
||||
F: drivers/i2c/busses/
|
||||
F: include/dt-bindings/i2c/
|
||||
|
||||
I2C-TAOS-EVM DRIVER
|
||||
M: Jean Delvare <jdelvare@suse.com>
|
||||
@ -9975,6 +9999,7 @@ INTEL IOMMU (VT-d)
|
||||
M: David Woodhouse <dwmw2@infradead.org>
|
||||
M: Lu Baolu <baolu.lu@linux.intel.com>
|
||||
L: iommu@lists.linux-foundation.org
|
||||
L: iommu@lists.linux.dev
|
||||
S: Supported
|
||||
T: git git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu.git
|
||||
F: drivers/iommu/intel/
|
||||
@ -10354,6 +10379,7 @@ IOMMU DRIVERS
|
||||
M: Joerg Roedel <joro@8bytes.org>
|
||||
M: Will Deacon <will@kernel.org>
|
||||
L: iommu@lists.linux-foundation.org
|
||||
L: iommu@lists.linux.dev
|
||||
S: Maintained
|
||||
T: git git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu.git
|
||||
F: Documentation/devicetree/bindings/iommu/
|
||||
@ -10830,6 +10856,7 @@ M: Marc Zyngier <maz@kernel.org>
|
||||
R: James Morse <james.morse@arm.com>
|
||||
R: Alexandru Elisei <alexandru.elisei@arm.com>
|
||||
R: Suzuki K Poulose <suzuki.poulose@arm.com>
|
||||
R: Oliver Upton <oliver.upton@linux.dev>
|
||||
L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
|
||||
L: kvmarm@lists.cs.columbia.edu (moderated for non-subscribers)
|
||||
S: Maintained
|
||||
@ -10872,7 +10899,6 @@ F: arch/riscv/include/asm/kvm*
|
||||
F: arch/riscv/include/uapi/asm/kvm*
|
||||
F: arch/riscv/kvm/
|
||||
F: tools/testing/selftests/kvm/*/riscv/
|
||||
F: tools/testing/selftests/kvm/riscv/
|
||||
|
||||
KERNEL VIRTUAL MACHINE for s390 (KVM/s390)
|
||||
M: Christian Borntraeger <borntraeger@linux.ibm.com>
|
||||
@ -10897,28 +10923,51 @@ F: tools/testing/selftests/kvm/*/s390x/
|
||||
F: tools/testing/selftests/kvm/s390x/
|
||||
|
||||
KERNEL VIRTUAL MACHINE FOR X86 (KVM/x86)
|
||||
M: Sean Christopherson <seanjc@google.com>
|
||||
M: Paolo Bonzini <pbonzini@redhat.com>
|
||||
R: Sean Christopherson <seanjc@google.com>
|
||||
R: Vitaly Kuznetsov <vkuznets@redhat.com>
|
||||
R: Wanpeng Li <wanpengli@tencent.com>
|
||||
R: Jim Mattson <jmattson@google.com>
|
||||
R: Joerg Roedel <joro@8bytes.org>
|
||||
L: kvm@vger.kernel.org
|
||||
S: Supported
|
||||
W: http://www.linux-kvm.org
|
||||
T: git git://git.kernel.org/pub/scm/virt/kvm/kvm.git
|
||||
F: arch/x86/include/asm/kvm*
|
||||
F: arch/x86/include/asm/pvclock-abi.h
|
||||
F: arch/x86/include/asm/svm.h
|
||||
F: arch/x86/include/asm/vmx*.h
|
||||
F: arch/x86/include/uapi/asm/kvm*
|
||||
F: arch/x86/include/uapi/asm/svm.h
|
||||
F: arch/x86/include/uapi/asm/vmx.h
|
||||
F: arch/x86/kernel/kvm.c
|
||||
F: arch/x86/kernel/kvmclock.c
|
||||
F: arch/x86/kvm/
|
||||
F: arch/x86/kvm/*/
|
||||
|
||||
KVM PARAVIRT (KVM/paravirt)
|
||||
M: Paolo Bonzini <pbonzini@redhat.com>
|
||||
R: Wanpeng Li <wanpengli@tencent.com>
|
||||
R: Vitaly Kuznetsov <vkuznets@redhat.com>
|
||||
L: kvm@vger.kernel.org
|
||||
S: Supported
|
||||
T: git git://git.kernel.org/pub/scm/virt/kvm/kvm.git
|
||||
F: arch/x86/kernel/kvm.c
|
||||
F: arch/x86/kernel/kvmclock.c
|
||||
F: arch/x86/include/asm/pvclock-abi.h
|
||||
F: include/linux/kvm_para.h
|
||||
F: include/uapi/linux/kvm_para.h
|
||||
F: include/uapi/asm-generic/kvm_para.h
|
||||
F: include/asm-generic/kvm_para.h
|
||||
F: arch/um/include/asm/kvm_para.h
|
||||
F: arch/x86/include/asm/kvm_para.h
|
||||
F: arch/x86/include/uapi/asm/kvm_para.h
|
||||
|
||||
KVM X86 HYPER-V (KVM/hyper-v)
|
||||
M: Vitaly Kuznetsov <vkuznets@redhat.com>
|
||||
M: Sean Christopherson <seanjc@google.com>
|
||||
M: Paolo Bonzini <pbonzini@redhat.com>
|
||||
L: kvm@vger.kernel.org
|
||||
S: Supported
|
||||
T: git git://git.kernel.org/pub/scm/virt/kvm/kvm.git
|
||||
F: arch/x86/kvm/hyperv.*
|
||||
F: arch/x86/kvm/kvm_onhyperv.*
|
||||
F: arch/x86/kvm/svm/hyperv.*
|
||||
F: arch/x86/kvm/svm/svm_onhyperv.*
|
||||
F: arch/x86/kvm/vmx/evmcs.*
|
||||
|
||||
KERNFS
|
||||
M: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
|
||||
M: Tejun Heo <tj@kernel.org>
|
||||
@ -11097,20 +11146,6 @@ S: Maintained
|
||||
F: include/net/l3mdev.h
|
||||
F: net/l3mdev
|
||||
|
||||
L7 BPF FRAMEWORK
|
||||
M: John Fastabend <john.fastabend@gmail.com>
|
||||
M: Daniel Borkmann <daniel@iogearbox.net>
|
||||
M: Jakub Sitnicki <jakub@cloudflare.com>
|
||||
L: netdev@vger.kernel.org
|
||||
L: bpf@vger.kernel.org
|
||||
S: Maintained
|
||||
F: include/linux/skmsg.h
|
||||
F: net/core/skmsg.c
|
||||
F: net/core/sock_map.c
|
||||
F: net/ipv4/tcp_bpf.c
|
||||
F: net/ipv4/udp_bpf.c
|
||||
F: net/unix/unix_bpf.c
|
||||
|
||||
LANDLOCK SECURITY MODULE
|
||||
M: Mickaël Salaün <mic@digikod.net>
|
||||
L: linux-security-module@vger.kernel.org
|
||||
@ -11590,6 +11625,7 @@ F: drivers/gpu/drm/bridge/lontium-lt8912b.c
|
||||
LOONGARCH
|
||||
M: Huacai Chen <chenhuacai@kernel.org>
|
||||
R: WANG Xuerui <kernel@xen0n.name>
|
||||
L: loongarch@lists.linux.dev
|
||||
S: Maintained
|
||||
T: git git://git.kernel.org/pub/scm/linux/kernel/git/chenhuacai/linux-loongson.git
|
||||
F: arch/loongarch/
|
||||
@ -12503,6 +12539,7 @@ F: drivers/i2c/busses/i2c-mt65xx.c
|
||||
MEDIATEK IOMMU DRIVER
|
||||
M: Yong Wu <yong.wu@mediatek.com>
|
||||
L: iommu@lists.linux-foundation.org
|
||||
L: iommu@lists.linux.dev
|
||||
L: linux-mediatek@lists.infradead.org (moderated for non-subscribers)
|
||||
S: Supported
|
||||
F: Documentation/devicetree/bindings/iommu/mediatek*
|
||||
@ -12845,9 +12882,8 @@ M: Andrew Morton <akpm@linux-foundation.org>
|
||||
L: linux-mm@kvack.org
|
||||
S: Maintained
|
||||
W: http://www.linux-mm.org
|
||||
T: quilt https://ozlabs.org/~akpm/mmotm/
|
||||
T: quilt https://ozlabs.org/~akpm/mmots/
|
||||
T: git git://github.com/hnaz/linux-mm.git
|
||||
T: git git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
|
||||
T: quilt git://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new
|
||||
F: include/linux/gfp.h
|
||||
F: include/linux/memory_hotplug.h
|
||||
F: include/linux/mm.h
|
||||
@ -12857,6 +12893,18 @@ F: include/linux/vmalloc.h
|
||||
F: mm/
|
||||
F: tools/testing/selftests/vm/
|
||||
|
||||
MEMORY HOT(UN)PLUG
|
||||
M: David Hildenbrand <david@redhat.com>
|
||||
M: Oscar Salvador <osalvador@suse.de>
|
||||
L: linux-mm@kvack.org
|
||||
S: Maintained
|
||||
F: Documentation/admin-guide/mm/memory-hotplug.rst
|
||||
F: Documentation/core-api/memory-hotplug.rst
|
||||
F: drivers/base/memory.c
|
||||
F: include/linux/memory_hotplug.h
|
||||
F: mm/memory_hotplug.c
|
||||
F: tools/testing/selftests/memory-hotplug/
|
||||
|
||||
MEMORY TECHNOLOGY DEVICES (MTD)
|
||||
M: Miquel Raynal <miquel.raynal@bootlin.com>
|
||||
M: Richard Weinberger <richard@nod.at>
|
||||
@ -13801,6 +13849,7 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next.git
|
||||
F: Documentation/devicetree/bindings/net/
|
||||
F: drivers/connector/
|
||||
F: drivers/net/
|
||||
F: include/dt-bindings/net/
|
||||
F: include/linux/etherdevice.h
|
||||
F: include/linux/fcdevice.h
|
||||
F: include/linux/fddidevice.h
|
||||
@ -13952,7 +14001,6 @@ F: net/ipv6/tcp*.c
|
||||
NETWORKING [TLS]
|
||||
M: Boris Pismenny <borisp@nvidia.com>
|
||||
M: John Fastabend <john.fastabend@gmail.com>
|
||||
M: Daniel Borkmann <daniel@iogearbox.net>
|
||||
M: Jakub Kicinski <kuba@kernel.org>
|
||||
L: netdev@vger.kernel.org
|
||||
S: Maintained
|
||||
@ -14261,7 +14309,7 @@ F: drivers/iio/gyro/fxas21002c_i2c.c
|
||||
F: drivers/iio/gyro/fxas21002c_spi.c
|
||||
|
||||
NXP i.MX CLOCK DRIVERS
|
||||
M: Abel Vesa <abel.vesa@nxp.com>
|
||||
M: Abel Vesa <abelvesa@kernel.org>
|
||||
L: linux-clk@vger.kernel.org
|
||||
L: linux-imx@nxp.com
|
||||
S: Maintained
|
||||
@ -14869,6 +14917,7 @@ F: include/dt-bindings/
|
||||
|
||||
OPENCOMPUTE PTP CLOCK DRIVER
|
||||
M: Jonathan Lemon <jonathan.lemon@gmail.com>
|
||||
M: Vadim Fedorenko <vadfed@fb.com>
|
||||
L: netdev@vger.kernel.org
|
||||
S: Maintained
|
||||
F: drivers/ptp/ptp_ocp.c
|
||||
@ -16488,7 +16537,7 @@ F: Documentation/devicetree/bindings/opp/opp-v2-kryo-cpu.yaml
|
||||
F: drivers/cpufreq/qcom-cpufreq-nvmem.c
|
||||
|
||||
QUALCOMM CRYPTO DRIVERS
|
||||
M: Thara Gopinath <thara.gopinath@linaro.org>
|
||||
M: Thara Gopinath <thara.gopinath@gmail.com>
|
||||
L: linux-crypto@vger.kernel.org
|
||||
L: linux-arm-msm@vger.kernel.org
|
||||
S: Maintained
|
||||
@ -16543,6 +16592,7 @@ F: drivers/i2c/busses/i2c-qcom-cci.c
|
||||
QUALCOMM IOMMU
|
||||
M: Rob Clark <robdclark@gmail.com>
|
||||
L: iommu@lists.linux-foundation.org
|
||||
L: iommu@lists.linux.dev
|
||||
L: linux-arm-msm@vger.kernel.org
|
||||
S: Maintained
|
||||
F: drivers/iommu/arm/arm-smmu/qcom_iommu.c
|
||||
@ -16598,7 +16648,7 @@ F: include/linux/if_rmnet.h
|
||||
|
||||
QUALCOMM TSENS THERMAL DRIVER
|
||||
M: Amit Kucheria <amitk@kernel.org>
|
||||
M: Thara Gopinath <thara.gopinath@linaro.org>
|
||||
M: Thara Gopinath <thara.gopinath@gmail.com>
|
||||
L: linux-pm@vger.kernel.org
|
||||
L: linux-arm-msm@vger.kernel.org
|
||||
S: Maintained
|
||||
@ -19168,6 +19218,7 @@ F: arch/x86/boot/video*
|
||||
SWIOTLB SUBSYSTEM
|
||||
M: Christoph Hellwig <hch@infradead.org>
|
||||
L: iommu@lists.linux-foundation.org
|
||||
L: iommu@lists.linux.dev
|
||||
S: Supported
|
||||
W: http://git.infradead.org/users/hch/dma-mapping.git
|
||||
T: git git://git.infradead.org/users/hch/dma-mapping.git
|
||||
@ -19305,7 +19356,7 @@ R: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
|
||||
R: Mika Westerberg <mika.westerberg@linux.intel.com>
|
||||
R: Jan Dabros <jsd@semihalf.com>
|
||||
L: linux-i2c@vger.kernel.org
|
||||
S: Maintained
|
||||
S: Supported
|
||||
F: drivers/i2c/busses/i2c-designware-*
|
||||
|
||||
SYNOPSYS DESIGNWARE MMC/SD/SDIO DRIVER
|
||||
@ -20712,6 +20763,7 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/usb.git
|
||||
F: Documentation/devicetree/bindings/usb/
|
||||
F: Documentation/usb/
|
||||
F: drivers/usb/
|
||||
F: include/dt-bindings/usb/
|
||||
F: include/linux/usb.h
|
||||
F: include/linux/usb/
|
||||
|
||||
@ -21843,6 +21895,7 @@ M: Juergen Gross <jgross@suse.com>
|
||||
M: Stefano Stabellini <sstabellini@kernel.org>
|
||||
L: xen-devel@lists.xenproject.org (moderated for non-subscribers)
|
||||
L: iommu@lists.linux-foundation.org
|
||||
L: iommu@lists.linux.dev
|
||||
S: Supported
|
||||
F: arch/x86/xen/*swiotlb*
|
||||
F: drivers/xen/*swiotlb*
|
||||
|
4
Makefile
4
Makefile
@ -2,7 +2,7 @@
|
||||
VERSION = 5
|
||||
PATCHLEVEL = 19
|
||||
SUBLEVEL = 0
|
||||
EXTRAVERSION = -rc2
|
||||
EXTRAVERSION = -rc4
|
||||
NAME = Superb Owl
|
||||
|
||||
# *DOCUMENTATION*
|
||||
@ -1141,7 +1141,7 @@ KBUILD_MODULES := 1
|
||||
|
||||
autoksyms_recursive: descend modules.order
|
||||
$(Q)$(CONFIG_SHELL) $(srctree)/scripts/adjust_autoksyms.sh \
|
||||
"$(MAKE) -f $(srctree)/Makefile vmlinux"
|
||||
"$(MAKE) -f $(srctree)/Makefile autoksyms_recursive"
|
||||
endif
|
||||
|
||||
autoksyms_h := $(if $(CONFIG_TRIM_UNUSED_KSYMS), include/generated/autoksyms.h)
|
||||
|
@ -1586,7 +1586,6 @@ dtb-$(CONFIG_ARCH_ASPEED) += \
|
||||
aspeed-bmc-lenovo-hr630.dtb \
|
||||
aspeed-bmc-lenovo-hr855xg2.dtb \
|
||||
aspeed-bmc-microsoft-olympus.dtb \
|
||||
aspeed-bmc-nuvia-dc-scm.dtb \
|
||||
aspeed-bmc-opp-lanyang.dtb \
|
||||
aspeed-bmc-opp-mihawk.dtb \
|
||||
aspeed-bmc-opp-mowgli.dtb \
|
||||
@ -1599,6 +1598,7 @@ dtb-$(CONFIG_ARCH_ASPEED) += \
|
||||
aspeed-bmc-opp-witherspoon.dtb \
|
||||
aspeed-bmc-opp-zaius.dtb \
|
||||
aspeed-bmc-portwell-neptune.dtb \
|
||||
aspeed-bmc-qcom-dc-scm-v1.dtb \
|
||||
aspeed-bmc-quanta-q71l.dtb \
|
||||
aspeed-bmc-quanta-s6q.dtb \
|
||||
aspeed-bmc-supermicro-x11spi.dtb \
|
||||
|
@ -6,8 +6,8 @@
|
||||
#include "aspeed-g6.dtsi"
|
||||
|
||||
/ {
|
||||
model = "Nuvia DC-SCM BMC";
|
||||
compatible = "nuvia,dc-scm-bmc", "aspeed,ast2600";
|
||||
model = "Qualcomm DC-SCM V1 BMC";
|
||||
compatible = "qcom,dc-scm-v1-bmc", "aspeed,ast2600";
|
||||
|
||||
aliases {
|
||||
serial4 = &uart5;
|
@ -120,26 +120,31 @@
|
||||
port@0 {
|
||||
reg = <0>;
|
||||
label = "lan1";
|
||||
phy-mode = "internal";
|
||||
};
|
||||
|
||||
port@1 {
|
||||
reg = <1>;
|
||||
label = "lan2";
|
||||
phy-mode = "internal";
|
||||
};
|
||||
|
||||
port@2 {
|
||||
reg = <2>;
|
||||
label = "lan3";
|
||||
phy-mode = "internal";
|
||||
};
|
||||
|
||||
port@3 {
|
||||
reg = <3>;
|
||||
label = "lan4";
|
||||
phy-mode = "internal";
|
||||
};
|
||||
|
||||
port@4 {
|
||||
reg = <4>;
|
||||
label = "lan5";
|
||||
phy-mode = "internal";
|
||||
};
|
||||
|
||||
port@5 {
|
||||
|
@ -28,12 +28,12 @@
|
||||
&expgpio {
|
||||
gpio-line-names = "BT_ON",
|
||||
"WL_ON",
|
||||
"",
|
||||
"PWR_LED_OFF",
|
||||
"GLOBAL_RESET",
|
||||
"VDD_SD_IO_SEL",
|
||||
"CAM_GPIO",
|
||||
"GLOBAL_SHUTDOWN",
|
||||
"SD_PWR_ON",
|
||||
"SD_OC_N";
|
||||
"SHUTDOWN_REQUEST";
|
||||
};
|
||||
|
||||
&genet_mdio {
|
||||
|
@ -593,7 +593,7 @@
|
||||
pinctrl-names = "default";
|
||||
pinctrl-0 = <&pinctrl_atmel_conn>;
|
||||
reg = <0x4a>;
|
||||
reset-gpios = <&gpio1 14 GPIO_ACTIVE_HIGH>; /* SODIMM 106 */
|
||||
reset-gpios = <&gpio1 14 GPIO_ACTIVE_LOW>; /* SODIMM 106 */
|
||||
status = "disabled";
|
||||
};
|
||||
};
|
||||
|
@ -762,7 +762,7 @@
|
||||
regulator-name = "vddpu";
|
||||
regulator-min-microvolt = <725000>;
|
||||
regulator-max-microvolt = <1450000>;
|
||||
regulator-enable-ramp-delay = <150>;
|
||||
regulator-enable-ramp-delay = <380>;
|
||||
anatop-reg-offset = <0x140>;
|
||||
anatop-vol-bit-shift = <9>;
|
||||
anatop-vol-bit-width = <5>;
|
||||
|
@ -120,6 +120,7 @@
|
||||
compatible = "usb-nop-xceiv";
|
||||
clocks = <&clks IMX7D_USB_HSIC_ROOT_CLK>;
|
||||
clock-names = "main_clk";
|
||||
power-domains = <&pgc_hsic_phy>;
|
||||
#phy-cells = <0>;
|
||||
};
|
||||
|
||||
@ -1153,7 +1154,6 @@
|
||||
compatible = "fsl,imx7d-usb", "fsl,imx27-usb";
|
||||
reg = <0x30b30000 0x200>;
|
||||
interrupts = <GIC_SPI 40 IRQ_TYPE_LEVEL_HIGH>;
|
||||
power-domains = <&pgc_hsic_phy>;
|
||||
clocks = <&clks IMX7D_USB_CTRL_CLK>;
|
||||
fsl,usbphy = <&usbphynop3>;
|
||||
fsl,usbmisc = <&usbmisc3 0>;
|
||||
|
47
arch/arm/boot/dts/stm32mp15-scmi.dtsi
Normal file
47
arch/arm/boot/dts/stm32mp15-scmi.dtsi
Normal file
@ -0,0 +1,47 @@
|
||||
// SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause)
|
||||
/*
|
||||
* Copyright (C) STMicroelectronics 2022 - All Rights Reserved
|
||||
* Author: Alexandre Torgue <alexandre.torgue@foss.st.com> for STMicroelectronics.
|
||||
*/
|
||||
|
||||
/ {
|
||||
firmware {
|
||||
optee: optee {
|
||||
compatible = "linaro,optee-tz";
|
||||
method = "smc";
|
||||
};
|
||||
|
||||
scmi: scmi {
|
||||
compatible = "linaro,scmi-optee";
|
||||
#address-cells = <1>;
|
||||
#size-cells = <0>;
|
||||
linaro,optee-channel-id = <0>;
|
||||
shmem = <&scmi_shm>;
|
||||
|
||||
scmi_clk: protocol@14 {
|
||||
reg = <0x14>;
|
||||
#clock-cells = <1>;
|
||||
};
|
||||
|
||||
scmi_reset: protocol@16 {
|
||||
reg = <0x16>;
|
||||
#reset-cells = <1>;
|
||||
};
|
||||
};
|
||||
};
|
||||
|
||||
soc {
|
||||
scmi_sram: sram@2ffff000 {
|
||||
compatible = "mmio-sram";
|
||||
reg = <0x2ffff000 0x1000>;
|
||||
#address-cells = <1>;
|
||||
#size-cells = <1>;
|
||||
ranges = <0 0x2ffff000 0x1000>;
|
||||
|
||||
scmi_shm: scmi-sram@0 {
|
||||
compatible = "arm,scmi-shmem";
|
||||
reg = <0 0x80>;
|
||||
};
|
||||
};
|
||||
};
|
||||
};
|
@ -115,33 +115,6 @@
|
||||
status = "disabled";
|
||||
};
|
||||
|
||||
firmware {
|
||||
optee: optee {
|
||||
compatible = "linaro,optee-tz";
|
||||
method = "smc";
|
||||
status = "disabled";
|
||||
};
|
||||
|
||||
scmi: scmi {
|
||||
compatible = "linaro,scmi-optee";
|
||||
#address-cells = <1>;
|
||||
#size-cells = <0>;
|
||||
linaro,optee-channel-id = <0>;
|
||||
shmem = <&scmi_shm>;
|
||||
status = "disabled";
|
||||
|
||||
scmi_clk: protocol@14 {
|
||||
reg = <0x14>;
|
||||
#clock-cells = <1>;
|
||||
};
|
||||
|
||||
scmi_reset: protocol@16 {
|
||||
reg = <0x16>;
|
||||
#reset-cells = <1>;
|
||||
};
|
||||
};
|
||||
};
|
||||
|
||||
soc {
|
||||
compatible = "simple-bus";
|
||||
#address-cells = <1>;
|
||||
@ -149,20 +122,6 @@
|
||||
interrupt-parent = <&intc>;
|
||||
ranges;
|
||||
|
||||
scmi_sram: sram@2ffff000 {
|
||||
compatible = "mmio-sram";
|
||||
reg = <0x2ffff000 0x1000>;
|
||||
#address-cells = <1>;
|
||||
#size-cells = <1>;
|
||||
ranges = <0 0x2ffff000 0x1000>;
|
||||
|
||||
scmi_shm: scmi-sram@0 {
|
||||
compatible = "arm,scmi-shmem";
|
||||
reg = <0 0x80>;
|
||||
status = "disabled";
|
||||
};
|
||||
};
|
||||
|
||||
timers2: timer@40000000 {
|
||||
#address-cells = <1>;
|
||||
#size-cells = <0>;
|
||||
|
@ -7,6 +7,7 @@
|
||||
/dts-v1/;
|
||||
|
||||
#include "stm32mp157a-dk1.dts"
|
||||
#include "stm32mp15-scmi.dtsi"
|
||||
|
||||
/ {
|
||||
model = "STMicroelectronics STM32MP157A-DK1 SCMI Discovery Board";
|
||||
@ -54,10 +55,6 @@
|
||||
resets = <&scmi_reset RST_SCMI_MCU>;
|
||||
};
|
||||
|
||||
&optee {
|
||||
status = "okay";
|
||||
};
|
||||
|
||||
&rcc {
|
||||
compatible = "st,stm32mp1-rcc-secure", "syscon";
|
||||
clock-names = "hse", "hsi", "csi", "lse", "lsi";
|
||||
@ -76,11 +73,3 @@
|
||||
&rtc {
|
||||
clocks = <&scmi_clk CK_SCMI_RTCAPB>, <&scmi_clk CK_SCMI_RTC>;
|
||||
};
|
||||
|
||||
&scmi {
|
||||
status = "okay";
|
||||
};
|
||||
|
||||
&scmi_shm {
|
||||
status = "okay";
|
||||
};
|
||||
|
@ -7,6 +7,7 @@
|
||||
/dts-v1/;
|
||||
|
||||
#include "stm32mp157c-dk2.dts"
|
||||
#include "stm32mp15-scmi.dtsi"
|
||||
|
||||
/ {
|
||||
model = "STMicroelectronics STM32MP157C-DK2 SCMI Discovery Board";
|
||||
@ -63,10 +64,6 @@
|
||||
resets = <&scmi_reset RST_SCMI_MCU>;
|
||||
};
|
||||
|
||||
&optee {
|
||||
status = "okay";
|
||||
};
|
||||
|
||||
&rcc {
|
||||
compatible = "st,stm32mp1-rcc-secure", "syscon";
|
||||
clock-names = "hse", "hsi", "csi", "lse", "lsi";
|
||||
@ -85,11 +82,3 @@
|
||||
&rtc {
|
||||
clocks = <&scmi_clk CK_SCMI_RTCAPB>, <&scmi_clk CK_SCMI_RTC>;
|
||||
};
|
||||
|
||||
&scmi {
|
||||
status = "okay";
|
||||
};
|
||||
|
||||
&scmi_shm {
|
||||
status = "okay";
|
||||
};
|
||||
|
@ -7,6 +7,7 @@
|
||||
/dts-v1/;
|
||||
|
||||
#include "stm32mp157c-ed1.dts"
|
||||
#include "stm32mp15-scmi.dtsi"
|
||||
|
||||
/ {
|
||||
model = "STMicroelectronics STM32MP157C-ED1 SCMI eval daughter";
|
||||
@ -59,10 +60,6 @@
|
||||
resets = <&scmi_reset RST_SCMI_MCU>;
|
||||
};
|
||||
|
||||
&optee {
|
||||
status = "okay";
|
||||
};
|
||||
|
||||
&rcc {
|
||||
compatible = "st,stm32mp1-rcc-secure", "syscon";
|
||||
clock-names = "hse", "hsi", "csi", "lse", "lsi";
|
||||
@ -81,11 +78,3 @@
|
||||
&rtc {
|
||||
clocks = <&scmi_clk CK_SCMI_RTCAPB>, <&scmi_clk CK_SCMI_RTC>;
|
||||
};
|
||||
|
||||
&scmi {
|
||||
status = "okay";
|
||||
};
|
||||
|
||||
&scmi_shm {
|
||||
status = "okay";
|
||||
};
|
||||
|
@ -7,6 +7,7 @@
|
||||
/dts-v1/;
|
||||
|
||||
#include "stm32mp157c-ev1.dts"
|
||||
#include "stm32mp15-scmi.dtsi"
|
||||
|
||||
/ {
|
||||
model = "STMicroelectronics STM32MP157C-EV1 SCMI eval daughter on eval mother";
|
||||
@ -68,10 +69,6 @@
|
||||
resets = <&scmi_reset RST_SCMI_MCU>;
|
||||
};
|
||||
|
||||
&optee {
|
||||
status = "okay";
|
||||
};
|
||||
|
||||
&rcc {
|
||||
compatible = "st,stm32mp1-rcc-secure", "syscon";
|
||||
clock-names = "hse", "hsi", "csi", "lse", "lsi";
|
||||
@ -90,11 +87,3 @@
|
||||
&rtc {
|
||||
clocks = <&scmi_clk CK_SCMI_RTCAPB>, <&scmi_clk CK_SCMI_RTC>;
|
||||
};
|
||||
|
||||
&scmi {
|
||||
status = "okay";
|
||||
};
|
||||
|
||||
&scmi_shm {
|
||||
status = "okay";
|
||||
};
|
||||
|
@ -39,6 +39,7 @@ static int axxia_boot_secondary(unsigned int cpu, struct task_struct *idle)
|
||||
return -ENOENT;
|
||||
|
||||
syscon = of_iomap(syscon_np, 0);
|
||||
of_node_put(syscon_np);
|
||||
if (!syscon)
|
||||
return -ENOMEM;
|
||||
|
||||
|
@ -372,6 +372,7 @@ static void __init cns3xxx_init(void)
|
||||
/* De-Asscer SATA Reset */
|
||||
cns3xxx_pwr_soft_rst(CNS3XXX_PWR_SOFTWARE_RST(SATA));
|
||||
}
|
||||
of_node_put(dn);
|
||||
|
||||
dn = of_find_compatible_node(NULL, NULL, "cavium,cns3420-sdhci");
|
||||
if (of_device_is_available(dn)) {
|
||||
@ -385,6 +386,7 @@ static void __init cns3xxx_init(void)
|
||||
cns3xxx_pwr_clk_en(CNS3XXX_PWR_CLK_EN(SDIO));
|
||||
cns3xxx_pwr_soft_rst(CNS3XXX_PWR_SOFTWARE_RST(SDIO));
|
||||
}
|
||||
of_node_put(dn);
|
||||
|
||||
pm_power_off = cns3xxx_power_off;
|
||||
|
||||
|
@ -149,6 +149,7 @@ static void exynos_map_pmu(void)
|
||||
np = of_find_matching_node(NULL, exynos_dt_pmu_match);
|
||||
if (np)
|
||||
pmu_base_addr = of_iomap(np, 0);
|
||||
of_node_put(np);
|
||||
}
|
||||
|
||||
static void __init exynos_init_irq(void)
|
||||
|
@ -218,13 +218,13 @@ void __init spear_setup_of_timer(void)
|
||||
irq = irq_of_parse_and_map(np, 0);
|
||||
if (!irq) {
|
||||
pr_err("%s: No irq passed for timer via DT\n", __func__);
|
||||
return;
|
||||
goto err_put_np;
|
||||
}
|
||||
|
||||
gpt_base = of_iomap(np, 0);
|
||||
if (!gpt_base) {
|
||||
pr_err("%s: of iomap failed\n", __func__);
|
||||
return;
|
||||
goto err_put_np;
|
||||
}
|
||||
|
||||
gpt_clk = clk_get_sys("gpt0", NULL);
|
||||
@ -239,6 +239,8 @@ void __init spear_setup_of_timer(void)
|
||||
goto err_prepare_enable_clk;
|
||||
}
|
||||
|
||||
of_node_put(np);
|
||||
|
||||
spear_clockevent_init(irq);
|
||||
spear_clocksource_init();
|
||||
|
||||
@ -248,4 +250,6 @@ err_prepare_enable_clk:
|
||||
clk_put(gpt_clk);
|
||||
err_iomap:
|
||||
iounmap(gpt_base);
|
||||
err_put_np:
|
||||
of_node_put(np);
|
||||
}
|
||||
|
@ -280,8 +280,8 @@
|
||||
interrupts = <GIC_SPI 246 IRQ_TYPE_LEVEL_HIGH>;
|
||||
pinctrl-names = "default";
|
||||
pinctrl-0 = <&uart0_bus>;
|
||||
clocks = <&cmu_peri CLK_GOUT_UART0_EXT_UCLK>,
|
||||
<&cmu_peri CLK_GOUT_UART0_PCLK>;
|
||||
clocks = <&cmu_peri CLK_GOUT_UART0_PCLK>,
|
||||
<&cmu_peri CLK_GOUT_UART0_EXT_UCLK>;
|
||||
clock-names = "uart", "clk_uart_baud0";
|
||||
samsung,uart-fifosize = <64>;
|
||||
status = "disabled";
|
||||
@ -293,8 +293,8 @@
|
||||
interrupts = <GIC_SPI 247 IRQ_TYPE_LEVEL_HIGH>;
|
||||
pinctrl-names = "default";
|
||||
pinctrl-0 = <&uart1_bus>;
|
||||
clocks = <&cmu_peri CLK_GOUT_UART1_EXT_UCLK>,
|
||||
<&cmu_peri CLK_GOUT_UART1_PCLK>;
|
||||
clocks = <&cmu_peri CLK_GOUT_UART1_PCLK>,
|
||||
<&cmu_peri CLK_GOUT_UART1_EXT_UCLK>;
|
||||
clock-names = "uart", "clk_uart_baud0";
|
||||
samsung,uart-fifosize = <256>;
|
||||
status = "disabled";
|
||||
@ -306,8 +306,8 @@
|
||||
interrupts = <GIC_SPI 279 IRQ_TYPE_LEVEL_HIGH>;
|
||||
pinctrl-names = "default";
|
||||
pinctrl-0 = <&uart2_bus>;
|
||||
clocks = <&cmu_peri CLK_GOUT_UART2_EXT_UCLK>,
|
||||
<&cmu_peri CLK_GOUT_UART2_PCLK>;
|
||||
clocks = <&cmu_peri CLK_GOUT_UART2_PCLK>,
|
||||
<&cmu_peri CLK_GOUT_UART2_EXT_UCLK>;
|
||||
clock-names = "uart", "clk_uart_baud0";
|
||||
samsung,uart-fifosize = <256>;
|
||||
status = "disabled";
|
||||
|
@ -79,7 +79,7 @@
|
||||
};
|
||||
};
|
||||
|
||||
soc {
|
||||
soc@0 {
|
||||
compatible = "simple-bus";
|
||||
#address-cells = <1>;
|
||||
#size-cells = <1>;
|
||||
|
@ -456,13 +456,11 @@
|
||||
clock-names = "clk_ahb", "clk_xin";
|
||||
mmc-ddr-1_8v;
|
||||
mmc-hs200-1_8v;
|
||||
mmc-hs400-1_8v;
|
||||
ti,trm-icp = <0x2>;
|
||||
ti,otap-del-sel-legacy = <0x0>;
|
||||
ti,otap-del-sel-mmc-hs = <0x0>;
|
||||
ti,otap-del-sel-ddr52 = <0x6>;
|
||||
ti,otap-del-sel-hs200 = <0x7>;
|
||||
ti,otap-del-sel-hs400 = <0x4>;
|
||||
};
|
||||
|
||||
sdhci1: mmc@fa00000 {
|
||||
|
@ -33,7 +33,7 @@
|
||||
ranges;
|
||||
#interrupt-cells = <3>;
|
||||
interrupt-controller;
|
||||
reg = <0x00 0x01800000 0x00 0x200000>, /* GICD */
|
||||
reg = <0x00 0x01800000 0x00 0x100000>, /* GICD */
|
||||
<0x00 0x01900000 0x00 0x100000>, /* GICR */
|
||||
<0x00 0x6f000000 0x00 0x2000>, /* GICC */
|
||||
<0x00 0x6f010000 0x00 0x1000>, /* GICH */
|
||||
|
@ -362,11 +362,6 @@ struct kvm_vcpu_arch {
|
||||
struct arch_timer_cpu timer_cpu;
|
||||
struct kvm_pmu pmu;
|
||||
|
||||
/*
|
||||
* Anything that is not used directly from assembly code goes
|
||||
* here.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Guest registers we preserve during guest debugging.
|
||||
*
|
||||
|
@ -113,6 +113,9 @@ static __always_inline bool has_vhe(void)
|
||||
/*
|
||||
* Code only run in VHE/NVHE hyp context can assume VHE is present or
|
||||
* absent. Otherwise fall back to caps.
|
||||
* This allows the compiler to discard VHE-specific code from the
|
||||
* nVHE object, reducing the number of external symbol references
|
||||
* needed to link.
|
||||
*/
|
||||
if (is_vhe_hyp_code())
|
||||
return true;
|
||||
|
@ -1974,15 +1974,7 @@ static void cpu_enable_mte(struct arm64_cpu_capabilities const *cap)
|
||||
#ifdef CONFIG_KVM
|
||||
static bool is_kvm_protected_mode(const struct arm64_cpu_capabilities *entry, int __unused)
|
||||
{
|
||||
if (kvm_get_mode() != KVM_MODE_PROTECTED)
|
||||
return false;
|
||||
|
||||
if (is_kernel_in_hyp_mode()) {
|
||||
pr_warn("Protected KVM not available with VHE\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
return kvm_get_mode() == KVM_MODE_PROTECTED;
|
||||
}
|
||||
#endif /* CONFIG_KVM */
|
||||
|
||||
@ -3109,7 +3101,6 @@ void cpu_set_feature(unsigned int num)
|
||||
WARN_ON(num >= MAX_CPU_FEATURES);
|
||||
elf_hwcap |= BIT(num);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(cpu_set_feature);
|
||||
|
||||
bool cpu_have_feature(unsigned int num)
|
||||
{
|
||||
|
@ -102,7 +102,6 @@ SYM_INNER_LABEL(ftrace_call, SYM_L_GLOBAL)
|
||||
* x19-x29 per the AAPCS, and we created frame records upon entry, so we need
|
||||
* to restore x0-x8, x29, and x30.
|
||||
*/
|
||||
ftrace_common_return:
|
||||
/* Restore function arguments */
|
||||
ldp x0, x1, [sp]
|
||||
ldp x2, x3, [sp, #S_X2]
|
||||
|
@ -77,6 +77,66 @@ static struct plt_entry *get_ftrace_plt(struct module *mod, unsigned long addr)
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/*
|
||||
* Find the address the callsite must branch to in order to reach '*addr'.
|
||||
*
|
||||
* Due to the limited range of 'BL' instructions, modules may be placed too far
|
||||
* away to branch directly and must use a PLT.
|
||||
*
|
||||
* Returns true when '*addr' contains a reachable target address, or has been
|
||||
* modified to contain a PLT address. Returns false otherwise.
|
||||
*/
|
||||
static bool ftrace_find_callable_addr(struct dyn_ftrace *rec,
|
||||
struct module *mod,
|
||||
unsigned long *addr)
|
||||
{
|
||||
unsigned long pc = rec->ip;
|
||||
long offset = (long)*addr - (long)pc;
|
||||
struct plt_entry *plt;
|
||||
|
||||
/*
|
||||
* When the target is within range of the 'BL' instruction, use 'addr'
|
||||
* as-is and branch to that directly.
|
||||
*/
|
||||
if (offset >= -SZ_128M && offset < SZ_128M)
|
||||
return true;
|
||||
|
||||
/*
|
||||
* When the target is outside of the range of a 'BL' instruction, we
|
||||
* must use a PLT to reach it. We can only place PLTs for modules, and
|
||||
* only when module PLT support is built-in.
|
||||
*/
|
||||
if (!IS_ENABLED(CONFIG_ARM64_MODULE_PLTS))
|
||||
return false;
|
||||
|
||||
/*
|
||||
* 'mod' is only set at module load time, but if we end up
|
||||
* dealing with an out-of-range condition, we can assume it
|
||||
* is due to a module being loaded far away from the kernel.
|
||||
*
|
||||
* NOTE: __module_text_address() must be called with preemption
|
||||
* disabled, but we can rely on ftrace_lock to ensure that 'mod'
|
||||
* retains its validity throughout the remainder of this code.
|
||||
*/
|
||||
if (!mod) {
|
||||
preempt_disable();
|
||||
mod = __module_text_address(pc);
|
||||
preempt_enable();
|
||||
}
|
||||
|
||||
if (WARN_ON(!mod))
|
||||
return false;
|
||||
|
||||
plt = get_ftrace_plt(mod, *addr);
|
||||
if (!plt) {
|
||||
pr_err("ftrace: no module PLT for %ps\n", (void *)*addr);
|
||||
return false;
|
||||
}
|
||||
|
||||
*addr = (unsigned long)plt;
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
|
||||
* Turn on the call to ftrace_caller() in instrumented function
|
||||
*/
|
||||
@ -84,41 +144,10 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
|
||||
{
|
||||
unsigned long pc = rec->ip;
|
||||
u32 old, new;
|
||||
long offset = (long)pc - (long)addr;
|
||||
|
||||
if (offset < -SZ_128M || offset >= SZ_128M) {
|
||||
struct module *mod;
|
||||
struct plt_entry *plt;
|
||||
|
||||
if (!IS_ENABLED(CONFIG_ARM64_MODULE_PLTS))
|
||||
if (!ftrace_find_callable_addr(rec, NULL, &addr))
|
||||
return -EINVAL;
|
||||
|
||||
/*
|
||||
* On kernels that support module PLTs, the offset between the
|
||||
* branch instruction and its target may legally exceed the
|
||||
* range of an ordinary relative 'bl' opcode. In this case, we
|
||||
* need to branch via a trampoline in the module.
|
||||
*
|
||||
* NOTE: __module_text_address() must be called with preemption
|
||||
* disabled, but we can rely on ftrace_lock to ensure that 'mod'
|
||||
* retains its validity throughout the remainder of this code.
|
||||
*/
|
||||
preempt_disable();
|
||||
mod = __module_text_address(pc);
|
||||
preempt_enable();
|
||||
|
||||
if (WARN_ON(!mod))
|
||||
return -EINVAL;
|
||||
|
||||
plt = get_ftrace_plt(mod, addr);
|
||||
if (!plt) {
|
||||
pr_err("ftrace: no module PLT for %ps\n", (void *)addr);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
addr = (unsigned long)plt;
|
||||
}
|
||||
|
||||
old = aarch64_insn_gen_nop();
|
||||
new = aarch64_insn_gen_branch_imm(pc, addr, AARCH64_INSN_BRANCH_LINK);
|
||||
|
||||
@ -132,6 +161,11 @@ int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr,
|
||||
unsigned long pc = rec->ip;
|
||||
u32 old, new;
|
||||
|
||||
if (!ftrace_find_callable_addr(rec, NULL, &old_addr))
|
||||
return -EINVAL;
|
||||
if (!ftrace_find_callable_addr(rec, NULL, &addr))
|
||||
return -EINVAL;
|
||||
|
||||
old = aarch64_insn_gen_branch_imm(pc, old_addr,
|
||||
AARCH64_INSN_BRANCH_LINK);
|
||||
new = aarch64_insn_gen_branch_imm(pc, addr, AARCH64_INSN_BRANCH_LINK);
|
||||
@ -181,54 +215,15 @@ int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec,
|
||||
unsigned long addr)
|
||||
{
|
||||
unsigned long pc = rec->ip;
|
||||
bool validate = true;
|
||||
u32 old = 0, new;
|
||||
long offset = (long)pc - (long)addr;
|
||||
|
||||
if (offset < -SZ_128M || offset >= SZ_128M) {
|
||||
u32 replaced;
|
||||
|
||||
if (!IS_ENABLED(CONFIG_ARM64_MODULE_PLTS))
|
||||
if (!ftrace_find_callable_addr(rec, mod, &addr))
|
||||
return -EINVAL;
|
||||
|
||||
/*
|
||||
* 'mod' is only set at module load time, but if we end up
|
||||
* dealing with an out-of-range condition, we can assume it
|
||||
* is due to a module being loaded far away from the kernel.
|
||||
*/
|
||||
if (!mod) {
|
||||
preempt_disable();
|
||||
mod = __module_text_address(pc);
|
||||
preempt_enable();
|
||||
|
||||
if (WARN_ON(!mod))
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
/*
|
||||
* The instruction we are about to patch may be a branch and
|
||||
* link instruction that was redirected via a PLT entry. In
|
||||
* this case, the normal validation will fail, but we can at
|
||||
* least check that we are dealing with a branch and link
|
||||
* instruction that points into the right module.
|
||||
*/
|
||||
if (aarch64_insn_read((void *)pc, &replaced))
|
||||
return -EFAULT;
|
||||
|
||||
if (!aarch64_insn_is_bl(replaced) ||
|
||||
!within_module(pc + aarch64_get_branch_offset(replaced),
|
||||
mod))
|
||||
return -EINVAL;
|
||||
|
||||
validate = false;
|
||||
} else {
|
||||
old = aarch64_insn_gen_branch_imm(pc, addr,
|
||||
AARCH64_INSN_BRANCH_LINK);
|
||||
}
|
||||
|
||||
old = aarch64_insn_gen_branch_imm(pc, addr, AARCH64_INSN_BRANCH_LINK);
|
||||
new = aarch64_insn_gen_nop();
|
||||
|
||||
return ftrace_modify_code(pc, old, new, validate);
|
||||
return ftrace_modify_code(pc, old, new, true);
|
||||
}
|
||||
|
||||
void arch_ftrace_update_code(int command)
|
||||
|
@ -303,14 +303,13 @@ void __init __no_sanitize_address setup_arch(char **cmdline_p)
|
||||
early_fixmap_init();
|
||||
early_ioremap_init();
|
||||
|
||||
/*
|
||||
* Initialise the static keys early as they may be enabled by the
|
||||
* cpufeature code, early parameters, and DT setup.
|
||||
*/
|
||||
jump_label_init();
|
||||
|
||||
setup_machine_fdt(__fdt_pointer);
|
||||
|
||||
/*
|
||||
* Initialise the static keys early as they may be enabled by the
|
||||
* cpufeature code and early parameters.
|
||||
*/
|
||||
jump_label_init();
|
||||
parse_early_param();
|
||||
|
||||
/*
|
||||
|
@ -1230,6 +1230,9 @@ bool kvm_arch_timer_get_input_level(int vintid)
|
||||
struct kvm_vcpu *vcpu = kvm_get_running_vcpu();
|
||||
struct arch_timer_context *timer;
|
||||
|
||||
if (WARN(!vcpu, "No vcpu context!\n"))
|
||||
return false;
|
||||
|
||||
if (vintid == vcpu_vtimer(vcpu)->irq.irq)
|
||||
timer = vcpu_vtimer(vcpu);
|
||||
else if (vintid == vcpu_ptimer(vcpu)->irq.irq)
|
||||
|
@ -150,8 +150,10 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
|
||||
if (ret)
|
||||
goto out_free_stage2_pgd;
|
||||
|
||||
if (!zalloc_cpumask_var(&kvm->arch.supported_cpus, GFP_KERNEL))
|
||||
if (!zalloc_cpumask_var(&kvm->arch.supported_cpus, GFP_KERNEL)) {
|
||||
ret = -ENOMEM;
|
||||
goto out_free_stage2_pgd;
|
||||
}
|
||||
cpumask_copy(kvm->arch.supported_cpus, cpu_possible_mask);
|
||||
|
||||
kvm_vgic_early_init(kvm);
|
||||
@ -2110,11 +2112,11 @@ static int finalize_hyp_mode(void)
|
||||
return 0;
|
||||
|
||||
/*
|
||||
* Exclude HYP BSS from kmemleak so that it doesn't get peeked
|
||||
* at, which would end badly once the section is inaccessible.
|
||||
* None of other sections should ever be introspected.
|
||||
* Exclude HYP sections from kmemleak so that they don't get peeked
|
||||
* at, which would end badly once inaccessible.
|
||||
*/
|
||||
kmemleak_free_part(__hyp_bss_start, __hyp_bss_end - __hyp_bss_start);
|
||||
kmemleak_free_part(__va(hyp_mem_base), hyp_mem_size);
|
||||
return pkvm_drop_host_privileges();
|
||||
}
|
||||
|
||||
@ -2271,7 +2273,11 @@ static int __init early_kvm_mode_cfg(char *arg)
|
||||
return -EINVAL;
|
||||
|
||||
if (strcmp(arg, "protected") == 0) {
|
||||
if (!is_kernel_in_hyp_mode())
|
||||
kvm_mode = KVM_MODE_PROTECTED;
|
||||
else
|
||||
pr_warn_once("Protected KVM not available with VHE\n");
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -80,6 +80,7 @@ void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu)
|
||||
vcpu->arch.flags &= ~KVM_ARM64_FP_ENABLED;
|
||||
vcpu->arch.flags |= KVM_ARM64_FP_HOST;
|
||||
|
||||
vcpu->arch.flags &= ~KVM_ARM64_HOST_SVE_ENABLED;
|
||||
if (read_sysreg(cpacr_el1) & CPACR_EL1_ZEN_EL0EN)
|
||||
vcpu->arch.flags |= KVM_ARM64_HOST_SVE_ENABLED;
|
||||
|
||||
@ -93,6 +94,7 @@ void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu)
|
||||
* operations. Do this for ZA as well for now for simplicity.
|
||||
*/
|
||||
if (system_supports_sme()) {
|
||||
vcpu->arch.flags &= ~KVM_ARM64_HOST_SME_ENABLED;
|
||||
if (read_sysreg(cpacr_el1) & CPACR_EL1_SMEN_EL0EN)
|
||||
vcpu->arch.flags |= KVM_ARM64_HOST_SME_ENABLED;
|
||||
|
||||
|
@ -314,15 +314,11 @@ static int host_stage2_adjust_range(u64 addr, struct kvm_mem_range *range)
|
||||
int host_stage2_idmap_locked(phys_addr_t addr, u64 size,
|
||||
enum kvm_pgtable_prot prot)
|
||||
{
|
||||
hyp_assert_lock_held(&host_kvm.lock);
|
||||
|
||||
return host_stage2_try(__host_stage2_idmap, addr, addr + size, prot);
|
||||
}
|
||||
|
||||
int host_stage2_set_owner_locked(phys_addr_t addr, u64 size, u8 owner_id)
|
||||
{
|
||||
hyp_assert_lock_held(&host_kvm.lock);
|
||||
|
||||
return host_stage2_try(kvm_pgtable_stage2_set_owner, &host_kvm.pgt,
|
||||
addr, size, &host_s2_pool, owner_id);
|
||||
}
|
||||
|
@ -243,15 +243,9 @@ u64 pvm_read_id_reg(const struct kvm_vcpu *vcpu, u32 id)
|
||||
case SYS_ID_AA64MMFR2_EL1:
|
||||
return get_pvm_id_aa64mmfr2(vcpu);
|
||||
default:
|
||||
/*
|
||||
* Should never happen because all cases are covered in
|
||||
* pvm_sys_reg_descs[].
|
||||
*/
|
||||
WARN_ON(1);
|
||||
break;
|
||||
}
|
||||
|
||||
/* Unhandled ID register, RAZ */
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
static u64 read_id_reg(const struct kvm_vcpu *vcpu,
|
||||
@ -332,6 +326,16 @@ static bool pvm_gic_read_sre(struct kvm_vcpu *vcpu,
|
||||
/* Mark the specified system register as an AArch64 feature id register. */
|
||||
#define AARCH64(REG) { SYS_DESC(REG), .access = pvm_access_id_aarch64 }
|
||||
|
||||
/*
|
||||
* sys_reg_desc initialiser for architecturally unallocated cpufeature ID
|
||||
* register with encoding Op0=3, Op1=0, CRn=0, CRm=crm, Op2=op2
|
||||
* (1 <= crm < 8, 0 <= Op2 < 8).
|
||||
*/
|
||||
#define ID_UNALLOCATED(crm, op2) { \
|
||||
Op0(3), Op1(0), CRn(0), CRm(crm), Op2(op2), \
|
||||
.access = pvm_access_id_aarch64, \
|
||||
}
|
||||
|
||||
/* Mark the specified system register as Read-As-Zero/Write-Ignored */
|
||||
#define RAZ_WI(REG) { SYS_DESC(REG), .access = pvm_access_raz_wi }
|
||||
|
||||
@ -375,24 +379,46 @@ static const struct sys_reg_desc pvm_sys_reg_descs[] = {
|
||||
AARCH32(SYS_MVFR0_EL1),
|
||||
AARCH32(SYS_MVFR1_EL1),
|
||||
AARCH32(SYS_MVFR2_EL1),
|
||||
ID_UNALLOCATED(3,3),
|
||||
AARCH32(SYS_ID_PFR2_EL1),
|
||||
AARCH32(SYS_ID_DFR1_EL1),
|
||||
AARCH32(SYS_ID_MMFR5_EL1),
|
||||
ID_UNALLOCATED(3,7),
|
||||
|
||||
/* AArch64 ID registers */
|
||||
/* CRm=4 */
|
||||
AARCH64(SYS_ID_AA64PFR0_EL1),
|
||||
AARCH64(SYS_ID_AA64PFR1_EL1),
|
||||
ID_UNALLOCATED(4,2),
|
||||
ID_UNALLOCATED(4,3),
|
||||
AARCH64(SYS_ID_AA64ZFR0_EL1),
|
||||
ID_UNALLOCATED(4,5),
|
||||
ID_UNALLOCATED(4,6),
|
||||
ID_UNALLOCATED(4,7),
|
||||
AARCH64(SYS_ID_AA64DFR0_EL1),
|
||||
AARCH64(SYS_ID_AA64DFR1_EL1),
|
||||
ID_UNALLOCATED(5,2),
|
||||
ID_UNALLOCATED(5,3),
|
||||
AARCH64(SYS_ID_AA64AFR0_EL1),
|
||||
AARCH64(SYS_ID_AA64AFR1_EL1),
|
||||
ID_UNALLOCATED(5,6),
|
||||
ID_UNALLOCATED(5,7),
|
||||
AARCH64(SYS_ID_AA64ISAR0_EL1),
|
||||
AARCH64(SYS_ID_AA64ISAR1_EL1),
|
||||
AARCH64(SYS_ID_AA64ISAR2_EL1),
|
||||
ID_UNALLOCATED(6,3),
|
||||
ID_UNALLOCATED(6,4),
|
||||
ID_UNALLOCATED(6,5),
|
||||
ID_UNALLOCATED(6,6),
|
||||
ID_UNALLOCATED(6,7),
|
||||
AARCH64(SYS_ID_AA64MMFR0_EL1),
|
||||
AARCH64(SYS_ID_AA64MMFR1_EL1),
|
||||
AARCH64(SYS_ID_AA64MMFR2_EL1),
|
||||
ID_UNALLOCATED(7,3),
|
||||
ID_UNALLOCATED(7,4),
|
||||
ID_UNALLOCATED(7,5),
|
||||
ID_UNALLOCATED(7,6),
|
||||
ID_UNALLOCATED(7,7),
|
||||
|
||||
/* Scalable Vector Registers are restricted. */
|
||||
|
||||
|
@ -429,11 +429,11 @@ static const struct vgic_register_region vgic_v2_dist_registers[] = {
|
||||
VGIC_ACCESS_32bit),
|
||||
REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_PENDING_SET,
|
||||
vgic_mmio_read_pending, vgic_mmio_write_spending,
|
||||
NULL, vgic_uaccess_write_spending, 1,
|
||||
vgic_uaccess_read_pending, vgic_uaccess_write_spending, 1,
|
||||
VGIC_ACCESS_32bit),
|
||||
REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_PENDING_CLEAR,
|
||||
vgic_mmio_read_pending, vgic_mmio_write_cpending,
|
||||
NULL, vgic_uaccess_write_cpending, 1,
|
||||
vgic_uaccess_read_pending, vgic_uaccess_write_cpending, 1,
|
||||
VGIC_ACCESS_32bit),
|
||||
REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_ACTIVE_SET,
|
||||
vgic_mmio_read_active, vgic_mmio_write_sactive,
|
||||
|
@ -353,42 +353,6 @@ static unsigned long vgic_mmio_read_v3_idregs(struct kvm_vcpu *vcpu,
|
||||
return 0;
|
||||
}
|
||||
|
||||
static unsigned long vgic_v3_uaccess_read_pending(struct kvm_vcpu *vcpu,
|
||||
gpa_t addr, unsigned int len)
|
||||
{
|
||||
u32 intid = VGIC_ADDR_TO_INTID(addr, 1);
|
||||
u32 value = 0;
|
||||
int i;
|
||||
|
||||
/*
|
||||
* pending state of interrupt is latched in pending_latch variable.
|
||||
* Userspace will save and restore pending state and line_level
|
||||
* separately.
|
||||
* Refer to Documentation/virt/kvm/devices/arm-vgic-v3.rst
|
||||
* for handling of ISPENDR and ICPENDR.
|
||||
*/
|
||||
for (i = 0; i < len * 8; i++) {
|
||||
struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
|
||||
bool state = irq->pending_latch;
|
||||
|
||||
if (irq->hw && vgic_irq_is_sgi(irq->intid)) {
|
||||
int err;
|
||||
|
||||
err = irq_get_irqchip_state(irq->host_irq,
|
||||
IRQCHIP_STATE_PENDING,
|
||||
&state);
|
||||
WARN_ON(err);
|
||||
}
|
||||
|
||||
if (state)
|
||||
value |= (1U << i);
|
||||
|
||||
vgic_put_irq(vcpu->kvm, irq);
|
||||
}
|
||||
|
||||
return value;
|
||||
}
|
||||
|
||||
static int vgic_v3_uaccess_write_pending(struct kvm_vcpu *vcpu,
|
||||
gpa_t addr, unsigned int len,
|
||||
unsigned long val)
|
||||
@ -666,7 +630,7 @@ static const struct vgic_register_region vgic_v3_dist_registers[] = {
|
||||
VGIC_ACCESS_32bit),
|
||||
REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ISPENDR,
|
||||
vgic_mmio_read_pending, vgic_mmio_write_spending,
|
||||
vgic_v3_uaccess_read_pending, vgic_v3_uaccess_write_pending, 1,
|
||||
vgic_uaccess_read_pending, vgic_v3_uaccess_write_pending, 1,
|
||||
VGIC_ACCESS_32bit),
|
||||
REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ICPENDR,
|
||||
vgic_mmio_read_pending, vgic_mmio_write_cpending,
|
||||
@ -750,7 +714,7 @@ static const struct vgic_register_region vgic_v3_rd_registers[] = {
|
||||
VGIC_ACCESS_32bit),
|
||||
REGISTER_DESC_WITH_LENGTH_UACCESS(SZ_64K + GICR_ISPENDR0,
|
||||
vgic_mmio_read_pending, vgic_mmio_write_spending,
|
||||
vgic_v3_uaccess_read_pending, vgic_v3_uaccess_write_pending, 4,
|
||||
vgic_uaccess_read_pending, vgic_v3_uaccess_write_pending, 4,
|
||||
VGIC_ACCESS_32bit),
|
||||
REGISTER_DESC_WITH_LENGTH_UACCESS(SZ_64K + GICR_ICPENDR0,
|
||||
vgic_mmio_read_pending, vgic_mmio_write_cpending,
|
||||
|
@ -226,8 +226,9 @@ int vgic_uaccess_write_cenable(struct kvm_vcpu *vcpu,
|
||||
return 0;
|
||||
}
|
||||
|
||||
unsigned long vgic_mmio_read_pending(struct kvm_vcpu *vcpu,
|
||||
gpa_t addr, unsigned int len)
|
||||
static unsigned long __read_pending(struct kvm_vcpu *vcpu,
|
||||
gpa_t addr, unsigned int len,
|
||||
bool is_user)
|
||||
{
|
||||
u32 intid = VGIC_ADDR_TO_INTID(addr, 1);
|
||||
u32 value = 0;
|
||||
@ -239,6 +240,15 @@ unsigned long vgic_mmio_read_pending(struct kvm_vcpu *vcpu,
|
||||
unsigned long flags;
|
||||
bool val;
|
||||
|
||||
/*
|
||||
* When used from userspace with a GICv3 model:
|
||||
*
|
||||
* Pending state of interrupt is latched in pending_latch
|
||||
* variable. Userspace will save and restore pending state
|
||||
* and line_level separately.
|
||||
* Refer to Documentation/virt/kvm/devices/arm-vgic-v3.rst
|
||||
* for handling of ISPENDR and ICPENDR.
|
||||
*/
|
||||
raw_spin_lock_irqsave(&irq->irq_lock, flags);
|
||||
if (irq->hw && vgic_irq_is_sgi(irq->intid)) {
|
||||
int err;
|
||||
@ -248,10 +258,20 @@ unsigned long vgic_mmio_read_pending(struct kvm_vcpu *vcpu,
|
||||
IRQCHIP_STATE_PENDING,
|
||||
&val);
|
||||
WARN_RATELIMIT(err, "IRQ %d", irq->host_irq);
|
||||
} else if (vgic_irq_is_mapped_level(irq)) {
|
||||
} else if (!is_user && vgic_irq_is_mapped_level(irq)) {
|
||||
val = vgic_get_phys_line_level(irq);
|
||||
} else {
|
||||
switch (vcpu->kvm->arch.vgic.vgic_model) {
|
||||
case KVM_DEV_TYPE_ARM_VGIC_V3:
|
||||
if (is_user) {
|
||||
val = irq->pending_latch;
|
||||
break;
|
||||
}
|
||||
fallthrough;
|
||||
default:
|
||||
val = irq_is_pending(irq);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
value |= ((u32)val << i);
|
||||
@ -263,6 +283,18 @@ unsigned long vgic_mmio_read_pending(struct kvm_vcpu *vcpu,
|
||||
return value;
|
||||
}
|
||||
|
||||
unsigned long vgic_mmio_read_pending(struct kvm_vcpu *vcpu,
|
||||
gpa_t addr, unsigned int len)
|
||||
{
|
||||
return __read_pending(vcpu, addr, len, false);
|
||||
}
|
||||
|
||||
unsigned long vgic_uaccess_read_pending(struct kvm_vcpu *vcpu,
|
||||
gpa_t addr, unsigned int len)
|
||||
{
|
||||
return __read_pending(vcpu, addr, len, true);
|
||||
}
|
||||
|
||||
static bool is_vgic_v2_sgi(struct kvm_vcpu *vcpu, struct vgic_irq *irq)
|
||||
{
|
||||
return (vgic_irq_is_sgi(irq->intid) &&
|
||||
|
@ -149,6 +149,9 @@ int vgic_uaccess_write_cenable(struct kvm_vcpu *vcpu,
|
||||
unsigned long vgic_mmio_read_pending(struct kvm_vcpu *vcpu,
|
||||
gpa_t addr, unsigned int len);
|
||||
|
||||
unsigned long vgic_uaccess_read_pending(struct kvm_vcpu *vcpu,
|
||||
gpa_t addr, unsigned int len);
|
||||
|
||||
void vgic_mmio_write_spending(struct kvm_vcpu *vcpu,
|
||||
gpa_t addr, unsigned int len,
|
||||
unsigned long val);
|
||||
|
@ -218,8 +218,6 @@ SYM_FUNC_ALIAS(__dma_flush_area, __pi___dma_flush_area)
|
||||
*/
|
||||
SYM_FUNC_START(__pi___dma_map_area)
|
||||
add x1, x0, x1
|
||||
cmp w2, #DMA_FROM_DEVICE
|
||||
b.eq __pi_dcache_inval_poc
|
||||
b __pi_dcache_clean_poc
|
||||
SYM_FUNC_END(__pi___dma_map_area)
|
||||
SYM_FUNC_ALIAS(__dma_map_area, __pi___dma_map_area)
|
||||
|
@ -12,10 +12,9 @@ static inline unsigned long exception_era(struct pt_regs *regs)
|
||||
return regs->csr_era;
|
||||
}
|
||||
|
||||
static inline int compute_return_era(struct pt_regs *regs)
|
||||
static inline void compute_return_era(struct pt_regs *regs)
|
||||
{
|
||||
regs->csr_era += 4;
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif /* _ASM_BRANCH_H */
|
||||
|
@ -426,6 +426,11 @@ static inline void update_mmu_cache_pmd(struct vm_area_struct *vma,
|
||||
|
||||
#define kern_addr_valid(addr) (1)
|
||||
|
||||
static inline unsigned long pmd_pfn(pmd_t pmd)
|
||||
{
|
||||
return (pmd_val(pmd) & _PFN_MASK) >> _PFN_SHIFT;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
|
||||
|
||||
/* We don't have hardware dirty/accessed bits, generic_pmdp_establish is fine.*/
|
||||
@ -497,11 +502,6 @@ static inline pmd_t pmd_mkyoung(pmd_t pmd)
|
||||
return pmd;
|
||||
}
|
||||
|
||||
static inline unsigned long pmd_pfn(pmd_t pmd)
|
||||
{
|
||||
return (pmd_val(pmd) & _PFN_MASK) >> _PFN_SHIFT;
|
||||
}
|
||||
|
||||
static inline struct page *pmd_page(pmd_t pmd)
|
||||
{
|
||||
if (pmd_trans_huge(pmd))
|
||||
|
@ -263,7 +263,7 @@ void cpu_probe(void)
|
||||
|
||||
c->cputype = CPU_UNKNOWN;
|
||||
c->processor_id = read_cpucfg(LOONGARCH_CPUCFG0);
|
||||
c->fpu_vers = (read_cpucfg(LOONGARCH_CPUCFG2) >> 3) & 0x3;
|
||||
c->fpu_vers = (read_cpucfg(LOONGARCH_CPUCFG2) & CPUCFG2_FPVERS) >> 3;
|
||||
|
||||
c->fpu_csr0 = FPU_CSR_RN;
|
||||
c->fpu_mask = FPU_CSR_RSVD;
|
||||
|
@ -14,8 +14,6 @@
|
||||
|
||||
__REF
|
||||
|
||||
SYM_ENTRY(_stext, SYM_L_GLOBAL, SYM_A_NONE)
|
||||
|
||||
SYM_CODE_START(kernel_entry) # kernel entry point
|
||||
|
||||
/* Config direct window and set PG */
|
||||
|
@ -475,8 +475,7 @@ asmlinkage void noinstr do_ri(struct pt_regs *regs)
|
||||
|
||||
die_if_kernel("Reserved instruction in kernel code", regs);
|
||||
|
||||
if (unlikely(compute_return_era(regs) < 0))
|
||||
goto out;
|
||||
compute_return_era(regs);
|
||||
|
||||
if (unlikely(get_user(opcode, era) < 0)) {
|
||||
status = SIGSEGV;
|
||||
|
@ -37,6 +37,7 @@ SECTIONS
|
||||
HEAD_TEXT_SECTION
|
||||
|
||||
. = ALIGN(PECOFF_SEGMENT_ALIGN);
|
||||
_stext = .;
|
||||
.text : {
|
||||
TEXT_TEXT
|
||||
SCHED_TEXT
|
||||
@ -101,6 +102,7 @@ SECTIONS
|
||||
|
||||
STABS_DEBUG
|
||||
DWARF_DEBUG
|
||||
ELF_DETAILS
|
||||
|
||||
.gptab.sdata : {
|
||||
*(.gptab.data)
|
||||
|
@ -281,15 +281,16 @@ void setup_tlb_handler(int cpu)
|
||||
if (pcpu_handlers[cpu])
|
||||
return;
|
||||
|
||||
page = alloc_pages_node(cpu_to_node(cpu), GFP_KERNEL, get_order(vec_sz));
|
||||
page = alloc_pages_node(cpu_to_node(cpu), GFP_ATOMIC, get_order(vec_sz));
|
||||
if (!page)
|
||||
return;
|
||||
|
||||
addr = page_address(page);
|
||||
pcpu_handlers[cpu] = virt_to_phys(addr);
|
||||
pcpu_handlers[cpu] = (unsigned long)addr;
|
||||
memcpy((void *)addr, (void *)eentry, vec_sz);
|
||||
local_flush_icache_range((unsigned long)addr, (unsigned long)addr + vec_sz);
|
||||
csr_write64(pcpu_handlers[cpu], LOONGARCH_CSR_TLBRENTRY);
|
||||
csr_write64(pcpu_handlers[cpu], LOONGARCH_CSR_EENTRY);
|
||||
csr_write64(pcpu_handlers[cpu], LOONGARCH_CSR_MERRENTRY);
|
||||
csr_write64(pcpu_handlers[cpu] + 80*VECSIZE, LOONGARCH_CSR_TLBRENTRY);
|
||||
}
|
||||
#endif
|
||||
|
@ -111,8 +111,9 @@
|
||||
|
||||
clocks = <&cgu X1000_CLK_RTCLK>,
|
||||
<&cgu X1000_CLK_EXCLK>,
|
||||
<&cgu X1000_CLK_PCLK>;
|
||||
clock-names = "rtc", "ext", "pclk";
|
||||
<&cgu X1000_CLK_PCLK>,
|
||||
<&cgu X1000_CLK_TCU>;
|
||||
clock-names = "rtc", "ext", "pclk", "tcu";
|
||||
|
||||
interrupt-controller;
|
||||
#interrupt-cells = <1>;
|
||||
|
@ -104,8 +104,9 @@
|
||||
|
||||
clocks = <&cgu X1830_CLK_RTCLK>,
|
||||
<&cgu X1830_CLK_EXCLK>,
|
||||
<&cgu X1830_CLK_PCLK>;
|
||||
clock-names = "rtc", "ext", "pclk";
|
||||
<&cgu X1830_CLK_PCLK>,
|
||||
<&cgu X1830_CLK_TCU>;
|
||||
clock-names = "rtc", "ext", "pclk", "tcu";
|
||||
|
||||
interrupt-controller;
|
||||
#interrupt-cells = <1>;
|
||||
|
@ -44,6 +44,7 @@ static __init unsigned int ranchu_measure_hpt_freq(void)
|
||||
__func__);
|
||||
|
||||
rtc_base = of_iomap(np, 0);
|
||||
of_node_put(np);
|
||||
if (!rtc_base)
|
||||
panic("%s(): Failed to ioremap Goldfish RTC base!", __func__);
|
||||
|
||||
|
@ -208,6 +208,12 @@ void __init ltq_soc_init(void)
|
||||
of_address_to_resource(np_sysgpe, 0, &res_sys[2]))
|
||||
panic("Failed to get core resources");
|
||||
|
||||
of_node_put(np_status);
|
||||
of_node_put(np_ebu);
|
||||
of_node_put(np_sys1);
|
||||
of_node_put(np_syseth);
|
||||
of_node_put(np_sysgpe);
|
||||
|
||||
if ((request_mem_region(res_status.start, resource_size(&res_status),
|
||||
res_status.name) < 0) ||
|
||||
(request_mem_region(res_ebu.start, resource_size(&res_ebu),
|
||||
|
@ -408,6 +408,7 @@ int __init icu_of_init(struct device_node *node, struct device_node *parent)
|
||||
if (!ltq_eiu_membase)
|
||||
panic("Failed to remap eiu memory");
|
||||
}
|
||||
of_node_put(eiu_node);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -441,6 +441,10 @@ void __init ltq_soc_init(void)
|
||||
of_address_to_resource(np_ebu, 0, &res_ebu))
|
||||
panic("Failed to get core resources");
|
||||
|
||||
of_node_put(np_pmu);
|
||||
of_node_put(np_cgu);
|
||||
of_node_put(np_ebu);
|
||||
|
||||
if (!request_mem_region(res_pmu.start, resource_size(&res_pmu),
|
||||
res_pmu.name) ||
|
||||
!request_mem_region(res_cgu.start, resource_size(&res_cgu),
|
||||
|
@ -214,6 +214,8 @@ static void update_gic_frequency_dt(void)
|
||||
|
||||
if (of_update_property(node, &gic_frequency_prop) < 0)
|
||||
pr_err("error updating gic frequency property\n");
|
||||
|
||||
of_node_put(node);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@ -98,13 +98,18 @@ static int __init pic32_of_prepare_platform_data(struct of_dev_auxdata *lookup)
|
||||
np = of_find_compatible_node(NULL, NULL, lookup->compatible);
|
||||
if (np) {
|
||||
lookup->name = (char *)np->name;
|
||||
if (lookup->phys_addr)
|
||||
if (lookup->phys_addr) {
|
||||
of_node_put(np);
|
||||
continue;
|
||||
}
|
||||
if (!of_address_to_resource(np, 0, &res))
|
||||
lookup->phys_addr = res.start;
|
||||
of_node_put(np);
|
||||
}
|
||||
}
|
||||
|
||||
of_node_put(root);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -32,6 +32,9 @@ static unsigned int pic32_xlate_core_timer_irq(void)
|
||||
goto default_map;
|
||||
|
||||
irq = irq_of_parse_and_map(node, 0);
|
||||
|
||||
of_node_put(node);
|
||||
|
||||
if (!irq)
|
||||
goto default_map;
|
||||
|
||||
|
@ -40,6 +40,8 @@ __iomem void *plat_of_remap_node(const char *node)
|
||||
if (of_address_to_resource(np, 0, &res))
|
||||
panic("Failed to get resource for %s", node);
|
||||
|
||||
of_node_put(np);
|
||||
|
||||
if (!request_mem_region(res.start,
|
||||
resource_size(&res),
|
||||
res.name))
|
||||
|
@ -640,8 +640,6 @@ static int icu_get_irq(unsigned int irq)
|
||||
|
||||
printk(KERN_ERR "spurious ICU interrupt: %04x,%04x\n", pend1, pend2);
|
||||
|
||||
atomic_inc(&irq_err_count);
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
|
@ -10,6 +10,7 @@ config PARISC
|
||||
select ARCH_WANT_FRAME_POINTERS
|
||||
select ARCH_HAS_ELF_RANDOMIZE
|
||||
select ARCH_HAS_STRICT_KERNEL_RWX
|
||||
select ARCH_HAS_STRICT_MODULE_RWX
|
||||
select ARCH_HAS_UBSAN_SANITIZE_ALL
|
||||
select ARCH_HAS_PTE_SPECIAL
|
||||
select ARCH_NO_SG_CHAIN
|
||||
|
@ -12,7 +12,7 @@ static inline void fb_pgprotect(struct file *file, struct vm_area_struct *vma,
|
||||
pgprot_val(vma->vm_page_prot) |= _PAGE_NO_CACHE;
|
||||
}
|
||||
|
||||
#if defined(CONFIG_STI_CONSOLE) || defined(CONFIG_FB_STI)
|
||||
#if defined(CONFIG_FB_STI)
|
||||
int fb_is_primary_device(struct fb_info *info);
|
||||
#else
|
||||
static inline int fb_is_primary_device(struct fb_info *info)
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user