Merge branch 'master' into next
Conflicts: fs/namei.c Manually merged per: diff --cc fs/namei.c index 734f2b5,bbc15c2..0000000 --- a/fs/namei.c +++ b/fs/namei.c @@@ -860,9 -848,8 +849,10 @@@ static int __link_path_walk(const char nd->flags |= LOOKUP_CONTINUE; err = exec_permission_lite(inode); if (err == -EAGAIN) - err = vfs_permission(nd, MAY_EXEC); + err = inode_permission(nd->path.dentry->d_inode, + MAY_EXEC); + if (!err) + err = ima_path_check(&nd->path, MAY_EXEC); if (err) break; @@@ -1525,14 -1506,9 +1509,14 @@@ int may_open(struct path *path, int acc flag &= ~O_TRUNC; } - error = vfs_permission(nd, acc_mode); + error = inode_permission(inode, acc_mode); if (error) return error; + - error = ima_path_check(&nd->path, ++ error = ima_path_check(path, + acc_mode & (MAY_READ | MAY_WRITE | MAY_EXEC)); + if (error) + return error; /* * An append-only file must be opened in append mode for writing. */ Signed-off-by: James Morris <jmorris@namei.org>
This commit is contained in:
commit
cb5629b10d
1
.mailmap
1
.mailmap
@ -32,6 +32,7 @@ Christoph Hellwig <hch@lst.de>
|
||||
Corey Minyard <minyard@acm.org>
|
||||
David Brownell <david-b@pacbell.net>
|
||||
David Woodhouse <dwmw2@shinybook.infradead.org>
|
||||
Dmitry Eremin-Solenikov <dbaryshkov@gmail.com>
|
||||
Domen Puncer <domen@coderock.org>
|
||||
Douglas Gilbert <dougg@torque.net>
|
||||
Ed L. Cashin <ecashin@coraid.com>
|
||||
|
38
CREDITS
38
CREDITS
@ -369,10 +369,10 @@ P: 1024/8462A731 4C 55 86 34 44 59 A7 99 2B 97 88 4A 88 9A 0D 97
|
||||
D: sun4 port, Sparc hacker
|
||||
|
||||
N: Hugh Blemings
|
||||
E: hugh@misc.nu
|
||||
W: http://misc.nu/hugh/
|
||||
D: Author and maintainer of the Keyspan USB to Serial drivers
|
||||
S: Po Box 234
|
||||
E: hugh@blemings.org
|
||||
W: http://blemings.org/hugh
|
||||
D: Original author of the Keyspan USB to serial drivers, random PowerPC hacker
|
||||
S: PO Box 234
|
||||
S: Belconnen ACT 2616
|
||||
S: Australia
|
||||
|
||||
@ -464,6 +464,11 @@ S: 1200 Goldenrod Dr.
|
||||
S: Nampa, Idaho 83686
|
||||
S: USA
|
||||
|
||||
N: Dirk J. Brandewie
|
||||
E: dirk.j.brandewie@intel.com
|
||||
E: linux-wimax@intel.com
|
||||
D: Intel Wireless WiMAX Connection 2400 SDIO driver
|
||||
|
||||
N: Derrick J. Brashear
|
||||
E: shadow@dementia.org
|
||||
W: http://www.dementia.org/~shadow
|
||||
@ -1681,7 +1686,7 @@ E: ajoshi@shell.unixbox.com
|
||||
D: fbdev hacking
|
||||
|
||||
N: Jesper Juhl
|
||||
E: jesper.juhl@gmail.com
|
||||
E: jj@chaosbits.net
|
||||
D: Various fixes, cleanups and minor features all over the tree.
|
||||
D: Wrote initial version of the hdaps driver (since passed on to others).
|
||||
S: Lemnosvej 1, 3.tv
|
||||
@ -2119,6 +2124,11 @@ N: H.J. Lu
|
||||
E: hjl@gnu.ai.mit.edu
|
||||
D: GCC + libraries hacker
|
||||
|
||||
N: Yanir Lubetkin
|
||||
E: yanirx.lubatkin@intel.com
|
||||
E: linux-wimax@intel.com
|
||||
D: Intel Wireless WiMAX Connection 2400 driver
|
||||
|
||||
N: Michal Ludvig
|
||||
E: michal@logix.cz
|
||||
E: michal.ludvig@asterisk.co.nz
|
||||
@ -2693,6 +2703,13 @@ S: RR #5, 497 Pole Line Road
|
||||
S: Thunder Bay, Ontario
|
||||
S: CANADA P7C 5M9
|
||||
|
||||
N: Inaky Perez-Gonzalez
|
||||
E: inaky.perez-gonzalez@intel.com
|
||||
E: linux-wimax@intel.com
|
||||
E: inakypg@yahoo.com
|
||||
D: WiMAX stack
|
||||
D: Intel Wireless WiMAX Connection 2400 driver
|
||||
|
||||
N: Yuri Per
|
||||
E: yuri@pts.mipt.ru
|
||||
D: Some smbfs fixes
|
||||
@ -3769,14 +3786,11 @@ S: The Netherlands
|
||||
|
||||
N: David Woodhouse
|
||||
E: dwmw2@infradead.org
|
||||
D: ARCnet stuff, Applicom board driver, SO_BINDTODEVICE,
|
||||
D: some Alpha platform porting from 2.0, Memory Technology Devices,
|
||||
D: Acquire watchdog timer, PC speaker driver maintenance,
|
||||
D: JFFS2 file system, Memory Technology Device subsystem,
|
||||
D: various other stuff that annoyed me by not working.
|
||||
S: c/o Red Hat Engineering
|
||||
S: Rustat House
|
||||
S: 60 Clifton Road
|
||||
S: Cambridge. CB1 7EG
|
||||
S: c/o Intel Corporation
|
||||
S: Pipers Way
|
||||
S: Swindon. SN3 1RJ
|
||||
S: England
|
||||
|
||||
N: Chris Wright
|
||||
|
@ -3,8 +3,9 @@ Date: April 2008
|
||||
KernelVersion: 2.6.26
|
||||
Contact: Liam Girdwood <lrg@slimlogic.co.uk>
|
||||
Description:
|
||||
Each regulator directory will contain a field called
|
||||
state. This holds the regulator output state.
|
||||
Some regulator directories will contain a field called
|
||||
state. This reports the regulator enable status, for
|
||||
regulators which can report that value.
|
||||
|
||||
This will be one of the following strings:
|
||||
|
||||
@ -18,7 +19,8 @@ Description:
|
||||
'disabled' means the regulator output is OFF and is not
|
||||
supplying power to the system.
|
||||
|
||||
'unknown' means software cannot determine the state.
|
||||
'unknown' means software cannot determine the state, or
|
||||
the reported state is invalid.
|
||||
|
||||
NOTE: this field can be used in conjunction with microvolts
|
||||
and microamps to determine regulator output levels.
|
||||
@ -53,9 +55,10 @@ Date: April 2008
|
||||
KernelVersion: 2.6.26
|
||||
Contact: Liam Girdwood <lrg@slimlogic.co.uk>
|
||||
Description:
|
||||
Each regulator directory will contain a field called
|
||||
Some regulator directories will contain a field called
|
||||
microvolts. This holds the regulator output voltage setting
|
||||
measured in microvolts (i.e. E-6 Volts).
|
||||
measured in microvolts (i.e. E-6 Volts), for regulators
|
||||
which can report that voltage.
|
||||
|
||||
NOTE: This value should not be used to determine the regulator
|
||||
output voltage level as this value is the same regardless of
|
||||
@ -67,9 +70,10 @@ Date: April 2008
|
||||
KernelVersion: 2.6.26
|
||||
Contact: Liam Girdwood <lrg@slimlogic.co.uk>
|
||||
Description:
|
||||
Each regulator directory will contain a field called
|
||||
Some regulator directories will contain a field called
|
||||
microamps. This holds the regulator output current limit
|
||||
setting measured in microamps (i.e. E-6 Amps).
|
||||
setting measured in microamps (i.e. E-6 Amps), for regulators
|
||||
which can report that current.
|
||||
|
||||
NOTE: This value should not be used to determine the regulator
|
||||
output current level as this value is the same regardless of
|
||||
@ -81,8 +85,9 @@ Date: April 2008
|
||||
KernelVersion: 2.6.26
|
||||
Contact: Liam Girdwood <lrg@slimlogic.co.uk>
|
||||
Description:
|
||||
Each regulator directory will contain a field called
|
||||
opmode. This holds the regulator operating mode setting.
|
||||
Some regulator directories will contain a field called
|
||||
opmode. This holds the current regulator operating mode,
|
||||
for regulators which can report it.
|
||||
|
||||
The opmode value can be one of the following strings:
|
||||
|
||||
@ -92,7 +97,7 @@ Description:
|
||||
'standby'
|
||||
'unknown'
|
||||
|
||||
The modes are described in include/linux/regulator/regulator.h
|
||||
The modes are described in include/linux/regulator/consumer.h
|
||||
|
||||
NOTE: This value should not be used to determine the regulator
|
||||
output operating mode as this value is the same regardless of
|
||||
@ -104,9 +109,10 @@ Date: April 2008
|
||||
KernelVersion: 2.6.26
|
||||
Contact: Liam Girdwood <lrg@slimlogic.co.uk>
|
||||
Description:
|
||||
Each regulator directory will contain a field called
|
||||
Some regulator directories will contain a field called
|
||||
min_microvolts. This holds the minimum safe working regulator
|
||||
output voltage setting for this domain measured in microvolts.
|
||||
output voltage setting for this domain measured in microvolts,
|
||||
for regulators which support voltage constraints.
|
||||
|
||||
NOTE: this will return the string 'constraint not defined' if
|
||||
the power domain has no min microvolts constraint defined by
|
||||
@ -118,9 +124,10 @@ Date: April 2008
|
||||
KernelVersion: 2.6.26
|
||||
Contact: Liam Girdwood <lrg@slimlogic.co.uk>
|
||||
Description:
|
||||
Each regulator directory will contain a field called
|
||||
Some regulator directories will contain a field called
|
||||
max_microvolts. This holds the maximum safe working regulator
|
||||
output voltage setting for this domain measured in microvolts.
|
||||
output voltage setting for this domain measured in microvolts,
|
||||
for regulators which support voltage constraints.
|
||||
|
||||
NOTE: this will return the string 'constraint not defined' if
|
||||
the power domain has no max microvolts constraint defined by
|
||||
@ -132,10 +139,10 @@ Date: April 2008
|
||||
KernelVersion: 2.6.26
|
||||
Contact: Liam Girdwood <lrg@slimlogic.co.uk>
|
||||
Description:
|
||||
Each regulator directory will contain a field called
|
||||
Some regulator directories will contain a field called
|
||||
min_microamps. This holds the minimum safe working regulator
|
||||
output current limit setting for this domain measured in
|
||||
microamps.
|
||||
microamps, for regulators which support current constraints.
|
||||
|
||||
NOTE: this will return the string 'constraint not defined' if
|
||||
the power domain has no min microamps constraint defined by
|
||||
@ -147,10 +154,10 @@ Date: April 2008
|
||||
KernelVersion: 2.6.26
|
||||
Contact: Liam Girdwood <lrg@slimlogic.co.uk>
|
||||
Description:
|
||||
Each regulator directory will contain a field called
|
||||
Some regulator directories will contain a field called
|
||||
max_microamps. This holds the maximum safe working regulator
|
||||
output current limit setting for this domain measured in
|
||||
microamps.
|
||||
microamps, for regulators which support current constraints.
|
||||
|
||||
NOTE: this will return the string 'constraint not defined' if
|
||||
the power domain has no max microamps constraint defined by
|
||||
@ -185,7 +192,7 @@ Date: April 2008
|
||||
KernelVersion: 2.6.26
|
||||
Contact: Liam Girdwood <lrg@slimlogic.co.uk>
|
||||
Description:
|
||||
Each regulator directory will contain a field called
|
||||
Some regulator directories will contain a field called
|
||||
requested_microamps. This holds the total requested load
|
||||
current in microamps for this regulator from all its consumer
|
||||
devices.
|
||||
@ -204,125 +211,102 @@ Date: May 2008
|
||||
KernelVersion: 2.6.26
|
||||
Contact: Liam Girdwood <lrg@slimlogic.co.uk>
|
||||
Description:
|
||||
Each regulator directory will contain a field called
|
||||
Some regulator directories will contain a field called
|
||||
suspend_mem_microvolts. This holds the regulator output
|
||||
voltage setting for this domain measured in microvolts when
|
||||
the system is suspended to memory.
|
||||
|
||||
NOTE: this will return the string 'not defined' if
|
||||
the power domain has no suspend to memory voltage defined by
|
||||
platform code.
|
||||
the system is suspended to memory, for voltage regulators
|
||||
implementing suspend voltage configuration constraints.
|
||||
|
||||
What: /sys/class/regulator/.../suspend_disk_microvolts
|
||||
Date: May 2008
|
||||
KernelVersion: 2.6.26
|
||||
Contact: Liam Girdwood <lrg@slimlogic.co.uk>
|
||||
Description:
|
||||
Each regulator directory will contain a field called
|
||||
Some regulator directories will contain a field called
|
||||
suspend_disk_microvolts. This holds the regulator output
|
||||
voltage setting for this domain measured in microvolts when
|
||||
the system is suspended to disk.
|
||||
|
||||
NOTE: this will return the string 'not defined' if
|
||||
the power domain has no suspend to disk voltage defined by
|
||||
platform code.
|
||||
the system is suspended to disk, for voltage regulators
|
||||
implementing suspend voltage configuration constraints.
|
||||
|
||||
What: /sys/class/regulator/.../suspend_standby_microvolts
|
||||
Date: May 2008
|
||||
KernelVersion: 2.6.26
|
||||
Contact: Liam Girdwood <lrg@slimlogic.co.uk>
|
||||
Description:
|
||||
Each regulator directory will contain a field called
|
||||
Some regulator directories will contain a field called
|
||||
suspend_standby_microvolts. This holds the regulator output
|
||||
voltage setting for this domain measured in microvolts when
|
||||
the system is suspended to standby.
|
||||
|
||||
NOTE: this will return the string 'not defined' if
|
||||
the power domain has no suspend to standby voltage defined by
|
||||
platform code.
|
||||
the system is suspended to standby, for voltage regulators
|
||||
implementing suspend voltage configuration constraints.
|
||||
|
||||
What: /sys/class/regulator/.../suspend_mem_mode
|
||||
Date: May 2008
|
||||
KernelVersion: 2.6.26
|
||||
Contact: Liam Girdwood <lrg@slimlogic.co.uk>
|
||||
Description:
|
||||
Each regulator directory will contain a field called
|
||||
Some regulator directories will contain a field called
|
||||
suspend_mem_mode. This holds the regulator operating mode
|
||||
setting for this domain when the system is suspended to
|
||||
memory.
|
||||
|
||||
NOTE: this will return the string 'not defined' if
|
||||
the power domain has no suspend to memory mode defined by
|
||||
platform code.
|
||||
memory, for regulators implementing suspend mode
|
||||
configuration constraints.
|
||||
|
||||
What: /sys/class/regulator/.../suspend_disk_mode
|
||||
Date: May 2008
|
||||
KernelVersion: 2.6.26
|
||||
Contact: Liam Girdwood <lrg@slimlogic.co.uk>
|
||||
Description:
|
||||
Each regulator directory will contain a field called
|
||||
Some regulator directories will contain a field called
|
||||
suspend_disk_mode. This holds the regulator operating mode
|
||||
setting for this domain when the system is suspended to disk.
|
||||
|
||||
NOTE: this will return the string 'not defined' if
|
||||
the power domain has no suspend to disk mode defined by
|
||||
platform code.
|
||||
setting for this domain when the system is suspended to disk,
|
||||
for regulators implementing suspend mode configuration
|
||||
constraints.
|
||||
|
||||
What: /sys/class/regulator/.../suspend_standby_mode
|
||||
Date: May 2008
|
||||
KernelVersion: 2.6.26
|
||||
Contact: Liam Girdwood <lrg@slimlogic.co.uk>
|
||||
Description:
|
||||
Each regulator directory will contain a field called
|
||||
Some regulator directories will contain a field called
|
||||
suspend_standby_mode. This holds the regulator operating mode
|
||||
setting for this domain when the system is suspended to
|
||||
standby.
|
||||
|
||||
NOTE: this will return the string 'not defined' if
|
||||
the power domain has no suspend to standby mode defined by
|
||||
platform code.
|
||||
standby, for regulators implementing suspend mode
|
||||
configuration constraints.
|
||||
|
||||
What: /sys/class/regulator/.../suspend_mem_state
|
||||
Date: May 2008
|
||||
KernelVersion: 2.6.26
|
||||
Contact: Liam Girdwood <lrg@slimlogic.co.uk>
|
||||
Description:
|
||||
Each regulator directory will contain a field called
|
||||
Some regulator directories will contain a field called
|
||||
suspend_mem_state. This holds the regulator operating state
|
||||
when suspended to memory.
|
||||
when suspended to memory, for regulators implementing suspend
|
||||
configuration constraints.
|
||||
|
||||
This will be one of the following strings:
|
||||
|
||||
'enabled'
|
||||
'disabled'
|
||||
'not defined'
|
||||
This will be one of the same strings reported by
|
||||
the "state" attribute.
|
||||
|
||||
What: /sys/class/regulator/.../suspend_disk_state
|
||||
Date: May 2008
|
||||
KernelVersion: 2.6.26
|
||||
Contact: Liam Girdwood <lrg@slimlogic.co.uk>
|
||||
Description:
|
||||
Each regulator directory will contain a field called
|
||||
Some regulator directories will contain a field called
|
||||
suspend_disk_state. This holds the regulator operating state
|
||||
when suspended to disk.
|
||||
when suspended to disk, for regulators implementing
|
||||
suspend configuration constraints.
|
||||
|
||||
This will be one of the following strings:
|
||||
|
||||
'enabled'
|
||||
'disabled'
|
||||
'not defined'
|
||||
This will be one of the same strings reported by
|
||||
the "state" attribute.
|
||||
|
||||
What: /sys/class/regulator/.../suspend_standby_state
|
||||
Date: May 2008
|
||||
KernelVersion: 2.6.26
|
||||
Contact: Liam Girdwood <lrg@slimlogic.co.uk>
|
||||
Description:
|
||||
Each regulator directory will contain a field called
|
||||
Some regulator directories will contain a field called
|
||||
suspend_standby_state. This holds the regulator operating
|
||||
state when suspended to standby.
|
||||
state when suspended to standby, for regulators implementing
|
||||
suspend configuration constraints.
|
||||
|
||||
This will be one of the following strings:
|
||||
|
||||
'enabled'
|
||||
'disabled'
|
||||
'not defined'
|
||||
This will be one of the same strings reported by
|
||||
the "state" attribute.
|
||||
|
@ -32,14 +32,16 @@ Contact: linux-usb@vger.kernel.org
|
||||
Description:
|
||||
Write:
|
||||
|
||||
<channel> [<bpst offset>]
|
||||
<channel>
|
||||
|
||||
to start beaconing on a specific channel, or stop
|
||||
beaconing if <channel> is -1. Valid channels depends
|
||||
on the radio controller's supported band groups.
|
||||
to force a specific channel to be used when beaconing,
|
||||
or, if <channel> is -1, to prohibit beaconing. If
|
||||
<channel> is 0, then the default channel selection
|
||||
algorithm will be used. Valid channels depend on the
|
||||
radio controller's supported band groups.
|
||||
|
||||
<bpst offset> may be used to try and join a specific
|
||||
beacon group if more than one was found during a scan.
|
||||
Reading returns the currently active channel, or -1 if
|
||||
the radio controller is not beaconing.
|
||||
|
||||
What: /sys/class/uwb_rc/uwbN/scan
|
||||
Date: July 2008
|
||||
|
@ -6,7 +6,6 @@ Description:
|
||||
internal state of the kernel memory blocks. Files could be
|
||||
added or removed dynamically to represent hot-add/remove
|
||||
operations.
|
||||
|
||||
Users: hotplug memory add/remove tools
|
||||
https://w3.opensource.ibm.com/projects/powerpc-utils/
|
||||
|
||||
@ -19,6 +18,56 @@ Description:
|
||||
This is useful for a user-level agent to
|
||||
identify removable sections of the memory before attempting
|
||||
potentially expensive hot-remove memory operation
|
||||
|
||||
Users: hotplug memory remove tools
|
||||
https://w3.opensource.ibm.com/projects/powerpc-utils/
|
||||
|
||||
What: /sys/devices/system/memory/memoryX/phys_device
|
||||
Date: September 2008
|
||||
Contact: Badari Pulavarty <pbadari@us.ibm.com>
|
||||
Description:
|
||||
The file /sys/devices/system/memory/memoryX/phys_device
|
||||
is read-only and is designed to show the name of physical
|
||||
memory device. Implementation is currently incomplete.
|
||||
|
||||
What: /sys/devices/system/memory/memoryX/phys_index
|
||||
Date: September 2008
|
||||
Contact: Badari Pulavarty <pbadari@us.ibm.com>
|
||||
Description:
|
||||
The file /sys/devices/system/memory/memoryX/phys_index
|
||||
is read-only and contains the section ID in hexadecimal
|
||||
which is equivalent to decimal X contained in the
|
||||
memory section directory name.
|
||||
|
||||
What: /sys/devices/system/memory/memoryX/state
|
||||
Date: September 2008
|
||||
Contact: Badari Pulavarty <pbadari@us.ibm.com>
|
||||
Description:
|
||||
The file /sys/devices/system/memory/memoryX/state
|
||||
is read-write. When read, its contents show the
|
||||
online/offline state of the memory section. When written,
|
||||
root can toggle the online/offline state of a removable
|
||||
memory section (see removable file description above)
|
||||
using the following commands.
|
||||
# echo online > /sys/devices/system/memory/memoryX/state
|
||||
# echo offline > /sys/devices/system/memory/memoryX/state
|
||||
|
||||
For example, if /sys/devices/system/memory/memory22/removable
|
||||
contains a value of 1 and
|
||||
/sys/devices/system/memory/memory22/state contains the
|
||||
string "online" the following command can be executed by
|
||||
root to offline that section.
|
||||
# echo offline > /sys/devices/system/memory/memory22/state
|
||||
Users: hotplug memory remove tools
|
||||
https://w3.opensource.ibm.com/projects/powerpc-utils/
|
||||
|
||||
What: /sys/devices/system/node/nodeX/memoryY
|
||||
Date: September 2008
|
||||
Contact: Gary Hade <garyhade@us.ibm.com>
|
||||
Description:
|
||||
When CONFIG_NUMA is enabled
|
||||
/sys/devices/system/node/nodeX/memoryY is a symbolic link that
|
||||
points to the corresponding /sys/devices/system/memory/memoryY
|
||||
memory section directory. For example, the following symbolic
|
||||
link is created for memory section 9 on node0.
|
||||
/sys/devices/system/node/node0/memory9 -> ../../memory/memory9
|
||||
|
||||
|
@ -33,10 +33,12 @@ o Gnu make 3.79.1 # make --version
|
||||
o binutils 2.12 # ld -v
|
||||
o util-linux 2.10o # fdformat --version
|
||||
o module-init-tools 0.9.10 # depmod -V
|
||||
o e2fsprogs 1.29 # tune2fs
|
||||
o e2fsprogs 1.41.4 # e2fsck -V
|
||||
o jfsutils 1.1.3 # fsck.jfs -V
|
||||
o reiserfsprogs 3.6.3 # reiserfsck -V 2>&1|grep reiserfsprogs
|
||||
o xfsprogs 2.6.0 # xfs_db -V
|
||||
o squashfs-tools 4.0 # mksquashfs -version
|
||||
o btrfs-progs 0.18 # btrfsck
|
||||
o pcmciautils 004 # pccardctl -V
|
||||
o quota-tools 3.09 # quota -V
|
||||
o PPP 2.4.0 # pppd --version
|
||||
|
@ -483,17 +483,25 @@ values. To do the latter, you can stick the following in your .emacs file:
|
||||
(* (max steps 1)
|
||||
c-basic-offset)))
|
||||
|
||||
(add-hook 'c-mode-common-hook
|
||||
(lambda ()
|
||||
;; Add kernel style
|
||||
(c-add-style
|
||||
"linux-tabs-only"
|
||||
'("linux" (c-offsets-alist
|
||||
(arglist-cont-nonempty
|
||||
c-lineup-gcc-asm-reg
|
||||
c-lineup-arglist-tabs-only))))))
|
||||
|
||||
(add-hook 'c-mode-hook
|
||||
(lambda ()
|
||||
(let ((filename (buffer-file-name)))
|
||||
;; Enable kernel mode for the appropriate files
|
||||
(when (and filename
|
||||
(string-match "~/src/linux-trees" filename))
|
||||
(string-match (expand-file-name "~/src/linux-trees")
|
||||
filename))
|
||||
(setq indent-tabs-mode t)
|
||||
(c-set-style "linux")
|
||||
(c-set-offset 'arglist-cont-nonempty
|
||||
'(c-lineup-gcc-asm-reg
|
||||
c-lineup-arglist-tabs-only))))))
|
||||
(c-set-style "linux-tabs-only")))))
|
||||
|
||||
This will make emacs go better with the kernel coding style for C
|
||||
files below ~/src/linux-trees.
|
||||
|
@ -5,7 +5,7 @@
|
||||
|
||||
This document describes the DMA API. For a more gentle introduction
|
||||
phrased in terms of the pci_ equivalents (and actual examples) see
|
||||
DMA-mapping.txt
|
||||
Documentation/PCI/PCI-DMA-mapping.txt.
|
||||
|
||||
This API is split into two pieces. Part I describes the API and the
|
||||
corresponding pci_ API. Part II describes the extensions to the API
|
||||
@ -170,16 +170,15 @@ Returns: 0 if successful and a negative error if not.
|
||||
u64
|
||||
dma_get_required_mask(struct device *dev)
|
||||
|
||||
After setting the mask with dma_set_mask(), this API returns the
|
||||
actual mask (within that already set) that the platform actually
|
||||
requires to operate efficiently. Usually this means the returned mask
|
||||
This API returns the mask that the platform requires to
|
||||
operate efficiently. Usually this means the returned mask
|
||||
is the minimum required to cover all of memory. Examining the
|
||||
required mask gives drivers with variable descriptor sizes the
|
||||
opportunity to use smaller descriptors as necessary.
|
||||
|
||||
Requesting the required mask does not alter the current mask. If you
|
||||
wish to take advantage of it, you should issue another dma_set_mask()
|
||||
call to lower the mask again.
|
||||
wish to take advantage of it, you should issue a dma_set_mask()
|
||||
call to set the mask to the value returned.
|
||||
|
||||
|
||||
Part Id - Streaming DMA mappings
|
||||
|
@ -26,7 +26,7 @@ mapped only for the time they are actually used and unmapped after the DMA
|
||||
transfer.
|
||||
|
||||
The following API will work of course even on platforms where no such
|
||||
hardware exists, see e.g. include/asm-i386/pci.h for how it is implemented on
|
||||
hardware exists, see e.g. arch/x86/include/asm/pci.h for how it is implemented on
|
||||
top of the virt_to_bus interface.
|
||||
|
||||
First of all, you should make sure
|
||||
|
@ -12,7 +12,7 @@ DOCBOOKS := z8530book.xml mcabook.xml \
|
||||
kernel-api.xml filesystems.xml lsm.xml usb.xml kgdb.xml \
|
||||
gadget.xml libata.xml mtdnand.xml librs.xml rapidio.xml \
|
||||
genericirq.xml s390-drivers.xml uio-howto.xml scsi.xml \
|
||||
mac80211.xml debugobjects.xml sh.xml
|
||||
mac80211.xml debugobjects.xml sh.xml regulator.xml
|
||||
|
||||
###
|
||||
# The build process is as follows (targets):
|
||||
|
@ -74,6 +74,14 @@
|
||||
!Enet/sunrpc/rpcb_clnt.c
|
||||
!Enet/sunrpc/clnt.c
|
||||
</sect1>
|
||||
<sect1><title>WiMAX</title>
|
||||
!Enet/wimax/op-msg.c
|
||||
!Enet/wimax/op-reset.c
|
||||
!Enet/wimax/op-rfkill.c
|
||||
!Enet/wimax/stack.c
|
||||
!Iinclude/net/wimax.h
|
||||
!Iinclude/linux/wimax.h
|
||||
</sect1>
|
||||
</chapter>
|
||||
|
||||
<chapter id="netdev">
|
||||
|
304
Documentation/DocBook/regulator.tmpl
Normal file
304
Documentation/DocBook/regulator.tmpl
Normal file
@ -0,0 +1,304 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.1.2//EN"
|
||||
"http://www.oasis-open.org/docbook/xml/4.1.2/docbookx.dtd" []>
|
||||
|
||||
<book id="regulator-api">
|
||||
<bookinfo>
|
||||
<title>Voltage and current regulator API</title>
|
||||
|
||||
<authorgroup>
|
||||
<author>
|
||||
<firstname>Liam</firstname>
|
||||
<surname>Girdwood</surname>
|
||||
<affiliation>
|
||||
<address>
|
||||
<email>lrg@slimlogic.co.uk</email>
|
||||
</address>
|
||||
</affiliation>
|
||||
</author>
|
||||
<author>
|
||||
<firstname>Mark</firstname>
|
||||
<surname>Brown</surname>
|
||||
<affiliation>
|
||||
<orgname>Wolfson Microelectronics</orgname>
|
||||
<address>
|
||||
<email>broonie@opensource.wolfsonmicro.com</email>
|
||||
</address>
|
||||
</affiliation>
|
||||
</author>
|
||||
</authorgroup>
|
||||
|
||||
<copyright>
|
||||
<year>2007-2008</year>
|
||||
<holder>Wolfson Microelectronics</holder>
|
||||
</copyright>
|
||||
<copyright>
|
||||
<year>2008</year>
|
||||
<holder>Liam Girdwood</holder>
|
||||
</copyright>
|
||||
|
||||
<legalnotice>
|
||||
<para>
|
||||
This documentation is free software; you can redistribute
|
||||
it and/or modify it under the terms of the GNU General Public
|
||||
License version 2 as published by the Free Software Foundation.
|
||||
</para>
|
||||
|
||||
<para>
|
||||
This program is distributed in the hope that it will be
|
||||
useful, but WITHOUT ANY WARRANTY; without even the implied
|
||||
warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
||||
See the GNU General Public License for more details.
|
||||
</para>
|
||||
|
||||
<para>
|
||||
You should have received a copy of the GNU General Public
|
||||
License along with this program; if not, write to the Free
|
||||
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
|
||||
MA 02111-1307 USA
|
||||
</para>
|
||||
|
||||
<para>
|
||||
For more details see the file COPYING in the source
|
||||
distribution of Linux.
|
||||
</para>
|
||||
</legalnotice>
|
||||
</bookinfo>
|
||||
|
||||
<toc></toc>
|
||||
|
||||
<chapter id="intro">
|
||||
<title>Introduction</title>
|
||||
<para>
|
||||
This framework is designed to provide a standard kernel
|
||||
interface to control voltage and current regulators.
|
||||
</para>
|
||||
<para>
|
||||
The intention is to allow systems to dynamically control
|
||||
regulator power output in order to save power and prolong
|
||||
battery life. This applies to both voltage regulators (where
|
||||
voltage output is controllable) and current sinks (where current
|
||||
limit is controllable).
|
||||
</para>
|
||||
<para>
|
||||
Note that additional (and currently more complete) documentation
|
||||
is available in the Linux kernel source under
|
||||
<filename>Documentation/power/regulator</filename>.
|
||||
</para>
|
||||
|
||||
<sect1 id="glossary">
|
||||
<title>Glossary</title>
|
||||
<para>
|
||||
The regulator API uses a number of terms which may not be
|
||||
familiar:
|
||||
</para>
|
||||
<glossary>
|
||||
|
||||
<glossentry>
|
||||
<glossterm>Regulator</glossterm>
|
||||
<glossdef>
|
||||
<para>
|
||||
Electronic device that supplies power to other devices. Most
|
||||
regulators can enable and disable their output and some can also
|
||||
control their output voltage or current.
|
||||
</para>
|
||||
</glossdef>
|
||||
</glossentry>
|
||||
|
||||
<glossentry>
|
||||
<glossterm>Consumer</glossterm>
|
||||
<glossdef>
|
||||
<para>
|
||||
Electronic device which consumes power provided by a regulator.
|
||||
These may either be static, requiring only a fixed supply, or
|
||||
dynamic, requiring active management of the regulator at
|
||||
runtime.
|
||||
</para>
|
||||
</glossdef>
|
||||
</glossentry>
|
||||
|
||||
<glossentry>
|
||||
<glossterm>Power Domain</glossterm>
|
||||
<glossdef>
|
||||
<para>
|
||||
The electronic circuit supplied by a given regulator, including
|
||||
the regulator and all consumer devices. The configuration of
|
||||
the regulator is shared between all the components in the
|
||||
circuit.
|
||||
</para>
|
||||
</glossdef>
|
||||
</glossentry>
|
||||
|
||||
<glossentry>
|
||||
<glossterm>Power Management Integrated Circuit</glossterm>
|
||||
<acronym>PMIC</acronym>
|
||||
<glossdef>
|
||||
<para>
|
||||
An IC which contains numerous regulators and often also other
|
||||
subsystems. In an embedded system the primary PMIC is often
|
||||
equivalent to a combination of the PSU and southbridge in a
|
||||
desktop system.
|
||||
</para>
|
||||
</glossdef>
|
||||
</glossentry>
|
||||
</glossary>
|
||||
</sect1>
|
||||
</chapter>
|
||||
|
||||
<chapter id="consumer">
|
||||
<title>Consumer driver interface</title>
|
||||
<para>
|
||||
This offers a similar API to the kernel clock framework.
|
||||
Consumer drivers use <link
|
||||
linkend='API-regulator-get'>get</link> and <link
|
||||
linkend='API-regulator-put'>put</link> operations to acquire and
|
||||
release regulators. Functions are
|
||||
provided to <link linkend='API-regulator-enable'>enable</link>
|
||||
and <link linkend='API-regulator-disable'>disable</link> the
|
||||
regulator and to get and set the runtime parameters of the
|
||||
regulator.
|
||||
</para>
|
||||
<para>
|
||||
When requesting regulators consumers use symbolic names for their
|
||||
supplies, such as "Vcc", which are mapped into actual regulator
|
||||
devices by the machine interface.
|
||||
</para>
|
||||
<para>
|
||||
A stub version of this API is provided when the regulator
|
||||
framework is not in use in order to minimise the need to use
|
||||
ifdefs.
|
||||
</para>
|
||||
|
||||
<sect1 id="consumer-enable">
|
||||
<title>Enabling and disabling</title>
|
||||
<para>
|
||||
The regulator API provides reference counted enabling and
|
||||
disabling of regulators. Consumer devices use the <function><link
|
||||
linkend='API-regulator-enable'>regulator_enable</link></function>
|
||||
and <function><link
|
||||
linkend='API-regulator-disable'>regulator_disable</link>
|
||||
</function> functions to enable and disable regulators. Calls
|
||||
to the two functions must be balanced.
|
||||
</para>
|
||||
<para>
|
||||
Note that since multiple consumers may be using a regulator and
|
||||
machine constraints may not allow the regulator to be disabled
|
||||
there is no guarantee that calling
|
||||
<function>regulator_disable</function> will actually cause the
|
||||
supply provided by the regulator to be disabled. Consumer
|
||||
drivers should assume that the regulator may be enabled at all
|
||||
times.
|
||||
</para>
|
||||
</sect1>
|
||||
|
||||
<sect1 id="consumer-config">
|
||||
<title>Configuration</title>
|
||||
<para>
|
||||
Some consumer devices may need to be able to dynamically
|
||||
configure their supplies. For example, MMC drivers may need to
|
||||
select the correct operating voltage for their cards. This may
|
||||
be done while the regulator is enabled or disabled.
|
||||
</para>
|
||||
<para>
|
||||
The <function><link
|
||||
linkend='API-regulator-set-voltage'>regulator_set_voltage</link>
|
||||
</function> and <function><link
|
||||
linkend='API-regulator-set-current-limit'
|
||||
>regulator_set_current_limit</link>
|
||||
</function> functions provide the primary interface for this.
|
||||
Both take ranges of voltages and currents, supporting drivers
|
||||
that do not require a specific value (eg, CPU frequency scaling
|
||||
normally permits the CPU to use a wider range of supply
|
||||
voltages at lower frequencies but does not require that the
|
||||
supply voltage be lowered). Where an exact value is required
|
||||
both minimum and maximum values should be identical.
|
||||
</para>
|
||||
</sect1>
|
||||
|
||||
<sect1 id="consumer-callback">
|
||||
<title>Callbacks</title>
|
||||
<para>
|
||||
Callbacks may also be <link
|
||||
linkend='API-regulator-register-notifier'>registered</link>
|
||||
for events such as regulation failures.
|
||||
</para>
|
||||
</sect1>
|
||||
</chapter>
|
||||
|
||||
<chapter id="driver">
|
||||
<title>Regulator driver interface</title>
|
||||
<para>
|
||||
Drivers for regulator chips <link
|
||||
linkend='API-regulator-register'>register</link> the regulators
|
||||
with the regulator core, providing operations structures to the
|
||||
core. A <link
|
||||
linkend='API-regulator-notifier-call-chain'>notifier</link> interface
|
||||
allows error conditions to be reported to the core.
|
||||
</para>
|
||||
<para>
|
||||
Registration should be triggered by explicit setup done by the
|
||||
platform, supplying a <link
|
||||
linkend='API-struct-regulator-init-data'>struct
|
||||
regulator_init_data</link> for the regulator containing
|
||||
<link linkend='machine-constraint'>constraint</link> and
|
||||
<link linkend='machine-supply'>supply</link> information.
|
||||
</para>
|
||||
</chapter>
|
||||
|
||||
<chapter id="machine">
|
||||
<title>Machine interface</title>
|
||||
<para>
|
||||
This interface provides a way to define how regulators are
|
||||
connected to consumers on a given system and what the valid
|
||||
operating parameters are for the system.
|
||||
</para>
|
||||
|
||||
<sect1 id="machine-supply">
|
||||
<title>Supplies</title>
|
||||
<para>
|
||||
Regulator supplies are specified using <link
|
||||
linkend='API-struct-regulator-consumer-supply'>struct
|
||||
regulator_consumer_supply</link>. This is done at
|
||||
<link linkend='driver'>driver registration
|
||||
time</link> as part of the machine constraints.
|
||||
</para>
|
||||
</sect1>
|
||||
|
||||
<sect1 id="machine-constraint">
|
||||
<title>Constraints</title>
|
||||
<para>
|
||||
As well as defining the connections the machine interface
|
||||
also provides constraints defining the operations that
|
||||
clients are allowed to perform and the parameters that may be
|
||||
set. This is required since generally regulator devices will
|
||||
offer more flexibility than it is safe to use on a given
|
||||
system, for example supporting higher supply voltages than the
|
||||
consumers are rated for.
|
||||
</para>
|
||||
<para>
|
||||
This is done at <link linkend='driver'>driver
|
||||
registration time</link> by providing a <link
|
||||
linkend='API-struct-regulation-constraints'>struct
|
||||
regulation_constraints</link>.
|
||||
</para>
|
||||
<para>
|
||||
The constraints may also specify an initial configuration for the
|
||||
regulator in the constraints, which is particularly useful for
|
||||
use with static consumers.
|
||||
</para>
|
||||
</sect1>
|
||||
</chapter>
|
||||
|
||||
<chapter id="api">
|
||||
<title>API reference</title>
|
||||
<para>
|
||||
Due to limitations of the kernel documentation framework and the
|
||||
existing layout of the source code the entire regulator API is
|
||||
documented here.
|
||||
</para>
|
||||
!Iinclude/linux/regulator/consumer.h
|
||||
!Iinclude/linux/regulator/machine.h
|
||||
!Iinclude/linux/regulator/driver.h
|
||||
!Edrivers/regulator/core.c
|
||||
</chapter>
|
||||
</book>
|
@ -41,6 +41,18 @@ GPL version 2.
|
||||
</abstract>
|
||||
|
||||
<revhistory>
|
||||
<revision>
|
||||
<revnumber>0.7</revnumber>
|
||||
<date>2008-12-23</date>
|
||||
<authorinitials>hjk</authorinitials>
|
||||
<revremark>Added generic platform drivers and offset attribute.</revremark>
|
||||
</revision>
|
||||
<revision>
|
||||
<revnumber>0.6</revnumber>
|
||||
<date>2008-12-05</date>
|
||||
<authorinitials>hjk</authorinitials>
|
||||
<revremark>Added description of portio sysfs attributes.</revremark>
|
||||
</revision>
|
||||
<revision>
|
||||
<revnumber>0.5</revnumber>
|
||||
<date>2008-05-22</date>
|
||||
@ -306,6 +318,16 @@ interested in translating it, please email me
|
||||
pointed to by addr.
|
||||
</para>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<para>
|
||||
<filename>offset</filename>: The offset, in bytes, that has to be
|
||||
added to the pointer returned by <function>mmap()</function> to get
|
||||
to the actual device memory. This is important if the device's memory
|
||||
is not page aligned. Remember that pointers returned by
|
||||
<function>mmap()</function> are always page aligned, so it is good
|
||||
style to always add this offset.
|
||||
</para>
|
||||
</listitem>
|
||||
</itemizedlist>
|
||||
|
||||
<para>
|
||||
@ -318,6 +340,54 @@ interested in translating it, please email me
|
||||
offset = N * getpagesize();
|
||||
</programlisting>
|
||||
|
||||
<para>
|
||||
Sometimes there is hardware with memory-like regions that can not be
|
||||
mapped with the technique described here, but there are still ways to
|
||||
access them from userspace. The most common example is x86 ioports.
|
||||
On x86 systems, userspace can access these ioports using
|
||||
<function>ioperm()</function>, <function>iopl()</function>,
|
||||
<function>inb()</function>, <function>outb()</function>, and similar
|
||||
functions.
|
||||
</para>
|
||||
<para>
|
||||
Since these ioport regions can not be mapped, they will not appear under
|
||||
<filename>/sys/class/uio/uioX/maps/</filename> like the normal memory
|
||||
described above. Without information about the port regions a hardware
|
||||
has to offer, it becomes difficult for the userspace part of the
|
||||
driver to find out which ports belong to which UIO device.
|
||||
</para>
|
||||
<para>
|
||||
To address this situation, the new directory
|
||||
<filename>/sys/class/uio/uioX/portio/</filename> was added. It only
|
||||
exists if the driver wants to pass information about one or more port
|
||||
regions to userspace. If that is the case, subdirectories named
|
||||
<filename>port0</filename>, <filename>port1</filename>, and so on,
|
||||
will appear underneath
|
||||
<filename>/sys/class/uio/uioX/portio/</filename>.
|
||||
</para>
|
||||
<para>
|
||||
Each <filename>portX/</filename> directory contains three read-only
|
||||
files that show start, size, and type of the port region:
|
||||
</para>
|
||||
<itemizedlist>
|
||||
<listitem>
|
||||
<para>
|
||||
<filename>start</filename>: The first port of this region.
|
||||
</para>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<para>
|
||||
<filename>size</filename>: The number of ports in this region.
|
||||
</para>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<para>
|
||||
<filename>porttype</filename>: A string describing the type of port.
|
||||
</para>
|
||||
</listitem>
|
||||
</itemizedlist>
|
||||
|
||||
|
||||
</sect1>
|
||||
</chapter>
|
||||
|
||||
@ -339,12 +409,12 @@ offset = N * getpagesize();
|
||||
|
||||
<itemizedlist>
|
||||
<listitem><para>
|
||||
<varname>char *name</varname>: Required. The name of your driver as
|
||||
<varname>const char *name</varname>: Required. The name of your driver as
|
||||
it will appear in sysfs. I recommend using the name of your module for this.
|
||||
</para></listitem>
|
||||
|
||||
<listitem><para>
|
||||
<varname>char *version</varname>: Required. This string appears in
|
||||
<varname>const char *version</varname>: Required. This string appears in
|
||||
<filename>/sys/class/uio/uioX/version</filename>.
|
||||
</para></listitem>
|
||||
|
||||
@ -355,6 +425,13 @@ mapping you need to fill one of the <varname>uio_mem</varname> structures.
|
||||
See the description below for details.
|
||||
</para></listitem>
|
||||
|
||||
<listitem><para>
|
||||
<varname>struct uio_port port[ MAX_UIO_PORTS_REGIONS ]</varname>: Required
|
||||
if you want to pass information about ioports to userspace. For each port
|
||||
region you need to fill one of the <varname>uio_port</varname> structures.
|
||||
See the description below for details.
|
||||
</para></listitem>
|
||||
|
||||
<listitem><para>
|
||||
<varname>long irq</varname>: Required. If your hardware generates an
|
||||
interrupt, it's your module's task to determine the irq number during
|
||||
@ -448,6 +525,42 @@ Please do not touch the <varname>kobj</varname> element of
|
||||
<varname>struct uio_mem</varname>! It is used by the UIO framework
|
||||
to set up sysfs files for this mapping. Simply leave it alone.
|
||||
</para>
|
||||
|
||||
<para>
|
||||
Sometimes, your device can have one or more port regions which can not be
|
||||
mapped to userspace. But if there are other possibilities for userspace to
|
||||
access these ports, it makes sense to make information about the ports
|
||||
available in sysfs. For each region, you have to set up a
|
||||
<varname>struct uio_port</varname> in the <varname>port[]</varname> array.
|
||||
Here's a description of the fields of <varname>struct uio_port</varname>:
|
||||
</para>
|
||||
|
||||
<itemizedlist>
|
||||
<listitem><para>
|
||||
<varname>char *porttype</varname>: Required. Set this to one of the predefined
|
||||
constants. Use <varname>UIO_PORT_X86</varname> for the ioports found in x86
|
||||
architectures.
|
||||
</para></listitem>
|
||||
|
||||
<listitem><para>
|
||||
<varname>unsigned long start</varname>: Required if the port region is used.
|
||||
Fill in the number of the first port of this region.
|
||||
</para></listitem>
|
||||
|
||||
<listitem><para>
|
||||
<varname>unsigned long size</varname>: Fill in the number of ports in this
|
||||
region. If <varname>size</varname> is zero, the region is considered unused.
|
||||
Note that you <emphasis>must</emphasis> initialize <varname>size</varname>
|
||||
with zero for all unused regions.
|
||||
</para></listitem>
|
||||
</itemizedlist>
|
||||
|
||||
<para>
|
||||
Please do not touch the <varname>portio</varname> element of
|
||||
<varname>struct uio_port</varname>! It is used internally by the UIO
|
||||
framework to set up sysfs files for this region. Simply leave it alone.
|
||||
</para>
|
||||
|
||||
</sect1>
|
||||
|
||||
<sect1 id="adding_irq_handler">
|
||||
@ -497,6 +610,78 @@ to set up sysfs files for this mapping. Simply leave it alone.
|
||||
</para>
|
||||
</sect1>
|
||||
|
||||
<sect1 id="using_uio_pdrv">
|
||||
<title>Using uio_pdrv for platform devices</title>
|
||||
<para>
|
||||
In many cases, UIO drivers for platform devices can be handled in a
|
||||
generic way. In the same place where you define your
|
||||
<varname>struct platform_device</varname>, you simply also implement
|
||||
your interrupt handler and fill your
|
||||
<varname>struct uio_info</varname>. A pointer to this
|
||||
<varname>struct uio_info</varname> is then used as
|
||||
<varname>platform_data</varname> for your platform device.
|
||||
</para>
|
||||
<para>
|
||||
You also need to set up an array of <varname>struct resource</varname>
|
||||
containing addresses and sizes of your memory mappings. This
|
||||
information is passed to the driver using the
|
||||
<varname>.resource</varname> and <varname>.num_resources</varname>
|
||||
elements of <varname>struct platform_device</varname>.
|
||||
</para>
|
||||
<para>
|
||||
You now have to set the <varname>.name</varname> element of
|
||||
<varname>struct platform_device</varname> to
|
||||
<varname>"uio_pdrv"</varname> to use the generic UIO platform device
|
||||
driver. This driver will fill the <varname>mem[]</varname> array
|
||||
according to the resources given, and register the device.
|
||||
</para>
|
||||
<para>
|
||||
The advantage of this approach is that you only have to edit a file
|
||||
you need to edit anyway. You do not have to create an extra driver.
|
||||
</para>
|
||||
</sect1>
|
||||
|
||||
<sect1 id="using_uio_pdrv_genirq">
|
||||
<title>Using uio_pdrv_genirq for platform devices</title>
|
||||
<para>
|
||||
Especially in embedded devices, you frequently find chips where the
|
||||
irq pin is tied to its own dedicated interrupt line. In such cases,
|
||||
where you can be really sure the interrupt is not shared, we can take
|
||||
the concept of <varname>uio_pdrv</varname> one step further and use a
|
||||
generic interrupt handler. That's what
|
||||
<varname>uio_pdrv_genirq</varname> does.
|
||||
</para>
|
||||
<para>
|
||||
The setup for this driver is the same as described above for
|
||||
<varname>uio_pdrv</varname>, except that you do not implement an
|
||||
interrupt handler. The <varname>.handler</varname> element of
|
||||
<varname>struct uio_info</varname> must remain
|
||||
<varname>NULL</varname>. The <varname>.irq_flags</varname> element
|
||||
must not contain <varname>IRQF_SHARED</varname>.
|
||||
</para>
|
||||
<para>
|
||||
You will set the <varname>.name</varname> element of
|
||||
<varname>struct platform_device</varname> to
|
||||
<varname>"uio_pdrv_genirq"</varname> to use this driver.
|
||||
</para>
|
||||
<para>
|
||||
The generic interrupt handler of <varname>uio_pdrv_genirq</varname>
|
||||
will simply disable the interrupt line using
|
||||
<function>disable_irq_nosync()</function>. After doing its work,
|
||||
userspace can reenable the interrupt by writing 0x00000001 to the UIO
|
||||
device file. The driver already implements an
|
||||
<function>irq_control()</function> to make this possible, you must not
|
||||
implement your own.
|
||||
</para>
|
||||
<para>
|
||||
Using <varname>uio_pdrv_genirq</varname> not only saves a few lines of
|
||||
interrupt handler code. You also do not need to know anything about
|
||||
the chip's internal registers to create the kernel part of the driver.
|
||||
All you need to know is the irq number of the pin the chip is
|
||||
connected to.
|
||||
</para>
|
||||
</sect1>
|
||||
|
||||
</chapter>
|
||||
|
||||
<chapter id="userspace_driver" xreflabel="Writing a driver in user space">
|
||||
|
@ -1,6 +1,6 @@
|
||||
[ NOTE: The virt_to_bus() and bus_to_virt() functions have been
|
||||
superseded by the functionality provided by the PCI DMA
|
||||
interface (see Documentation/DMA-mapping.txt). They continue
|
||||
superseded by the functionality provided by the PCI DMA interface
|
||||
(see Documentation/PCI/PCI-DMA-mapping.txt). They continue
|
||||
to be documented below for historical purposes, but new code
|
||||
must not use them. --davidm 00/12/12 ]
|
||||
|
||||
|
@ -294,7 +294,8 @@ NOTE: pci_enable_device() can fail! Check the return value.
|
||||
|
||||
pci_set_master() will enable DMA by setting the bus master bit
|
||||
in the PCI_COMMAND register. It also fixes the latency timer value if
|
||||
it's set to something bogus by the BIOS.
|
||||
it's set to something bogus by the BIOS. pci_clear_master() will
|
||||
disable DMA by clearing the bus master bit.
|
||||
|
||||
If the PCI device can use the PCI Memory-Write-Invalidate transaction,
|
||||
call pci_set_mwi(). This enables the PCI_COMMAND bit for Mem-Wr-Inval
|
||||
|
@ -12,6 +12,8 @@ rcuref.txt
|
||||
- Reference-count design for elements of lists/arrays protected by RCU
|
||||
rcu.txt
|
||||
- RCU Concepts
|
||||
rcubarrier.txt
|
||||
- Unloading modules that use RCU callbacks
|
||||
RTFP.txt
|
||||
- List of RCU papers (bibliography) going back to 1980.
|
||||
torture.txt
|
||||
|
304
Documentation/RCU/rcubarrier.txt
Normal file
304
Documentation/RCU/rcubarrier.txt
Normal file
@ -0,0 +1,304 @@
|
||||
RCU and Unloadable Modules
|
||||
|
||||
[Originally published in LWN Jan. 14, 2007: http://lwn.net/Articles/217484/]
|
||||
|
||||
RCU (read-copy update) is a synchronization mechanism that can be thought
|
||||
of as a replacement for reader-writer locking (among other things), but with
|
||||
very low-overhead readers that are immune to deadlock, priority inversion,
|
||||
and unbounded latency. RCU read-side critical sections are delimited
|
||||
by rcu_read_lock() and rcu_read_unlock(), which, in non-CONFIG_PREEMPT
|
||||
kernels, generate no code whatsoever.
|
||||
|
||||
This means that RCU writers are unaware of the presence of concurrent
|
||||
readers, so that RCU updates to shared data must be undertaken quite
|
||||
carefully, leaving an old version of the data structure in place until all
|
||||
pre-existing readers have finished. These old versions are needed because
|
||||
such readers might hold a reference to them. RCU updates can therefore be
|
||||
rather expensive, and RCU is thus best suited for read-mostly situations.
|
||||
|
||||
How can an RCU writer possibly determine when all readers are finished,
|
||||
given that readers might well leave absolutely no trace of their
|
||||
presence? There is a synchronize_rcu() primitive that blocks until all
|
||||
pre-existing readers have completed. An updater wishing to delete an
|
||||
element p from a linked list might do the following, while holding an
|
||||
appropriate lock, of course:
|
||||
|
||||
list_del_rcu(p);
|
||||
synchronize_rcu();
|
||||
kfree(p);
|
||||
|
||||
But the above code cannot be used in IRQ context -- the call_rcu()
|
||||
primitive must be used instead. This primitive takes a pointer to an
|
||||
rcu_head struct placed within the RCU-protected data structure and
|
||||
another pointer to a function that may be invoked later to free that
|
||||
structure. Code to delete an element p from the linked list from IRQ
|
||||
context might then be as follows:
|
||||
|
||||
list_del_rcu(p);
|
||||
call_rcu(&p->rcu, p_callback);
|
||||
|
||||
Since call_rcu() never blocks, this code can safely be used from within
|
||||
IRQ context. The function p_callback() might be defined as follows:
|
||||
|
||||
static void p_callback(struct rcu_head *rp)
|
||||
{
|
||||
struct pstruct *p = container_of(rp, struct pstruct, rcu);
|
||||
|
||||
kfree(p);
|
||||
}
|
||||
|
||||
|
||||
Unloading Modules That Use call_rcu()
|
||||
|
||||
But what if p_callback is defined in an unloadable module?
|
||||
|
||||
If we unload the module while some RCU callbacks are pending,
|
||||
the CPUs executing these callbacks are going to be severely
|
||||
disappointed when they are later invoked, as fancifully depicted at
|
||||
http://lwn.net/images/ns/kernel/rcu-drop.jpg.
|
||||
|
||||
We could try placing a synchronize_rcu() in the module-exit code path,
|
||||
but this is not sufficient. Although synchronize_rcu() does wait for a
|
||||
grace period to elapse, it does not wait for the callbacks to complete.
|
||||
|
||||
One might be tempted to try several back-to-back synchronize_rcu()
|
||||
calls, but this is still not guaranteed to work. If there is a very
|
||||
heavy RCU-callback load, then some of the callbacks might be deferred
|
||||
in order to allow other processing to proceed. Such deferral is required
|
||||
in realtime kernels in order to avoid excessive scheduling latencies.
|
||||
|
||||
|
||||
rcu_barrier()
|
||||
|
||||
We instead need the rcu_barrier() primitive. This primitive is similar
|
||||
to synchronize_rcu(), but instead of waiting solely for a grace
|
||||
period to elapse, it also waits for all outstanding RCU callbacks to
|
||||
complete. Pseudo-code using rcu_barrier() is as follows:
|
||||
|
||||
1. Prevent any new RCU callbacks from being posted.
|
||||
2. Execute rcu_barrier().
|
||||
3. Allow the module to be unloaded.
|
||||
|
||||
Quick Quiz #1: Why is there no srcu_barrier()?
|
||||
|
||||
The rcutorture module makes use of rcu_barrier in its exit function
|
||||
as follows:
|
||||
|
||||
1 static void
|
||||
2 rcu_torture_cleanup(void)
|
||||
3 {
|
||||
4 int i;
|
||||
5
|
||||
6 fullstop = 1;
|
||||
7 if (shuffler_task != NULL) {
|
||||
8 VERBOSE_PRINTK_STRING("Stopping rcu_torture_shuffle task");
|
||||
9 kthread_stop(shuffler_task);
|
||||
10 }
|
||||
11 shuffler_task = NULL;
|
||||
12
|
||||
13 if (writer_task != NULL) {
|
||||
14 VERBOSE_PRINTK_STRING("Stopping rcu_torture_writer task");
|
||||
15 kthread_stop(writer_task);
|
||||
16 }
|
||||
17 writer_task = NULL;
|
||||
18
|
||||
19 if (reader_tasks != NULL) {
|
||||
20 for (i = 0; i < nrealreaders; i++) {
|
||||
21 if (reader_tasks[i] != NULL) {
|
||||
22 VERBOSE_PRINTK_STRING(
|
||||
23 "Stopping rcu_torture_reader task");
|
||||
24 kthread_stop(reader_tasks[i]);
|
||||
25 }
|
||||
26 reader_tasks[i] = NULL;
|
||||
27 }
|
||||
28 kfree(reader_tasks);
|
||||
29 reader_tasks = NULL;
|
||||
30 }
|
||||
31 rcu_torture_current = NULL;
|
||||
32
|
||||
33 if (fakewriter_tasks != NULL) {
|
||||
34 for (i = 0; i < nfakewriters; i++) {
|
||||
35 if (fakewriter_tasks[i] != NULL) {
|
||||
36 VERBOSE_PRINTK_STRING(
|
||||
37 "Stopping rcu_torture_fakewriter task");
|
||||
38 kthread_stop(fakewriter_tasks[i]);
|
||||
39 }
|
||||
40 fakewriter_tasks[i] = NULL;
|
||||
41 }
|
||||
42 kfree(fakewriter_tasks);
|
||||
43 fakewriter_tasks = NULL;
|
||||
44 }
|
||||
45
|
||||
46 if (stats_task != NULL) {
|
||||
47 VERBOSE_PRINTK_STRING("Stopping rcu_torture_stats task");
|
||||
48 kthread_stop(stats_task);
|
||||
49 }
|
||||
50 stats_task = NULL;
|
||||
51
|
||||
52 /* Wait for all RCU callbacks to fire. */
|
||||
53 rcu_barrier();
|
||||
54
|
||||
55 rcu_torture_stats_print(); /* -After- the stats thread is stopped! */
|
||||
56
|
||||
57 if (cur_ops->cleanup != NULL)
|
||||
58 cur_ops->cleanup();
|
||||
59 if (atomic_read(&n_rcu_torture_error))
|
||||
60 rcu_torture_print_module_parms("End of test: FAILURE");
|
||||
61 else
|
||||
62 rcu_torture_print_module_parms("End of test: SUCCESS");
|
||||
63 }
|
||||
|
||||
Line 6 sets a global variable that prevents any RCU callbacks from
|
||||
re-posting themselves. This will not be necessary in most cases, since
|
||||
RCU callbacks rarely include calls to call_rcu(). However, the rcutorture
|
||||
module is an exception to this rule, and therefore needs to set this
|
||||
global variable.
|
||||
|
||||
Lines 7-50 stop all the kernel tasks associated with the rcutorture
|
||||
module. Therefore, once execution reaches line 53, no more rcutorture
|
||||
RCU callbacks will be posted. The rcu_barrier() call on line 53 waits
|
||||
for any pre-existing callbacks to complete.
|
||||
|
||||
Then lines 55-62 print status and do operation-specific cleanup, and
|
||||
then return, permitting the module-unload operation to be completed.
|
||||
|
||||
Quick Quiz #2: Is there any other situation where rcu_barrier() might
|
||||
be required?
|
||||
|
||||
Your module might have additional complications. For example, if your
|
||||
module invokes call_rcu() from timers, you will need to first cancel all
|
||||
the timers, and only then invoke rcu_barrier() to wait for any remaining
|
||||
RCU callbacks to complete.
|
||||
|
||||
|
||||
Implementing rcu_barrier()
|
||||
|
||||
Dipankar Sarma's implementation of rcu_barrier() makes use of the fact
|
||||
that RCU callbacks are never reordered once queued on one of the per-CPU
|
||||
queues. His implementation queues an RCU callback on each of the per-CPU
|
||||
callback queues, and then waits until they have all started executing, at
|
||||
which point, all earlier RCU callbacks are guaranteed to have completed.
|
||||
|
||||
The original code for rcu_barrier() was as follows:
|
||||
|
||||
1 void rcu_barrier(void)
|
||||
2 {
|
||||
3 BUG_ON(in_interrupt());
|
||||
4 /* Take cpucontrol mutex to protect against CPU hotplug */
|
||||
5 mutex_lock(&rcu_barrier_mutex);
|
||||
6 init_completion(&rcu_barrier_completion);
|
||||
7 atomic_set(&rcu_barrier_cpu_count, 0);
|
||||
8 on_each_cpu(rcu_barrier_func, NULL, 0, 1);
|
||||
9 wait_for_completion(&rcu_barrier_completion);
|
||||
10 mutex_unlock(&rcu_barrier_mutex);
|
||||
11 }
|
||||
|
||||
Line 3 verifies that the caller is in process context, and lines 5 and 10
|
||||
use rcu_barrier_mutex to ensure that only one rcu_barrier() is using the
|
||||
global completion and counters at a time, which are initialized on lines
|
||||
6 and 7. Line 8 causes each CPU to invoke rcu_barrier_func(), which is
|
||||
shown below. Note that the final "1" in on_each_cpu()'s argument list
|
||||
ensures that all the calls to rcu_barrier_func() will have completed
|
||||
before on_each_cpu() returns. Line 9 then waits for the completion.
|
||||
|
||||
This code was rewritten in 2008 to support rcu_barrier_bh() and
|
||||
rcu_barrier_sched() in addition to the original rcu_barrier().
|
||||
|
||||
The rcu_barrier_func() runs on each CPU, where it invokes call_rcu()
|
||||
to post an RCU callback, as follows:
|
||||
|
||||
1 static void rcu_barrier_func(void *notused)
|
||||
2 {
|
||||
3 int cpu = smp_processor_id();
|
||||
4 struct rcu_data *rdp = &per_cpu(rcu_data, cpu);
|
||||
5 struct rcu_head *head;
|
||||
6
|
||||
7 head = &rdp->barrier;
|
||||
8 atomic_inc(&rcu_barrier_cpu_count);
|
||||
9 call_rcu(head, rcu_barrier_callback);
|
||||
10 }
|
||||
|
||||
Lines 3 and 4 locate RCU's internal per-CPU rcu_data structure,
|
||||
which contains the struct rcu_head that is needed for the later call to
|
||||
call_rcu(). Line 7 picks up a pointer to this struct rcu_head, and line
|
||||
8 increments a global counter. This counter will later be decremented
|
||||
by the callback. Line 9 then registers the rcu_barrier_callback() on
|
||||
the current CPU's queue.
|
||||
|
||||
The rcu_barrier_callback() function simply atomically decrements the
|
||||
rcu_barrier_cpu_count variable and finalizes the completion when it
|
||||
reaches zero, as follows:
|
||||
|
||||
1 static void rcu_barrier_callback(struct rcu_head *notused)
|
||||
2 {
|
||||
3 if (atomic_dec_and_test(&rcu_barrier_cpu_count))
|
||||
4 complete(&rcu_barrier_completion);
|
||||
5 }
|
||||
|
||||
Quick Quiz #3: What happens if CPU 0's rcu_barrier_func() executes
|
||||
immediately (thus incrementing rcu_barrier_cpu_count to the
|
||||
value one), but the other CPU's rcu_barrier_func() invocations
|
||||
are delayed for a full grace period? Couldn't this result in
|
||||
rcu_barrier() returning prematurely?
|
||||
|
||||
|
||||
rcu_barrier() Summary
|
||||
|
||||
The rcu_barrier() primitive has seen relatively little use, since most
|
||||
code using RCU is in the core kernel rather than in modules. However, if
|
||||
you are using RCU from an unloadable module, you need to use rcu_barrier()
|
||||
so that your module may be safely unloaded.
|
||||
|
||||
|
||||
Answers to Quick Quizzes
|
||||
|
||||
Quick Quiz #1: Why is there no srcu_barrier()?
|
||||
|
||||
Answer: Since there is no call_srcu(), there can be no outstanding SRCU
|
||||
callbacks. Therefore, there is no need to wait for them.
|
||||
|
||||
Quick Quiz #2: Is there any other situation where rcu_barrier() might
|
||||
be required?
|
||||
|
||||
Answer: Interestingly enough, rcu_barrier() was not originally
|
||||
implemented for module unloading. Nikita Danilov was using
|
||||
RCU in a filesystem, which resulted in a similar situation at
|
||||
filesystem-unmount time. Dipankar Sarma coded up rcu_barrier()
|
||||
in response, so that Nikita could invoke it during the
|
||||
filesystem-unmount process.
|
||||
|
||||
Much later, yours truly hit the RCU module-unload problem when
|
||||
implementing rcutorture, and found that rcu_barrier() solves
|
||||
this problem as well.
|
||||
|
||||
Quick Quiz #3: What happens if CPU 0's rcu_barrier_func() executes
|
||||
immediately (thus incrementing rcu_barrier_cpu_count to the
|
||||
value one), but the other CPU's rcu_barrier_func() invocations
|
||||
are delayed for a full grace period? Couldn't this result in
|
||||
rcu_barrier() returning prematurely?
|
||||
|
||||
Answer: This cannot happen. The reason is that on_each_cpu() has its last
|
||||
argument, the wait flag, set to "1". This flag is passed through
|
||||
to smp_call_function() and further to smp_call_function_on_cpu(),
|
||||
causing this latter to spin until the cross-CPU invocation of
|
||||
rcu_barrier_func() has completed. This by itself would prevent
|
||||
a grace period from completing on non-CONFIG_PREEMPT kernels,
|
||||
since each CPU must undergo a context switch (or other quiescent
|
||||
state) before the grace period can complete. However, this is
|
||||
of no use in CONFIG_PREEMPT kernels.
|
||||
|
||||
Therefore, on_each_cpu() disables preemption across its call
|
||||
to smp_call_function() and also across the local call to
|
||||
rcu_barrier_func(). This prevents the local CPU from context
|
||||
switching, again preventing grace periods from completing. This
|
||||
means that all CPUs have executed rcu_barrier_func() before
|
||||
the first rcu_barrier_callback() can possibly execute, in turn
|
||||
preventing rcu_barrier_cpu_count from prematurely reaching zero.
|
||||
|
||||
Currently, -rt implementations of RCU keep but a single global
|
||||
queue for RCU callbacks, and thus do not suffer from this
|
||||
problem. However, when the -rt RCU eventually does have per-CPU
|
||||
callback queues, things will have to change. One simple change
|
||||
is to add an rcu_read_lock() before line 8 of rcu_barrier()
|
||||
and an rcu_read_unlock() after line 8 of this same function. If
|
||||
you can think of a better change, please let me know!
|
@ -392,6 +392,10 @@ int main(int argc, char *argv[])
|
||||
goto err;
|
||||
}
|
||||
}
|
||||
if (!maskset && !tid && !containerset) {
|
||||
usage();
|
||||
goto err;
|
||||
}
|
||||
|
||||
do {
|
||||
int i;
|
||||
|
45
Documentation/bad_memory.txt
Normal file
45
Documentation/bad_memory.txt
Normal file
@ -0,0 +1,45 @@
|
||||
March 2008
|
||||
Jan-Simon Moeller, dl9pf@gmx.de
|
||||
|
||||
|
||||
How to deal with bad memory e.g. reported by memtest86+ ?
|
||||
#########################################################
|
||||
|
||||
There are three possibilities I know of:
|
||||
|
||||
1) Reinsert/swap the memory modules
|
||||
|
||||
2) Buy new modules (best!) or try to exchange the memory
|
||||
if you have spare-parts
|
||||
|
||||
3) Use BadRAM or memmap
|
||||
|
||||
This Howto is about number 3) .
|
||||
|
||||
|
||||
BadRAM
|
||||
######
|
||||
BadRAM is actively developed and available as a kernel patch
|
||||
here: http://rick.vanrein.org/linux/badram/
|
||||
|
||||
For more details see the BadRAM documentation.
|
||||
|
||||
memmap
|
||||
######
|
||||
|
||||
memmap is already in the kernel and usable as kernel-parameter at
|
||||
boot-time. Its syntax is slightly strange and you may need to
|
||||
calculate the values by yourself!
|
||||
|
||||
Syntax to exclude a memory area (see kernel-parameters.txt for details):
|
||||
memmap=<size>$<address>
|
||||
|
||||
Example: memtest86+ reported errors here at addresses 0x18691458, 0x18698424 and
|
||||
some others. All had 0x1869xxxx in common, so I chose a pattern of
|
||||
0x18690000,0xffff0000.
|
||||
|
||||
With the numbers of the example above:
|
||||
memmap=64K$0x18690000
|
||||
or
|
||||
memmap=0x10000$0x18690000
|
||||
|
@ -9,3 +9,6 @@ cachefeatures.txt
|
||||
|
||||
Filesystems
|
||||
- Requirements for mounting the root file system.
|
||||
|
||||
bfin-gpio-notes.txt
|
||||
- Notes in developing/using bfin-gpio driver.
|
||||
|
71
Documentation/blackfin/bfin-gpio-notes.txt
Normal file
71
Documentation/blackfin/bfin-gpio-notes.txt
Normal file
@ -0,0 +1,71 @@
|
||||
/*
|
||||
* File: Documentation/blackfin/bfin-gpio-notes.txt
|
||||
* Based on:
|
||||
* Author:
|
||||
*
|
||||
* Created: $Id: bfin-gpio-note.txt 2008-11-24 16:42 grafyang $
|
||||
* Description: This file contains the notes in developing/using bfin-gpio.
|
||||
*
|
||||
*
|
||||
* Rev:
|
||||
*
|
||||
* Modified:
|
||||
* Copyright 2004-2008 Analog Devices Inc.
|
||||
*
|
||||
* Bugs: Enter bugs at http://blackfin.uclinux.org/
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
1. Blackfin GPIO introduction
|
||||
|
||||
There are many GPIO pins on Blackfin. Most of these pins are muxed to
|
||||
multi-functions. They can be configured as peripheral, or just as GPIO,
|
||||
configured to input with interrupt enabled, or output.
|
||||
|
||||
For detailed information, please see "arch/blackfin/kernel/bfin_gpio.c",
|
||||
or the relevant HRM.
|
||||
|
||||
|
||||
2. Avoiding resource conflict
|
||||
|
||||
The following function groups are used to avoid resource conflicts,
|
||||
- Use the pin as peripheral,
|
||||
int peripheral_request(unsigned short per, const char *label);
|
||||
int peripheral_request_list(const unsigned short per[], const char *label);
|
||||
void peripheral_free(unsigned short per);
|
||||
void peripheral_free_list(const unsigned short per[]);
|
||||
- Use the pin as GPIO,
|
||||
int bfin_gpio_request(unsigned gpio, const char *label);
|
||||
void bfin_gpio_free(unsigned gpio);
|
||||
- Use the pin as GPIO interrupt,
|
||||
int bfin_gpio_irq_request(unsigned gpio, const char *label);
|
||||
void bfin_gpio_irq_free(unsigned gpio);
|
||||
|
||||
The request functions will record the function state for a certain pin,
|
||||
the free functions will clear its function state.
|
||||
Once a pin is requested, it can't be requested again before it is freed by
|
||||
previous caller, otherwise kernel will dump stacks, and the request
|
||||
function fails.
|
||||
These functions are wrapped by other functions, most of the users need not
|
||||
care.
|
||||
|
||||
|
||||
3. But there are some exceptions
|
||||
- The kernel permits the identical GPIO to be requested both as GPIO and GPIO
|
||||
interrupt.
|
||||
Some drivers, like gpio-keys, need this behavior. Kernel only print out
|
||||
warning messages like,
|
||||
bfin-gpio: GPIO 24 is already reserved by gpio-keys: BTN0, and you are
|
||||
configuring it as IRQ!
|
||||
|
||||
Note: Consider the case that, if there are two drivers need the
|
||||
identical GPIO, one of them use it as GPIO, the other use it as
|
||||
GPIO interrupt. This will really cause resource conflict. So if
|
||||
there is any abnormal driver behavior, please check the bfin-gpio
|
||||
warning messages.
|
||||
|
||||
- The kernel permits the identical GPIO to be requested from the same driver twice.
|
||||
|
||||
|
||||
|
@ -186,8 +186,9 @@ a virtual address mapping (unlike the earlier scheme of virtual address
|
||||
do not have a corresponding kernel virtual address space mapping) and
|
||||
low-memory pages.
|
||||
|
||||
Note: Please refer to DMA-mapping.txt for a discussion on PCI high mem DMA
|
||||
aspects and mapping of scatter gather lists, and support for 64 bit PCI.
|
||||
Note: Please refer to Documentation/PCI/PCI-DMA-mapping.txt for a discussion
|
||||
on PCI high mem DMA aspects and mapping of scatter gather lists, and support
|
||||
for 64 bit PCI.
|
||||
|
||||
Special handling is required only for cases where i/o needs to happen on
|
||||
pages at physical memory addresses beyond what the device can support. In these
|
||||
@ -953,14 +954,14 @@ elevator_allow_merge_fn called whenever the block layer determines
|
||||
results in some sort of conflict internally,
|
||||
this hook allows it to do that.
|
||||
|
||||
elevator_dispatch_fn fills the dispatch queue with ready requests.
|
||||
elevator_dispatch_fn* fills the dispatch queue with ready requests.
|
||||
I/O schedulers are free to postpone requests by
|
||||
not filling the dispatch queue unless @force
|
||||
is non-zero. Once dispatched, I/O schedulers
|
||||
are not allowed to manipulate the requests -
|
||||
they belong to generic dispatch queue.
|
||||
|
||||
elevator_add_req_fn called to add a new request into the scheduler
|
||||
elevator_add_req_fn* called to add a new request into the scheduler
|
||||
|
||||
elevator_queue_empty_fn returns true if the merge queue is empty.
|
||||
Drivers shouldn't use this, but rather check
|
||||
@ -990,7 +991,7 @@ elevator_activate_req_fn Called when device driver first sees a request.
|
||||
elevator_deactivate_req_fn Called when device driver decides to delay
|
||||
a request by requeueing it.
|
||||
|
||||
elevator_init_fn
|
||||
elevator_init_fn*
|
||||
elevator_exit_fn Allocate and free any elevator specific storage
|
||||
for a queue.
|
||||
|
||||
|
63
Documentation/block/queue-sysfs.txt
Normal file
63
Documentation/block/queue-sysfs.txt
Normal file
@ -0,0 +1,63 @@
|
||||
Queue sysfs files
|
||||
=================
|
||||
|
||||
This text file will detail the queue files that are located in the sysfs tree
|
||||
for each block device. Note that stacked devices typically do not export
|
||||
any settings, since their queue merely functions as a remapping target.
|
||||
These files are the ones found in the /sys/block/xxx/queue/ directory.
|
||||
|
||||
Files denoted with a RO postfix are readonly and the RW postfix means
|
||||
read-write.
|
||||
|
||||
hw_sector_size (RO)
|
||||
-------------------
|
||||
This is the hardware sector size of the device, in bytes.
|
||||
|
||||
max_hw_sectors_kb (RO)
|
||||
----------------------
|
||||
This is the maximum number of kilobytes supported in a single data transfer.
|
||||
|
||||
max_sectors_kb (RW)
|
||||
-------------------
|
||||
This is the maximum number of kilobytes that the block layer will allow
|
||||
for a filesystem request. Must be smaller than or equal to the maximum
|
||||
size allowed by the hardware.
|
||||
|
||||
nomerges (RW)
|
||||
-------------
|
||||
This enables the user to disable the lookup logic involved with IO merging
|
||||
requests in the block layer. Merging may still occur through a direct
|
||||
1-hit cache, since that comes for (almost) free. The IO scheduler will not
|
||||
waste cycles doing tree/hash lookups for merges if nomerges is 1. Defaults
|
||||
to 0, enabling all merges.
|
||||
|
||||
nr_requests (RW)
|
||||
----------------
|
||||
This controls how many requests may be allocated in the block layer for
|
||||
read or write requests. Note that the total allocated number may be twice
|
||||
this amount, since it applies only to reads or writes (not the accumulated
|
||||
sum).
|
||||
|
||||
read_ahead_kb (RW)
|
||||
------------------
|
||||
Maximum number of kilobytes to read-ahead for filesystems on this block
|
||||
device.
|
||||
|
||||
rq_affinity (RW)
|
||||
----------------
|
||||
If this option is enabled, the block layer will migrate request completions
|
||||
to the CPU that originally submitted the request. For some workloads
|
||||
this provides a significant reduction in CPU cycles due to caching effects.
|
||||
|
||||
scheduler (RW)
|
||||
--------------
|
||||
When read, this file will display the current and available IO schedulers
|
||||
for this block device. The currently active IO scheduler will be enclosed
|
||||
in [] brackets. Writing an IO scheduler name to this file will switch
|
||||
control of this block device to that new IO scheduler. Note that writing
|
||||
an IO scheduler name to this file will attempt to load that IO scheduler
|
||||
module, if it isn't already present in the system.
|
||||
|
||||
|
||||
|
||||
Jens Axboe <jens.axboe@oracle.com>, February 2009
|
@ -1,7 +1,8 @@
|
||||
CGROUPS
|
||||
-------
|
||||
|
||||
Written by Paul Menage <menage@google.com> based on Documentation/cpusets.txt
|
||||
Written by Paul Menage <menage@google.com> based on
|
||||
Documentation/cgroups/cpusets.txt
|
||||
|
||||
Original copyright statements from cpusets.txt:
|
||||
Portions Copyright (C) 2004 BULL SA.
|
||||
@ -68,7 +69,7 @@ On their own, the only use for cgroups is for simple job
|
||||
tracking. The intention is that other subsystems hook into the generic
|
||||
cgroup support to provide new attributes for cgroups, such as
|
||||
accounting/limiting the resources which processes in a cgroup can
|
||||
access. For example, cpusets (see Documentation/cpusets.txt) allows
|
||||
access. For example, cpusets (see Documentation/cgroups/cpusets.txt) allows
|
||||
you to associate a set of CPUs and a set of memory nodes with the
|
||||
tasks in each cgroup.
|
||||
|
||||
@ -227,7 +228,6 @@ Each cgroup is represented by a directory in the cgroup file system
|
||||
containing the following files describing that cgroup:
|
||||
|
||||
- tasks: list of tasks (by pid) attached to that cgroup
|
||||
- releasable flag: cgroup currently removeable?
|
||||
- notify_on_release flag: run the release agent on exit?
|
||||
- release_agent: the path to use for release notifications (this file
|
||||
exists in the top cgroup only)
|
||||
@ -360,7 +360,7 @@ Now you want to do something with this cgroup.
|
||||
|
||||
In this directory you can find several files:
|
||||
# ls
|
||||
notify_on_release releasable tasks
|
||||
notify_on_release tasks
|
||||
(plus whatever files added by the attached subsystems)
|
||||
|
||||
Now attach your shell to this cgroup:
|
||||
@ -479,7 +479,6 @@ newly-created cgroup if an error occurs after this subsystem's
|
||||
create() method has been called for the new cgroup).
|
||||
|
||||
void pre_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp);
|
||||
(cgroup_mutex held by caller)
|
||||
|
||||
Called before checking the reference count on each subsystem. This may
|
||||
be useful for subsystems which have some extra references even if
|
||||
@ -498,6 +497,7 @@ remain valid while the caller holds cgroup_mutex.
|
||||
|
||||
void attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
|
||||
struct cgroup *old_cgrp, struct task_struct *task)
|
||||
(cgroup_mutex held by caller)
|
||||
|
||||
Called after the task has been attached to the cgroup, to allow any
|
||||
post-attachment activity that requires memory allocations or blocking.
|
||||
@ -511,6 +511,7 @@ void exit(struct cgroup_subsys *ss, struct task_struct *task)
|
||||
Called during task exit.
|
||||
|
||||
int populate(struct cgroup_subsys *ss, struct cgroup *cgrp)
|
||||
(cgroup_mutex held by caller)
|
||||
|
||||
Called after creation of a cgroup to allow a subsystem to populate
|
||||
the cgroup directory with file entries. The subsystem should make
|
||||
@ -520,6 +521,7 @@ method can return an error code, the error code is currently not
|
||||
always handled well.
|
||||
|
||||
void post_clone(struct cgroup_subsys *ss, struct cgroup *cgrp)
|
||||
(cgroup_mutex held by caller)
|
||||
|
||||
Called at the end of cgroup_clone() to do any parameter
|
||||
initialization which might be required before a task could attach. For
|
||||
@ -527,7 +529,7 @@ example in cpusets, no task may attach before 'cpus' and 'mems' are set
|
||||
up.
|
||||
|
||||
void bind(struct cgroup_subsys *ss, struct cgroup *root)
|
||||
(cgroup_mutex held by caller)
|
||||
(cgroup_mutex and ss->hierarchy_mutex held by caller)
|
||||
|
||||
Called when a cgroup subsystem is rebound to a different hierarchy
|
||||
and root cgroup. Currently this will only involve movement between
|
||||
|
362
Documentation/cgroups/memcg_test.txt
Normal file
362
Documentation/cgroups/memcg_test.txt
Normal file
@ -0,0 +1,362 @@
|
||||
Memory Resource Controller(Memcg) Implementation Memo.
|
||||
Last Updated: 2009/1/19
|
||||
Base Kernel Version: based on 2.6.29-rc2.
|
||||
|
||||
Because VM is getting complex (one of reasons is memcg...), memcg's behavior
|
||||
is complex. This is a document for memcg's internal behavior.
|
||||
Please note that implementation details can be changed.
|
||||
|
||||
(*) Topics on API should be in Documentation/cgroups/memory.txt)
|
||||
|
||||
0. How to record usage ?
|
||||
2 objects are used.
|
||||
|
||||
page_cgroup ....an object per page.
|
||||
Allocated at boot or memory hotplug. Freed at memory hot removal.
|
||||
|
||||
swap_cgroup ... an entry per swp_entry.
|
||||
Allocated at swapon(). Freed at swapoff().
|
||||
|
||||
The page_cgroup has USED bit and double count against a page_cgroup never
|
||||
occurs. swap_cgroup is used only when a charged page is swapped-out.
|
||||
|
||||
1. Charge
|
||||
|
||||
a page/swp_entry may be charged (usage += PAGE_SIZE) at
|
||||
|
||||
mem_cgroup_newpage_charge()
|
||||
Called at new page fault and Copy-On-Write.
|
||||
|
||||
mem_cgroup_try_charge_swapin()
|
||||
Called at do_swap_page() (page fault on swap entry) and swapoff.
|
||||
Followed by charge-commit-cancel protocol. (With swap accounting)
|
||||
At commit, a charge recorded in swap_cgroup is removed.
|
||||
|
||||
mem_cgroup_cache_charge()
|
||||
Called at add_to_page_cache()
|
||||
|
||||
mem_cgroup_cache_charge_swapin()
|
||||
Called at shmem's swapin.
|
||||
|
||||
mem_cgroup_prepare_migration()
|
||||
Called before migration. "extra" charge is done and followed by
|
||||
charge-commit-cancel protocol.
|
||||
At commit, charge against oldpage or newpage will be committed.
|
||||
|
||||
2. Uncharge
|
||||
a page/swp_entry may be uncharged (usage -= PAGE_SIZE) by
|
||||
|
||||
mem_cgroup_uncharge_page()
|
||||
Called when an anonymous page is fully unmapped. I.e., mapcount goes
|
||||
to 0. If the page is SwapCache, uncharge is delayed until
|
||||
mem_cgroup_uncharge_swapcache().
|
||||
|
||||
mem_cgroup_uncharge_cache_page()
|
||||
Called when a page-cache is deleted from radix-tree. If the page is
|
||||
SwapCache, uncharge is delayed until mem_cgroup_uncharge_swapcache().
|
||||
|
||||
mem_cgroup_uncharge_swapcache()
|
||||
Called when SwapCache is removed from radix-tree. The charge itself
|
||||
is moved to swap_cgroup. (If mem+swap controller is disabled, no
|
||||
charge to swap occurs.)
|
||||
|
||||
mem_cgroup_uncharge_swap()
|
||||
Called when swp_entry's refcnt goes down to 0. A charge against swap
|
||||
disappears.
|
||||
|
||||
mem_cgroup_end_migration(old, new)
|
||||
At success of migration old is uncharged (if necessary), a charge
|
||||
to new page is committed. At failure, charge to old page is committed.
|
||||
|
||||
3. charge-commit-cancel
|
||||
In some case, we can't know this "charge" is valid or not at charging
|
||||
(because of races).
|
||||
To handle such case, there are charge-commit-cancel functions.
|
||||
mem_cgroup_try_charge_XXX
|
||||
mem_cgroup_commit_charge_XXX
|
||||
mem_cgroup_cancel_charge_XXX
|
||||
these are used in swap-in and migration.
|
||||
|
||||
At try_charge(), there are no flags to say "this page is charged".
|
||||
at this point, usage += PAGE_SIZE.
|
||||
|
||||
At commit(), the function checks the page should be charged or not
|
||||
and set flags or avoid charging.(usage -= PAGE_SIZE)
|
||||
|
||||
At cancel(), simply usage -= PAGE_SIZE.
|
||||
|
||||
Under below explanation, we assume CONFIG_CGROUP_MEM_RES_CTLR_SWAP=y.
|
||||
|
||||
4. Anonymous
|
||||
Anonymous page is newly allocated at
|
||||
- page fault into MAP_ANONYMOUS mapping.
|
||||
- Copy-On-Write.
|
||||
It is charged right after it's allocated before doing any page table
|
||||
related operations. Of course, it's uncharged when another page is used
|
||||
for the fault address.
|
||||
|
||||
At freeing anonymous page (by exit() or munmap()), zap_pte() is called
|
||||
and pages for ptes are freed one by one.(see mm/memory.c). Uncharges
|
||||
are done at page_remove_rmap() when page_mapcount() goes down to 0.
|
||||
|
||||
Another page freeing is by page-reclaim (vmscan.c) and anonymous
|
||||
pages are swapped out. In this case, the page is marked as
|
||||
PageSwapCache(). uncharge() routine doesn't uncharge the page marked
|
||||
as SwapCache(). It's delayed until __delete_from_swap_cache().
|
||||
|
||||
4.1 Swap-in.
|
||||
At swap-in, the page is taken from swap-cache. There are 2 cases.
|
||||
|
||||
(a) If the SwapCache is newly allocated and read, it has no charges.
|
||||
(b) If the SwapCache has been mapped by processes, it has been
|
||||
charged already.
|
||||
|
||||
This swap-in is one of the most complicated work. In do_swap_page(),
|
||||
following events occur when pte is unchanged.
|
||||
|
||||
(1) the page (SwapCache) is looked up.
|
||||
(2) lock_page()
|
||||
(3) try_charge_swapin()
|
||||
(4) reuse_swap_page() (may call delete_swap_cache())
|
||||
(5) commit_charge_swapin()
|
||||
(6) swap_free().
|
||||
|
||||
Considering following situation for example.
|
||||
|
||||
(A) The page has not been charged before (2) and reuse_swap_page()
|
||||
doesn't call delete_from_swap_cache().
|
||||
(B) The page has not been charged before (2) and reuse_swap_page()
|
||||
calls delete_from_swap_cache().
|
||||
(C) The page has been charged before (2) and reuse_swap_page() doesn't
|
||||
call delete_from_swap_cache().
|
||||
(D) The page has been charged before (2) and reuse_swap_page() calls
|
||||
delete_from_swap_cache().
|
||||
|
||||
memory.usage/memsw.usage changes to this page/swp_entry will be
|
||||
Case (A) (B) (C) (D)
|
||||
Event
|
||||
Before (2) 0/ 1 0/ 1 1/ 1 1/ 1
|
||||
===========================================
|
||||
(3) +1/+1 +1/+1 +1/+1 +1/+1
|
||||
(4) - 0/ 0 - -1/ 0
|
||||
(5) 0/-1 0/ 0 -1/-1 0/ 0
|
||||
(6) - 0/-1 - 0/-1
|
||||
===========================================
|
||||
Result 1/ 1 1/ 1 1/ 1 1/ 1
|
||||
|
||||
In any cases, charges to this page should be 1/ 1.
|
||||
|
||||
4.2 Swap-out.
|
||||
At swap-out, typical state transition is below.
|
||||
|
||||
(a) add to swap cache. (marked as SwapCache)
|
||||
swp_entry's refcnt += 1.
|
||||
(b) fully unmapped.
|
||||
swp_entry's refcnt += # of ptes.
|
||||
(c) write back to swap.
|
||||
(d) delete from swap cache. (remove from SwapCache)
|
||||
swp_entry's refcnt -= 1.
|
||||
|
||||
|
||||
At (b), the page is marked as SwapCache and not uncharged.
|
||||
At (d), the page is removed from SwapCache and a charge in page_cgroup
|
||||
is moved to swap_cgroup.
|
||||
|
||||
Finally, at task exit,
|
||||
(e) zap_pte() is called and swp_entry's refcnt -=1 -> 0.
|
||||
Here, a charge in swap_cgroup disappears.
|
||||
|
||||
5. Page Cache
|
||||
Page Cache is charged at
|
||||
- add_to_page_cache_locked().
|
||||
|
||||
uncharged at
|
||||
- __remove_from_page_cache().
|
||||
|
||||
The logic is very clear. (About migration, see below)
|
||||
Note: __remove_from_page_cache() is called by remove_from_page_cache()
|
||||
and __remove_mapping().
|
||||
|
||||
6. Shmem(tmpfs) Page Cache
|
||||
Memcg's charge/uncharge have special handlers of shmem. The best way
|
||||
to understand shmem's page state transition is to read mm/shmem.c.
|
||||
But brief explanation of the behavior of memcg around shmem will be
|
||||
helpful to understand the logic.
|
||||
|
||||
Shmem's page (just leaf page, not direct/indirect block) can be on
|
||||
- radix-tree of shmem's inode.
|
||||
- SwapCache.
|
||||
- Both on radix-tree and SwapCache. This happens at swap-in
|
||||
and swap-out,
|
||||
|
||||
It's charged when...
|
||||
- A new page is added to shmem's radix-tree.
|
||||
- A swp page is read. (move a charge from swap_cgroup to page_cgroup)
|
||||
It's uncharged when
|
||||
- A page is removed from radix-tree and not SwapCache.
|
||||
- When SwapCache is removed, a charge is moved to swap_cgroup.
|
||||
- When swp_entry's refcnt goes down to 0, a charge in swap_cgroup
|
||||
disappears.
|
||||
|
||||
7. Page Migration
|
||||
One of the most complicated functions is page-migration-handler.
|
||||
Memcg has 2 routines. Assume that we are migrating a page's contents
|
||||
from OLDPAGE to NEWPAGE.
|
||||
|
||||
Usual migration logic is..
|
||||
(a) remove the page from LRU.
|
||||
(b) allocate NEWPAGE (migration target)
|
||||
(c) lock by lock_page().
|
||||
(d) unmap all mappings.
|
||||
(e-1) If necessary, replace entry in radix-tree.
|
||||
(e-2) move contents of a page.
|
||||
(f) map all mappings again.
|
||||
(g) pushback the page to LRU.
|
||||
(-) OLDPAGE will be freed.
|
||||
|
||||
Before (g), memcg should complete all necessary charge/uncharge to
|
||||
NEWPAGE/OLDPAGE.
|
||||
|
||||
The point is....
|
||||
- If OLDPAGE is anonymous, all charges will be dropped at (d) because
|
||||
try_to_unmap() drops all mapcount and the page will not be
|
||||
SwapCache.
|
||||
|
||||
- If OLDPAGE is SwapCache, charges will be kept at (g) because
|
||||
__delete_from_swap_cache() isn't called at (e-1)
|
||||
|
||||
- If OLDPAGE is page-cache, charges will be kept at (g) because
|
||||
remove_from_page_cache() isn't called at (e-1)
|
||||
|
||||
memcg provides following hooks.
|
||||
|
||||
- mem_cgroup_prepare_migration(OLDPAGE)
|
||||
Called after (b) to account a charge (usage += PAGE_SIZE) against
|
||||
memcg which OLDPAGE belongs to.
|
||||
|
||||
- mem_cgroup_end_migration(OLDPAGE, NEWPAGE)
|
||||
Called after (f) before (g).
|
||||
If OLDPAGE is used, commit OLDPAGE again. If OLDPAGE is already
|
||||
charged, a charge by prepare_migration() is automatically canceled.
|
||||
If NEWPAGE is used, commit NEWPAGE and uncharge OLDPAGE.
|
||||
|
||||
But zap_pte() (by exit or munmap) can be called while migration,
|
||||
we have to check if OLDPAGE/NEWPAGE is a valid page after commit().
|
||||
|
||||
8. LRU
|
||||
Each memcg has its own private LRU. Now, its handling is under global
|
||||
VM's control (means that it's handled under global zone->lru_lock).
|
||||
Almost all routines around memcg's LRU is called by global LRU's
|
||||
list management functions under zone->lru_lock().
|
||||
|
||||
A special function is mem_cgroup_isolate_pages(). This scans
|
||||
memcg's private LRU and call __isolate_lru_page() to extract a page
|
||||
from LRU.
|
||||
(By __isolate_lru_page(), the page is removed from both of global and
|
||||
private LRU.)
|
||||
|
||||
|
||||
9. Typical Tests.
|
||||
|
||||
Tests for racy cases.
|
||||
|
||||
9.1 Small limit to memcg.
|
||||
When you do test to do racy case, it's good test to set memcg's limit
|
||||
to be very small rather than GB. Many races found in the test under
|
||||
xKB or xxMB limits.
|
||||
(Memory behavior under GB and Memory behavior under MB shows very
|
||||
different situation.)
|
||||
|
||||
9.2 Shmem
|
||||
Historically, memcg's shmem handling was poor and we saw some amount
|
||||
of troubles here. This is because shmem is page-cache but can be
|
||||
SwapCache. Test with shmem/tmpfs is always good test.
|
||||
|
||||
9.3 Migration
|
||||
For NUMA, migration is another special case. To do easy test, cpuset
|
||||
is useful. Following is a sample script to do migration.
|
||||
|
||||
mount -t cgroup -o cpuset none /opt/cpuset
|
||||
|
||||
mkdir /opt/cpuset/01
|
||||
echo 1 > /opt/cpuset/01/cpuset.cpus
|
||||
echo 0 > /opt/cpuset/01/cpuset.mems
|
||||
echo 1 > /opt/cpuset/01/cpuset.memory_migrate
|
||||
mkdir /opt/cpuset/02
|
||||
echo 1 > /opt/cpuset/02/cpuset.cpus
|
||||
echo 1 > /opt/cpuset/02/cpuset.mems
|
||||
echo 1 > /opt/cpuset/02/cpuset.memory_migrate
|
||||
|
||||
In above set, when you move a task from 01 to 02, page migration from
|
||||
node 0 to node 1 will occur. Following is a script to migrate all
|
||||
under cpuset.
|
||||
--
|
||||
move_task()
|
||||
{
|
||||
for pid in $1
|
||||
do
|
||||
/bin/echo $pid >$2/tasks 2>/dev/null
|
||||
echo -n $pid
|
||||
echo -n " "
|
||||
done
|
||||
echo END
|
||||
}
|
||||
|
||||
G1_TASK=`cat ${G1}/tasks`
|
||||
G2_TASK=`cat ${G2}/tasks`
|
||||
move_task "${G1_TASK}" ${G2} &
|
||||
--
|
||||
9.4 Memory hotplug.
|
||||
memory hotplug test is one of good test.
|
||||
to offline memory, do following.
|
||||
# echo offline > /sys/devices/system/memory/memoryXXX/state
|
||||
(XXX is the place of memory)
|
||||
This is an easy way to test page migration, too.
|
||||
|
||||
9.5 mkdir/rmdir
|
||||
When using hierarchy, mkdir/rmdir test should be done.
|
||||
Use tests like the following.
|
||||
|
||||
echo 1 >/opt/cgroup/01/memory/use_hierarchy
|
||||
mkdir /opt/cgroup/01/child_a
|
||||
mkdir /opt/cgroup/01/child_b
|
||||
|
||||
set limit to 01.
|
||||
add limit to 01/child_b
|
||||
run jobs under child_a and child_b
|
||||
|
||||
create/delete following groups at random while jobs are running.
|
||||
/opt/cgroup/01/child_a/child_aa
|
||||
/opt/cgroup/01/child_b/child_bb
|
||||
/opt/cgroup/01/child_c
|
||||
|
||||
running new jobs in new group is also good.
|
||||
|
||||
9.6 Mount with other subsystems.
|
||||
Mounting with other subsystems is a good test because there is a
|
||||
race and lock dependency with other cgroup subsystems.
|
||||
|
||||
example)
|
||||
# mount -t cgroup none /cgroup -o cpuset,memory,cpu,devices
|
||||
|
||||
and do task move, mkdir, rmdir etc...under this.
|
||||
|
||||
9.7 swapoff.
|
||||
Besides management of swap is one of complicated parts of memcg,
|
||||
call path of swap-in at swapoff is not same as usual swap-in path..
|
||||
It's worth to be tested explicitly.
|
||||
|
||||
For example, test like following is good.
|
||||
(Shell-A)
|
||||
# mount -t cgroup none /cgroup -o memory
|
||||
# mkdir /cgroup/test
|
||||
# echo 40M > /cgroup/test/memory.limit_in_bytes
|
||||
# echo 0 > /cgroup/test/tasks
|
||||
Run malloc(100M) program under this. You'll see 60M of swaps.
|
||||
(Shell-B)
|
||||
# move all tasks in /cgroup/test to /cgroup
|
||||
# /sbin/swapoff -a
|
||||
# rmdir /cgroup/test
|
||||
# kill malloc task.
|
||||
|
||||
Of course, tmpfs v.s. swapoff test should be tested, too.
|
@ -137,7 +137,32 @@ behind this approach is that a cgroup that aggressively uses a shared
|
||||
page will eventually get charged for it (once it is uncharged from
|
||||
the cgroup that brought it in -- this will happen on memory pressure).
|
||||
|
||||
2.4 Reclaim
|
||||
Exception: If CONFIG_CGROUP_MEM_RES_CTLR_SWAP is not used..
|
||||
When you do swapoff and make swapped-out pages of shmem(tmpfs) to
|
||||
be backed into memory in force, charges for pages are accounted against the
|
||||
caller of swapoff rather than the users of shmem.
|
||||
|
||||
|
||||
2.4 Swap Extension (CONFIG_CGROUP_MEM_RES_CTLR_SWAP)
|
||||
Swap Extension allows you to record charge for swap. A swapped-in page is
|
||||
charged back to original page allocator if possible.
|
||||
|
||||
When swap is accounted, following files are added.
|
||||
- memory.memsw.usage_in_bytes.
|
||||
- memory.memsw.limit_in_bytes.
|
||||
|
||||
usage of mem+swap is limited by memsw.limit_in_bytes.
|
||||
|
||||
Note: why 'mem+swap' rather than swap.
|
||||
The global LRU(kswapd) can swap out arbitrary pages. Swap-out means
|
||||
to move account from memory to swap...there is no change in usage of
|
||||
mem+swap.
|
||||
|
||||
In other words, when we want to limit the usage of swap without affecting
|
||||
global LRU, mem+swap limit is better than just limiting swap from OS point
|
||||
of view.
|
||||
|
||||
2.5 Reclaim
|
||||
|
||||
Each cgroup maintains a per cgroup LRU that consists of an active
|
||||
and inactive list. When a cgroup goes over its limit, we first try
|
||||
@ -207,12 +232,6 @@ exceeded.
|
||||
The memory.stat file gives accounting information. Now, the number of
|
||||
caches, RSS and Active pages/Inactive pages are shown.
|
||||
|
||||
The memory.force_empty gives an interface to drop *all* charges by force.
|
||||
|
||||
# echo 1 > memory.force_empty
|
||||
|
||||
will drop all charges in cgroup. Currently, this is maintained for test.
|
||||
|
||||
4. Testing
|
||||
|
||||
Balbir posted lmbench, AIM9, LTP and vmmstress results [10] and [11].
|
||||
@ -242,10 +261,106 @@ reclaimed.
|
||||
|
||||
A cgroup can be removed by rmdir, but as discussed in sections 4.1 and 4.2, a
|
||||
cgroup might have some charge associated with it, even though all
|
||||
tasks have migrated away from it. Such charges are automatically dropped at
|
||||
rmdir() if there are no tasks.
|
||||
tasks have migrated away from it.
|
||||
Such charges are freed(at default) or moved to its parent. When moved,
|
||||
both of RSS and CACHES are moved to parent.
|
||||
If both of them are busy, rmdir() returns -EBUSY. See 5.1 Also.
|
||||
|
||||
5. TODO
|
||||
Charges recorded in swap information is not updated at removal of cgroup.
|
||||
Recorded information is discarded and a cgroup which uses swap (swapcache)
|
||||
will be charged as a new owner of it.
|
||||
|
||||
|
||||
5. Misc. interfaces.
|
||||
|
||||
5.1 force_empty
|
||||
memory.force_empty interface is provided to make cgroup's memory usage empty.
|
||||
You can use this interface only when the cgroup has no tasks.
|
||||
When writing anything to this
|
||||
|
||||
# echo 0 > memory.force_empty
|
||||
|
||||
Almost all pages tracked by this memcg will be unmapped and freed. Some of
|
||||
pages cannot be freed because it's locked or in-use. Such pages are moved
|
||||
to parent and this cgroup will be empty. But this may return -EBUSY in
|
||||
some too busy case.
|
||||
|
||||
Typical use case of this interface is that calling this before rmdir().
|
||||
Because rmdir() moves all pages to parent, some out-of-use page caches can be
|
||||
moved to the parent. If you want to avoid that, force_empty will be useful.
|
||||
|
||||
5.2 stat file
|
||||
memory.stat file includes following statistics (now)
|
||||
cache - # of pages from page-cache and shmem.
|
||||
rss - # of pages from anonymous memory.
|
||||
pgpgin - # of event of charging
|
||||
pgpgout - # of event of uncharging
|
||||
active_anon - # of pages on active lru of anon, shmem.
|
||||
inactive_anon - # of pages on inactive lru of anon, shmem
|
||||
active_file - # of pages on active lru of file-cache
|
||||
inactive_file - # of pages on inactive lru of file cache
|
||||
unevictable - # of pages cannot be reclaimed.(mlocked etc)
|
||||
|
||||
The following depend on CONFIG_DEBUG_VM.
|
||||
inactive_ratio - VM internal parameter. (see mm/page_alloc.c)
|
||||
recent_rotated_anon - VM internal parameter. (see mm/vmscan.c)
|
||||
recent_rotated_file - VM internal parameter. (see mm/vmscan.c)
|
||||
recent_scanned_anon - VM internal parameter. (see mm/vmscan.c)
|
||||
recent_scanned_file - VM internal parameter. (see mm/vmscan.c)
|
||||
|
||||
Memo:
|
||||
recent_rotated means recent frequency of lru rotation.
|
||||
recent_scanned means recent # of scans to lru.
|
||||
showing for better debug please see the code for meanings.
|
||||
|
||||
|
||||
5.3 swappiness
|
||||
Similar to /proc/sys/vm/swappiness, but affecting a hierarchy of groups only.
|
||||
|
||||
Following cgroup's swappiness can't be changed.
|
||||
- root cgroup (uses /proc/sys/vm/swappiness).
|
||||
- a cgroup which uses hierarchy and it has child cgroup.
|
||||
- a cgroup which uses hierarchy and not the root of hierarchy.
|
||||
|
||||
|
||||
6. Hierarchy support
|
||||
|
||||
The memory controller supports a deep hierarchy and hierarchical accounting.
|
||||
The hierarchy is created by creating the appropriate cgroups in the
|
||||
cgroup filesystem. Consider for example, the following cgroup filesystem
|
||||
hierarchy
|
||||
|
||||
root
|
||||
/ | \
|
||||
/ | \
|
||||
a b c
|
||||
| \
|
||||
| \
|
||||
d e
|
||||
|
||||
In the diagram above, with hierarchical accounting enabled, all memory
|
||||
usage of e, is accounted to its ancestors up until the root (i.e, c and root),
|
||||
that has memory.use_hierarchy enabled. If one of the ancestors goes over its
|
||||
limit, the reclaim algorithm reclaims from the tasks in the ancestor and the
|
||||
children of the ancestor.
|
||||
|
||||
6.1 Enabling hierarchical accounting and reclaim
|
||||
|
||||
The memory controller by default disables the hierarchy feature. Support
|
||||
can be enabled by writing 1 to memory.use_hierarchy file of the root cgroup
|
||||
|
||||
# echo 1 > memory.use_hierarchy
|
||||
|
||||
The feature can be disabled by
|
||||
|
||||
# echo 0 > memory.use_hierarchy
|
||||
|
||||
NOTE1: Enabling/disabling will fail if the cgroup already has other
|
||||
cgroups created below it.
|
||||
|
||||
NOTE2: This feature can be enabled/disabled per subtree.
|
||||
|
||||
7. TODO
|
||||
|
||||
1. Add support for accounting huge pages (as a separate controller)
|
||||
2. Make per-cgroup scanner reclaim not-shared pages first
|
@ -50,16 +50,17 @@ additional_cpus=n (*) Use this to limit hotpluggable cpus. This option sets
|
||||
cpu_possible_map = cpu_present_map + additional_cpus
|
||||
|
||||
(*) Option valid only for following architectures
|
||||
- x86_64, ia64
|
||||
- ia64
|
||||
|
||||
ia64 and x86_64 use the number of disabled local apics in ACPI tables MADT
|
||||
to determine the number of potentially hot-pluggable cpus. The implementation
|
||||
should only rely on this to count the # of cpus, but *MUST* not rely on the
|
||||
apicid values in those tables for disabled apics. In the event BIOS doesn't
|
||||
mark such hot-pluggable cpus as disabled entries, one could use this
|
||||
parameter "additional_cpus=x" to represent those cpus in the cpu_possible_map.
|
||||
ia64 uses the number of disabled local apics in ACPI tables MADT to
|
||||
determine the number of potentially hot-pluggable cpus. The implementation
|
||||
should only rely on this to count the # of cpus, but *MUST* not rely
|
||||
on the apicid values in those tables for disabled apics. In the event
|
||||
BIOS doesn't mark such hot-pluggable cpus as disabled entries, one could
|
||||
use this parameter "additional_cpus=x" to represent those cpus in the
|
||||
cpu_possible_map.
|
||||
|
||||
possible_cpus=n [s390 only] use this to set hotpluggable cpus.
|
||||
possible_cpus=n [s390,x86_64] use this to set hotpluggable cpus.
|
||||
This option sets possible_cpus bits in
|
||||
cpu_possible_map. Thus keeping the numbers of bits set
|
||||
constant even if the machine gets rebooted.
|
||||
|
@ -31,3 +31,51 @@ not defined by include/asm-XXX/topology.h:
|
||||
2) core_id: 0
|
||||
3) thread_siblings: just the given CPU
|
||||
4) core_siblings: just the given CPU
|
||||
|
||||
Additionally, cpu topology information is provided under
|
||||
/sys/devices/system/cpu and includes these files. The internal
|
||||
source for the output is in brackets ("[]").
|
||||
|
||||
kernel_max: the maximum cpu index allowed by the kernel configuration.
|
||||
[NR_CPUS-1]
|
||||
|
||||
offline: cpus that are not online because they have been
|
||||
HOTPLUGGED off (see cpu-hotplug.txt) or exceed the limit
|
||||
of cpus allowed by the kernel configuration (kernel_max
|
||||
above). [~cpu_online_mask + cpus >= NR_CPUS]
|
||||
|
||||
online: cpus that are online and being scheduled [cpu_online_mask]
|
||||
|
||||
possible: cpus that have been allocated resources and can be
|
||||
brought online if they are present. [cpu_possible_mask]
|
||||
|
||||
present: cpus that have been identified as being present in the
|
||||
system. [cpu_present_mask]
|
||||
|
||||
The format for the above output is compatible with cpulist_parse()
|
||||
[see <linux/cpumask.h>]. Some examples follow.
|
||||
|
||||
In this example, there are 64 cpus in the system but cpus 32-63 exceed
|
||||
the kernel max which is limited to 0..31 by the NR_CPUS config option
|
||||
being 32. Note also that cpus 2 and 4-31 are not online but could be
|
||||
brought online as they are both present and possible.
|
||||
|
||||
kernel_max: 31
|
||||
offline: 2,4-31,32-63
|
||||
online: 0-1,3
|
||||
possible: 0-31
|
||||
present: 0-31
|
||||
|
||||
In this example, the NR_CPUS config option is 128, but the kernel was
|
||||
started with possible_cpus=144. There are 4 cpus in the system and cpu2
|
||||
was manually taken offline (and is the only cpu that can be brought
|
||||
online.)
|
||||
|
||||
kernel_max: 127
|
||||
offline: 2,4-127,128-143
|
||||
online: 0-1,3
|
||||
possible: 0-127
|
||||
present: 0-3
|
||||
|
||||
See cpu-hotplug.txt for the possible_cpus=NUM kernel start parameter
|
||||
as well as more information on the various cpumask's.
|
||||
|
@ -13,9 +13,9 @@
|
||||
3.6 Constraints
|
||||
3.7 Example
|
||||
|
||||
4 DRIVER DEVELOPER NOTES
|
||||
4 DMAENGINE DRIVER DEVELOPER NOTES
|
||||
4.1 Conformance points
|
||||
4.2 "My application needs finer control of hardware channels"
|
||||
4.2 "My application needs exclusive control of hardware channels"
|
||||
|
||||
5 SOURCE
|
||||
|
||||
@ -150,6 +150,7 @@ ops_run_* and ops_complete_* routines in drivers/md/raid5.c for more
|
||||
implementation examples.
|
||||
|
||||
4 DRIVER DEVELOPMENT NOTES
|
||||
|
||||
4.1 Conformance points:
|
||||
There are a few conformance points required in dmaengine drivers to
|
||||
accommodate assumptions made by applications using the async_tx API:
|
||||
@ -158,58 +159,49 @@ accommodate assumptions made by applications using the async_tx API:
|
||||
3/ Use async_tx_run_dependencies() in the descriptor clean up path to
|
||||
handle submission of dependent operations
|
||||
|
||||
4.2 "My application needs finer control of hardware channels"
|
||||
This requirement seems to arise from cases where a DMA engine driver is
|
||||
trying to support device-to-memory DMA. The dmaengine and async_tx
|
||||
implementations were designed for offloading memory-to-memory
|
||||
operations; however, there are some capabilities of the dmaengine layer
|
||||
that can be used for platform-specific channel management.
|
||||
Platform-specific constraints can be handled by registering the
|
||||
application as a 'dma_client' and implementing a 'dma_event_callback' to
|
||||
apply a filter to the available channels in the system. Before showing
|
||||
how to implement a custom dma_event callback some background of
|
||||
dmaengine's client support is required.
|
||||
4.2 "My application needs exclusive control of hardware channels"
|
||||
Primarily this requirement arises from cases where a DMA engine driver
|
||||
is being used to support device-to-memory operations. A channel that is
|
||||
performing these operations cannot, for many platform specific reasons,
|
||||
be shared. For these cases the dma_request_channel() interface is
|
||||
provided.
|
||||
|
||||
The following routines in dmaengine support multiple clients requesting
|
||||
use of a channel:
|
||||
- dma_async_client_register(struct dma_client *client)
|
||||
- dma_async_client_chan_request(struct dma_client *client)
|
||||
The interface is:
|
||||
struct dma_chan *dma_request_channel(dma_cap_mask_t mask,
|
||||
dma_filter_fn filter_fn,
|
||||
void *filter_param);
|
||||
|
||||
dma_async_client_register takes a pointer to an initialized dma_client
|
||||
structure. It expects that the 'event_callback' and 'cap_mask' fields
|
||||
are already initialized.
|
||||
Where dma_filter_fn is defined as:
|
||||
typedef bool (*dma_filter_fn)(struct dma_chan *chan, void *filter_param);
|
||||
|
||||
dma_async_client_chan_request triggers dmaengine to notify the client of
|
||||
all channels that satisfy the capability mask. It is up to the client's
|
||||
event_callback routine to track how many channels the client needs and
|
||||
how many it is currently using. The dma_event_callback routine returns a
|
||||
dma_state_client code to let dmaengine know the status of the
|
||||
allocation.
|
||||
When the optional 'filter_fn' parameter is set to NULL
|
||||
dma_request_channel simply returns the first channel that satisfies the
|
||||
capability mask. Otherwise, when the mask parameter is insufficient for
|
||||
specifying the necessary channel, the filter_fn routine can be used to
|
||||
disposition the available channels in the system. The filter_fn routine
|
||||
is called once for each free channel in the system. Upon seeing a
|
||||
suitable channel filter_fn returns DMA_ACK which flags that channel to
|
||||
be the return value from dma_request_channel. A channel allocated via
|
||||
this interface is exclusive to the caller, until dma_release_channel()
|
||||
is called.
|
||||
|
||||
Below is the example of how to extend this functionality for
|
||||
platform-specific filtering of the available channels beyond the
|
||||
standard capability mask:
|
||||
The DMA_PRIVATE capability flag is used to tag dma devices that should
|
||||
not be used by the general-purpose allocator. It can be set at
|
||||
initialization time if it is known that a channel will always be
|
||||
private. Alternatively, it is set when dma_request_channel() finds an
|
||||
unused "public" channel.
|
||||
|
||||
static enum dma_state_client
|
||||
my_dma_client_callback(struct dma_client *client,
|
||||
struct dma_chan *chan, enum dma_state state)
|
||||
{
|
||||
struct dma_device *dma_dev;
|
||||
struct my_platform_specific_dma *plat_dma_dev;
|
||||
|
||||
dma_dev = chan->device;
|
||||
plat_dma_dev = container_of(dma_dev,
|
||||
struct my_platform_specific_dma,
|
||||
dma_dev);
|
||||
|
||||
if (!plat_dma_dev->platform_specific_capability)
|
||||
return DMA_DUP;
|
||||
|
||||
. . .
|
||||
}
|
||||
A couple caveats to note when implementing a driver and consumer:
|
||||
1/ Once a channel has been privately allocated it will no longer be
|
||||
considered by the general-purpose allocator even after a call to
|
||||
dma_release_channel().
|
||||
2/ Since capabilities are specified at the device level a dma_device
|
||||
with multiple channels will either have all channels public, or all
|
||||
channels private.
|
||||
|
||||
5 SOURCE
|
||||
include/linux/dmaengine.h: core header file for DMA drivers and clients
|
||||
|
||||
include/linux/dmaengine.h: core header file for DMA drivers and api users
|
||||
drivers/dma/dmaengine.c: offload engine channel management routines
|
||||
drivers/dma/: location for offload engine drivers
|
||||
include/linux/async_tx.h: core header file for the async_tx api
|
||||
|
@ -81,8 +81,8 @@ Until this step is completed the driver cannot be unloaded.
|
||||
Also echoing either mono, packet or init into image_type will free up the
|
||||
memory allocated by the driver.
|
||||
|
||||
If an user by accident executes steps 1 and 3 above without executing step 2;
|
||||
it will make the /sys/class/firmware/dell_rbu/ entries to disappear.
|
||||
If a user by accident executes steps 1 and 3 above without executing step 2;
|
||||
it will make the /sys/class/firmware/dell_rbu/ entries disappear.
|
||||
The entries can be recreated by doing the following
|
||||
echo init > /sys/devices/platform/dell_rbu/image_type
|
||||
NOTE: echoing init in image_type does not change its original value.
|
||||
|
@ -375,10 +375,10 @@ say, this can be a large job, so it is best to be sure that the
|
||||
justification is solid.
|
||||
|
||||
When making an incompatible API change, one should, whenever possible,
|
||||
ensure that code which has not been updated is caught by the compiler.
|
||||
ensure that code which has not been updated is caught by the compiler.
|
||||
This will help you to be sure that you have found all in-tree uses of that
|
||||
interface. It will also alert developers of out-of-tree code that there is
|
||||
a change that they need to respond to. Supporting out-of-tree code is not
|
||||
something that kernel developers need to be worried about, but we also do
|
||||
not have to make life harder for out-of-tree developers than it it needs to
|
||||
be.
|
||||
not have to make life harder for out-of-tree developers than it needs to
|
||||
be.
|
||||
|
1
Documentation/dmaengine.txt
Normal file
1
Documentation/dmaengine.txt
Normal file
@ -0,0 +1 @@
|
||||
See Documentation/crypto/async-tx-api.txt
|
@ -310,15 +310,6 @@ Who: Krzysztof Piotr Oledzki <ole@ans.pl>
|
||||
|
||||
---------------------------
|
||||
|
||||
What: ide-scsi (BLK_DEV_IDESCSI)
|
||||
When: 2.6.29
|
||||
Why: The 2.6 kernel supports direct writing to ide CD drives, which
|
||||
eliminates the need for ide-scsi. The new method is more
|
||||
efficient in every way.
|
||||
Who: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
|
||||
|
||||
---------------------------
|
||||
|
||||
What: i2c_attach_client(), i2c_detach_client(), i2c_driver->detach_client()
|
||||
When: 2.6.29 (ideally) or 2.6.30 (more likely)
|
||||
Why: Deprecated by the new (standard) device driver binding model. Use
|
||||
@ -327,6 +318,14 @@ Who: Jean Delvare <khali@linux-fr.org>
|
||||
|
||||
---------------------------
|
||||
|
||||
What: fscher and fscpos drivers
|
||||
When: June 2009
|
||||
Why: Deprecated by the new fschmd driver.
|
||||
Who: Hans de Goede <hdegoede@redhat.com>
|
||||
Jean Delvare <khali@linux-fr.org>
|
||||
|
||||
---------------------------
|
||||
|
||||
What: SELinux "compat_net" functionality
|
||||
When: 2.6.30 at the earliest
|
||||
Why: In 2.6.18 the Secmark concept was introduced to replace the "compat_net"
|
||||
|
@ -97,8 +97,8 @@ prototypes:
|
||||
void (*put_super) (struct super_block *);
|
||||
void (*write_super) (struct super_block *);
|
||||
int (*sync_fs)(struct super_block *sb, int wait);
|
||||
void (*write_super_lockfs) (struct super_block *);
|
||||
void (*unlockfs) (struct super_block *);
|
||||
int (*freeze_fs) (struct super_block *);
|
||||
int (*unfreeze_fs) (struct super_block *);
|
||||
int (*statfs) (struct dentry *, struct kstatfs *);
|
||||
int (*remount_fs) (struct super_block *, int *, char *);
|
||||
void (*clear_inode) (struct inode *);
|
||||
@ -119,8 +119,8 @@ delete_inode: no
|
||||
put_super: yes yes no
|
||||
write_super: no yes read
|
||||
sync_fs: no no read
|
||||
write_super_lockfs: ?
|
||||
unlockfs: ?
|
||||
freeze_fs: ?
|
||||
unfreeze_fs: ?
|
||||
statfs: no no no
|
||||
remount_fs: yes yes maybe (see below)
|
||||
clear_inode: no
|
||||
@ -394,11 +394,10 @@ prototypes:
|
||||
unsigned long (*get_unmapped_area)(struct file *, unsigned long,
|
||||
unsigned long, unsigned long, unsigned long);
|
||||
int (*check_flags)(int);
|
||||
int (*dir_notify)(struct file *, unsigned long);
|
||||
};
|
||||
|
||||
locking rules:
|
||||
All except ->poll() may block.
|
||||
All may block.
|
||||
BKL
|
||||
llseek: no (see below)
|
||||
read: no
|
||||
@ -424,7 +423,6 @@ sendfile: no
|
||||
sendpage: no
|
||||
get_unmapped_area: no
|
||||
check_flags: no
|
||||
dir_notify: no
|
||||
|
||||
->llseek() locking has moved from llseek to the individual llseek
|
||||
implementations. If your fs is not using generic_file_llseek, you
|
||||
|
91
Documentation/filesystems/btrfs.txt
Normal file
91
Documentation/filesystems/btrfs.txt
Normal file
@ -0,0 +1,91 @@
|
||||
|
||||
BTRFS
|
||||
=====
|
||||
|
||||
Btrfs is a new copy on write filesystem for Linux aimed at
|
||||
implementing advanced features while focusing on fault tolerance,
|
||||
repair and easy administration. Initially developed by Oracle, Btrfs
|
||||
is licensed under the GPL and open for contribution from anyone.
|
||||
|
||||
Linux has a wealth of filesystems to choose from, but we are facing a
|
||||
number of challenges with scaling to the large storage subsystems that
|
||||
are becoming common in today's data centers. Filesystems need to scale
|
||||
in their ability to address and manage large storage, and also in
|
||||
their ability to detect, repair and tolerate errors in the data stored
|
||||
on disk. Btrfs is under heavy development, and is not suitable for
|
||||
any uses other than benchmarking and review. The Btrfs disk format is
|
||||
not yet finalized.
|
||||
|
||||
The main Btrfs features include:
|
||||
|
||||
* Extent based file storage (2^64 max file size)
|
||||
* Space efficient packing of small files
|
||||
* Space efficient indexed directories
|
||||
* Dynamic inode allocation
|
||||
* Writable snapshots
|
||||
* Subvolumes (separate internal filesystem roots)
|
||||
* Object level mirroring and striping
|
||||
* Checksums on data and metadata (multiple algorithms available)
|
||||
* Compression
|
||||
* Integrated multiple device support, with several raid algorithms
|
||||
* Online filesystem check (not yet implemented)
|
||||
* Very fast offline filesystem check
|
||||
* Efficient incremental backup and FS mirroring (not yet implemented)
|
||||
* Online filesystem defragmentation
|
||||
|
||||
|
||||
|
||||
MAILING LIST
|
||||
============
|
||||
|
||||
There is a Btrfs mailing list hosted on vger.kernel.org. You can
|
||||
find details on how to subscribe here:
|
||||
|
||||
http://vger.kernel.org/vger-lists.html#linux-btrfs
|
||||
|
||||
Mailing list archives are available from gmane:
|
||||
|
||||
http://dir.gmane.org/gmane.comp.file-systems.btrfs
|
||||
|
||||
|
||||
|
||||
IRC
|
||||
===
|
||||
|
||||
Discussion of Btrfs also occurs on the #btrfs channel of the Freenode
|
||||
IRC network.
|
||||
|
||||
|
||||
|
||||
UTILITIES
|
||||
=========
|
||||
|
||||
Userspace tools for creating and manipulating Btrfs file systems are
|
||||
available from the git repository at the following location:
|
||||
|
||||
http://git.kernel.org/?p=linux/kernel/git/mason/btrfs-progs-unstable.git
|
||||
git://git.kernel.org/pub/scm/linux/kernel/git/mason/btrfs-progs-unstable.git
|
||||
|
||||
These include the following tools:
|
||||
|
||||
mkfs.btrfs: create a filesystem
|
||||
|
||||
btrfsctl: control program to create snapshots and subvolumes:
|
||||
|
||||
mount /dev/sda2 /mnt
|
||||
btrfsctl -s new_subvol_name /mnt
|
||||
btrfsctl -s snapshot_of_default /mnt/default
|
||||
btrfsctl -s snapshot_of_new_subvol /mnt/new_subvol_name
|
||||
btrfsctl -s snapshot_of_a_snapshot /mnt/snapshot_of_new_subvol
|
||||
ls /mnt
|
||||
default snapshot_of_a_snapshot snapshot_of_new_subvol
|
||||
new_subvol_name snapshot_of_default
|
||||
|
||||
Snapshots and subvolumes cannot be deleted right now, but you can
|
||||
rm -rf all the files and directories inside them.
|
||||
|
||||
btrfsck: do a limited check of the FS extent trees.
|
||||
|
||||
btrfs-debug-tree: print all of the FS metadata in text form. Example:
|
||||
|
||||
btrfs-debug-tree /dev/sda2 >& big_output_file
|
132
Documentation/filesystems/devpts.txt
Normal file
132
Documentation/filesystems/devpts.txt
Normal file
@ -0,0 +1,132 @@
|
||||
|
||||
To support containers, we now allow multiple instances of devpts filesystem,
|
||||
such that indices of ptys allocated in one instance are independent of indices
|
||||
allocated in other instances of devpts.
|
||||
|
||||
To preserve backward compatibility, this support for multiple instances is
|
||||
enabled only if:
|
||||
|
||||
- CONFIG_DEVPTS_MULTIPLE_INSTANCES=y, and
|
||||
- '-o newinstance' mount option is specified while mounting devpts
|
||||
|
||||
IOW, devpts now supports both single-instance and multi-instance semantics.
|
||||
|
||||
If CONFIG_DEVPTS_MULTIPLE_INSTANCES=n, there is no change in behavior and
|
||||
this is referred to as the "legacy" mode. In this mode, the new mount options
|
||||
(-o newinstance and -o ptmxmode) will be ignored with a 'bogus option' message
|
||||
on console.
|
||||
|
||||
If CONFIG_DEVPTS_MULTIPLE_INSTANCES=y and devpts is mounted without the
|
||||
'newinstance' option (as in current start-up scripts) the new mount binds
|
||||
to the initial kernel mount of devpts. This mode is referred to as the
|
||||
'single-instance' mode and the current, single-instance semantics are
|
||||
preserved, i.e PTYs are common across the system.
|
||||
|
||||
The only difference between this single-instance mode and the legacy mode
|
||||
is the presence of a new '/dev/pts/ptmx' node with permissions 0000, which
|
||||
can safely be ignored.
|
||||
|
||||
If CONFIG_DEVPTS_MULTIPLE_INSTANCES=y and 'newinstance' option is specified,
|
||||
the mount is considered to be in the multi-instance mode and a new instance
|
||||
of the devpts fs is created. Any ptys created in this instance are independent
|
||||
of ptys in other instances of devpts. Like in the single-instance mode, the
|
||||
/dev/pts/ptmx node is present. To effectively use the multi-instance mode,
|
||||
open of /dev/ptmx must be redirected to '/dev/pts/ptmx' using a symlink or
|
||||
bind-mount.
|
||||
|
||||
Eg: A container startup script could do the following:
|
||||
|
||||
$ chmod 0666 /dev/pts/ptmx
|
||||
$ rm /dev/ptmx
|
||||
$ ln -s pts/ptmx /dev/ptmx
|
||||
$ ns_exec -cm /bin/bash
|
||||
|
||||
# We are now in new container
|
||||
|
||||
$ umount /dev/pts
|
||||
$ mount -t devpts -o newinstance lxcpts /dev/pts
|
||||
$ sshd -p 1234
|
||||
|
||||
where 'ns_exec -cm /bin/bash' calls clone() with CLONE_NEWNS flag and execs
|
||||
/bin/bash in the child process. A pty created by the sshd is not visible in
|
||||
the original mount of /dev/pts.
|
||||
|
||||
User-space changes
|
||||
------------------
|
||||
|
||||
In multi-instance mode (i.e '-o newinstance' mount option is specified at least
|
||||
once), the following user-space issues should be noted.
|
||||
|
||||
1. If -o newinstance mount option is never used, /dev/pts/ptmx can be ignored
|
||||
and no change is needed to system-startup scripts.
|
||||
|
||||
2. To effectively use multi-instance mode (i.e -o newinstance is specified)
|
||||
administrators or startup scripts should "redirect" open of /dev/ptmx to
|
||||
/dev/pts/ptmx using either a bind mount or symlink.
|
||||
|
||||
$ mount -t devpts -o newinstance devpts /dev/pts
|
||||
|
||||
followed by either
|
||||
|
||||
$ rm /dev/ptmx
|
||||
$ ln -s pts/ptmx /dev/ptmx
|
||||
$ chmod 666 /dev/pts/ptmx
|
||||
or
|
||||
$ mount -o bind /dev/pts/ptmx /dev/ptmx
|
||||
|
||||
3. The '/dev/ptmx -> pts/ptmx' symlink is the preferred method since it
|
||||
enables better error-reporting and treats both single-instance and
|
||||
multi-instance mounts similarly.
|
||||
|
||||
But this method requires that system-startup scripts set the mode of
|
||||
/dev/pts/ptmx correctly (default mode is 0000). The scripts can set the
|
||||
mode by, either
|
||||
|
||||
- adding ptmxmode mount option to devpts entry in /etc/fstab, or
|
||||
- using 'chmod 0666 /dev/pts/ptmx'
|
||||
|
||||
4. If multi-instance mode mount is needed for containers, but the system
|
||||
startup scripts have not yet been updated, container-startup scripts
|
||||
should bind mount /dev/ptmx to /dev/pts/ptmx to avoid breaking single-
|
||||
instance mounts.
|
||||
|
||||
Or, in general, container-startup scripts should use:
|
||||
|
||||
mount -t devpts -o newinstance -o ptmxmode=0666 devpts /dev/pts
|
||||
if [ ! -L /dev/ptmx ]; then
|
||||
mount -o bind /dev/pts/ptmx /dev/ptmx
|
||||
fi
|
||||
|
||||
When all devpts mounts are multi-instance, /dev/ptmx can permanently be
|
||||
a symlink to pts/ptmx and the bind mount can be ignored.
|
||||
|
||||
5. A multi-instance mount that is not accompanied by the /dev/ptmx to
|
||||
/dev/pts/ptmx redirection would result in an unusable/unreachable pty.
|
||||
|
||||
mount -t devpts -o newinstance lxcpts /dev/pts
|
||||
|
||||
immediately followed by:
|
||||
|
||||
open("/dev/ptmx")
|
||||
|
||||
would create a pty, say /dev/pts/7, in the initial kernel mount.
|
||||
But /dev/pts/7 would be invisible in the new mount.
|
||||
|
||||
6. The permissions for /dev/pts/ptmx node should be specified when mounting
|
||||
/dev/pts, using the '-o ptmxmode=%o' mount option (default is 0000).
|
||||
|
||||
mount -t devpts -o newinstance -o ptmxmode=0644 devpts /dev/pts
|
||||
|
||||
The permissions can later be changed as usual with 'chmod'.
|
||||
|
||||
chmod 666 /dev/pts/ptmx
|
||||
|
||||
7. A mount of devpts without the 'newinstance' option results in binding to
|
||||
initial kernel mount. This behavior, while preserving legacy semantics,
|
||||
does not provide strict isolation in a container environment. i.e by
|
||||
mounting devpts without the 'newinstance' option, a container could
|
||||
get visibility into the 'host' or root container's devpts.
|
||||
|
||||
To work around this and have strict isolation, all mounts of devpts,
|
||||
including the mount in the root container, should use the newinstance
|
||||
option.
|
@ -58,13 +58,22 @@ Note: More extensive information for getting started with ext4 can be
|
||||
|
||||
# mount -t ext4 /dev/hda1 /wherever
|
||||
|
||||
- When comparing performance with other filesystems, remember that
|
||||
ext3/4 by default offers higher data integrity guarantees than most.
|
||||
So when comparing with a metadata-only journalling filesystem, such
|
||||
as ext3, use `mount -o data=writeback'. And you might as well use
|
||||
`mount -o nobh' too along with it. Making the journal larger than
|
||||
the mke2fs default often helps performance with metadata-intensive
|
||||
workloads.
|
||||
- When comparing performance with other filesystems, it's always
|
||||
important to try multiple workloads; very often a subtle change in a
|
||||
workload parameter can completely change the ranking of which
|
||||
filesystems do well compared to others. When comparing versus ext3,
|
||||
note that ext4 enables write barriers by default, while ext3 does
|
||||
not enable write barriers by default. So it is useful to
|
||||
explicitly specify whether barriers are enabled or not via the
|
||||
'-o barrier=[0|1]' mount option for both ext3 and ext4 filesystems
|
||||
for a fair comparison. When tuning ext3 for best benchmark numbers,
|
||||
it is often worthwhile to try changing the data journaling mode; '-o
|
||||
data=writeback,nobh' can be faster for some workloads. (Note
|
||||
however that running mounted with data=writeback can potentially
|
||||
leave stale data exposed in recently written files in case of an
|
||||
unclean shutdown, which could be a security exposure in some
|
||||
situations.) Configuring the filesystem with a large journal can
|
||||
also be helpful for metadata-intensive workloads.
|
||||
|
||||
2. Features
|
||||
===========
|
||||
@ -74,7 +83,7 @@ Note: More extensive information for getting started with ext4 can be
|
||||
* ability to use filesystems > 16TB (e2fsprogs support not available yet)
|
||||
* extent format reduces metadata overhead (RAM, IO for access, transactions)
|
||||
* extent format more robust in face of on-disk corruption due to magics,
|
||||
* internal redunancy in tree
|
||||
* internal redundancy in tree
|
||||
* improved file allocation (multi-block alloc)
|
||||
* fix 32000 subdirectory limit
|
||||
* nsec timestamps for mtime, atime, ctime, create time
|
||||
@ -116,10 +125,11 @@ grouping of bitmaps and inode tables. Some test results available here:
|
||||
When mounting an ext4 filesystem, the following options are accepted:
|
||||
(*) == default
|
||||
|
||||
extents (*) ext4 will use extents to address file data. The
|
||||
file system will no longer be mountable by ext3.
|
||||
|
||||
noextents ext4 will not use extents for newly created files
|
||||
ro Mount filesystem read only. Note that ext4 will
|
||||
replay the journal (and thus write to the
|
||||
partition) even when mounted "read only". The
|
||||
mount options "ro,noload" can be used to prevent
|
||||
writes to the filesystem.
|
||||
|
||||
journal_checksum Enable checksumming of the journal transactions.
|
||||
This will allow the recovery code in e2fsck and the
|
||||
@ -134,17 +144,17 @@ journal_async_commit Commit block can be written to disk without waiting
|
||||
journal=update Update the ext4 file system's journal to the current
|
||||
format.
|
||||
|
||||
journal=inum When a journal already exists, this option is ignored.
|
||||
Otherwise, it specifies the number of the inode which
|
||||
will represent the ext4 file system's journal file.
|
||||
|
||||
journal_dev=devnum When the external journal device's major/minor numbers
|
||||
have changed, this option allows the user to specify
|
||||
the new journal location. The journal device is
|
||||
identified through its new major/minor numbers encoded
|
||||
in devnum.
|
||||
|
||||
noload Don't load the journal on mounting.
|
||||
noload Don't load the journal on mounting. Note that
|
||||
if the filesystem was not unmounted cleanly,
|
||||
skipping the journal replay will lead to the
|
||||
filesystem containing inconsistencies that can
|
||||
lead to any number of problems.
|
||||
|
||||
data=journal All data are committed into the journal prior to being
|
||||
written into the main file system.
|
||||
@ -219,9 +229,12 @@ minixdf Make 'df' act like Minix.
|
||||
|
||||
debug Extra debugging information is sent to syslog.
|
||||
|
||||
errors=remount-ro(*) Remount the filesystem read-only on an error.
|
||||
errors=remount-ro Remount the filesystem read-only on an error.
|
||||
errors=continue Keep going on a filesystem error.
|
||||
errors=panic Panic and halt the machine if an error occurs.
|
||||
(These mount options override the errors behavior
|
||||
specified in the superblock, which can be configured
|
||||
using tune2fs)
|
||||
|
||||
data_err=ignore(*) Just print an error message if an error occurs
|
||||
in a file data buffer in ordered mode.
|
||||
@ -261,6 +274,42 @@ delalloc (*) Deferring block allocation until write-out time.
|
||||
nodelalloc Disable delayed allocation. Blocks are allocated
|
||||
when data is copied from user to page cache.
|
||||
|
||||
max_batch_time=usec Maximum amount of time ext4 should wait for
|
||||
additional filesystem operations to be batched
|
||||
together with a synchronous write operation.
|
||||
Since a synchronous write operation is going to
|
||||
force a commit and then a wait for the I/O to
|
||||
complete, it doesn't cost much, and can be a
|
||||
huge throughput win, we wait for a small amount
|
||||
of time to see if any other transactions can
|
||||
piggyback on the synchronous write. The
|
||||
algorithm used is designed to automatically tune
|
||||
for the speed of the disk, by measuring the
|
||||
amount of time (on average) that it takes to
|
||||
finish committing a transaction. Call this time
|
||||
the "commit time". If the time that the
|
||||
transaction has been running is less than the
|
||||
commit time, ext4 will try sleeping for the
|
||||
commit time to see if other operations will join
|
||||
the transaction. The commit time is capped by
|
||||
the max_batch_time, which defaults to 15000us
|
||||
(15ms). This optimization can be turned off
|
||||
entirely by setting max_batch_time to 0.
|
||||
|
||||
min_batch_time=usec This parameter sets the commit time (as
|
||||
described above) to be at least min_batch_time.
|
||||
It defaults to zero microseconds. Increasing
|
||||
this parameter may improve the throughput of
|
||||
multi-threaded, synchronous workloads on very
|
||||
fast disks, at the cost of increasing latency.
|
||||
|
||||
journal_ioprio=prio The I/O priority (from 0 to 7, where 0 is the
|
||||
highest priority) which should be used for I/O
|
||||
operations submitted by kjournald2 during a
|
||||
commit operation. This defaults to 3, which is
|
||||
a slightly higher priority than the default I/O
|
||||
priority.
|
||||
|
||||
Data Mode
|
||||
=========
|
||||
There are 3 different data modes:
|
||||
|
@ -76,13 +76,13 @@ the fdtable structure -
|
||||
5. Handling of the file structures is special. Since the look-up
|
||||
of the fd (fget()/fget_light()) are lock-free, it is possible
|
||||
that look-up may race with the last put() operation on the
|
||||
file structure. This is avoided using atomic_inc_not_zero()
|
||||
file structure. This is avoided using atomic_long_inc_not_zero()
|
||||
on ->f_count :
|
||||
|
||||
rcu_read_lock();
|
||||
file = fcheck_files(files, fd);
|
||||
if (file) {
|
||||
if (atomic_inc_not_zero(&file->f_count))
|
||||
if (atomic_long_inc_not_zero(&file->f_count))
|
||||
*fput_needed = 1;
|
||||
else
|
||||
/* Didn't get the reference, someone's freed */
|
||||
@ -92,7 +92,7 @@ the fdtable structure -
|
||||
....
|
||||
return file;
|
||||
|
||||
atomic_inc_not_zero() detects if refcounts is already zero or
|
||||
atomic_long_inc_not_zero() detects if the refcount is already zero or
|
||||
goes to zero during increment. If it does, we fail
|
||||
fget()/fget_light().
|
||||
|
||||
|
@ -251,7 +251,7 @@ NFS/RDMA Setup
|
||||
|
||||
Instruct the server to listen on the RDMA transport:
|
||||
|
||||
$ echo rdma 2050 > /proc/fs/nfsd/portlist
|
||||
$ echo rdma 20049 > /proc/fs/nfsd/portlist
|
||||
|
||||
- On the client system
|
||||
|
||||
@ -263,7 +263,7 @@ NFS/RDMA Setup
|
||||
Regardless of how the client was built (module or built-in), use this
|
||||
command to mount the NFS/RDMA server:
|
||||
|
||||
$ mount -o rdma,port=2050 <IPoIB-server-name-or-address>:/<export> /mnt
|
||||
$ mount -o rdma,port=20049 <IPoIB-server-name-or-address>:/<export> /mnt
|
||||
|
||||
To verify that the mount is using RDMA, run "cat /proc/mounts" and check
|
||||
the "proto" field for the given mount.
|
||||
|
@ -31,7 +31,6 @@ Features which OCFS2 does not support yet:
|
||||
- quotas
|
||||
- Directory change notification (F_NOTIFY)
|
||||
- Distributed Caching (F_SETLEASE/F_GETLEASE/break_lease)
|
||||
- POSIX ACLs
|
||||
|
||||
Mount options
|
||||
=============
|
||||
@ -79,3 +78,5 @@ inode64 Indicates that Ocfs2 is allowed to create inodes at
|
||||
bits of significance.
|
||||
user_xattr (*) Enables Extended User Attributes.
|
||||
nouser_xattr Disables Extended User Attributes.
|
||||
acl Enables POSIX Access Control Lists support.
|
||||
noacl (*) Disables POSIX Access Control Lists support.
|
||||
|
@ -140,6 +140,7 @@ Table 1-1: Process specific entries in /proc
|
||||
statm Process memory status information
|
||||
status Process status in human readable form
|
||||
wchan If CONFIG_KALLSYMS is set, a pre-decoded wchan
|
||||
stack Report full stack trace, enable via CONFIG_STACKTRACE
|
||||
smaps Extension based on maps, the rss size for each mapped file
|
||||
..............................................................................
|
||||
|
||||
@ -1370,268 +1371,8 @@ auto_msgmni default value is 1.
|
||||
2.4 /proc/sys/vm - The virtual memory subsystem
|
||||
-----------------------------------------------
|
||||
|
||||
The files in this directory can be used to tune the operation of the virtual
|
||||
memory (VM) subsystem of the Linux kernel.
|
||||
|
||||
vfs_cache_pressure
|
||||
------------------
|
||||
|
||||
Controls the tendency of the kernel to reclaim the memory which is used for
|
||||
caching of directory and inode objects.
|
||||
|
||||
At the default value of vfs_cache_pressure=100 the kernel will attempt to
|
||||
reclaim dentries and inodes at a "fair" rate with respect to pagecache and
|
||||
swapcache reclaim. Decreasing vfs_cache_pressure causes the kernel to prefer
|
||||
to retain dentry and inode caches. Increasing vfs_cache_pressure beyond 100
|
||||
causes the kernel to prefer to reclaim dentries and inodes.
|
||||
|
||||
dirty_background_ratio
|
||||
----------------------
|
||||
|
||||
Contains, as a percentage of the dirtyable system memory (free pages + mapped
|
||||
pages + file cache, not including locked pages and HugePages), the number of
|
||||
pages at which the pdflush background writeback daemon will start writing out
|
||||
dirty data.
|
||||
|
||||
dirty_ratio
|
||||
-----------------
|
||||
|
||||
Contains, as a percentage of the dirtyable system memory (free pages + mapped
|
||||
pages + file cache, not including locked pages and HugePages), the number of
|
||||
pages at which a process which is generating disk writes will itself start
|
||||
writing out dirty data.
|
||||
|
||||
dirty_writeback_centisecs
|
||||
-------------------------
|
||||
|
||||
The pdflush writeback daemons will periodically wake up and write `old' data
|
||||
out to disk. This tunable expresses the interval between those wakeups, in
|
||||
100'ths of a second.
|
||||
|
||||
Setting this to zero disables periodic writeback altogether.
|
||||
|
||||
dirty_expire_centisecs
|
||||
----------------------
|
||||
|
||||
This tunable is used to define when dirty data is old enough to be eligible
|
||||
for writeout by the pdflush daemons. It is expressed in 100'ths of a second.
|
||||
Data which has been dirty in-memory for longer than this interval will be
|
||||
written out next time a pdflush daemon wakes up.
|
||||
|
||||
highmem_is_dirtyable
|
||||
--------------------
|
||||
|
||||
Only present if CONFIG_HIGHMEM is set.
|
||||
|
||||
This defaults to 0 (false), meaning that the ratios set above are calculated
|
||||
as a percentage of lowmem only. This protects against excessive scanning
|
||||
in page reclaim, swapping and general VM distress.
|
||||
|
||||
Setting this to 1 can be useful on 32 bit machines where you want to make
|
||||
random changes within an MMAPed file that is larger than your available
|
||||
lowmem without causing large quantities of random IO. It is safe if the
|
||||
behavior of all programs running on the machine is known and memory will
|
||||
not be otherwise stressed.
|
||||
|
||||
legacy_va_layout
|
||||
----------------
|
||||
|
||||
If non-zero, this sysctl disables the new 32-bit mmap layout - the kernel
|
||||
will use the legacy (2.4) layout for all processes.
|
||||
|
||||
lowmem_reserve_ratio
|
||||
---------------------
|
||||
|
||||
For some specialised workloads on highmem machines it is dangerous for
|
||||
the kernel to allow process memory to be allocated from the "lowmem"
|
||||
zone. This is because that memory could then be pinned via the mlock()
|
||||
system call, or by unavailability of swapspace.
|
||||
|
||||
And on large highmem machines this lack of reclaimable lowmem memory
|
||||
can be fatal.
|
||||
|
||||
So the Linux page allocator has a mechanism which prevents allocations
|
||||
which _could_ use highmem from using too much lowmem. This means that
|
||||
a certain amount of lowmem is defended from the possibility of being
|
||||
captured into pinned user memory.
|
||||
|
||||
(The same argument applies to the old 16 megabyte ISA DMA region. This
|
||||
mechanism will also defend that region from allocations which could use
|
||||
highmem or lowmem).
|
||||
|
||||
The `lowmem_reserve_ratio' tunable determines how aggressive the kernel is
|
||||
in defending these lower zones.
|
||||
|
||||
If you have a machine which uses highmem or ISA DMA and your
|
||||
applications are using mlock(), or if you are running with no swap then
|
||||
you probably should change the lowmem_reserve_ratio setting.
|
||||
|
||||
The lowmem_reserve_ratio is an array. You can see them by reading this file.
|
||||
-
|
||||
% cat /proc/sys/vm/lowmem_reserve_ratio
|
||||
256 256 32
|
||||
-
|
||||
Note: the number of these elements is one fewer than the number of zones, because the highest
|
||||
zone's value is not necessary for following calculation.
|
||||
|
||||
But, these values are not used directly. The kernel calculates # of protection
|
||||
pages for each zones from them. These are shown as array of protection pages
|
||||
in /proc/zoneinfo like followings. (This is an example of x86-64 box).
|
||||
Each zone has an array of protection pages like this.
|
||||
|
||||
-
|
||||
Node 0, zone DMA
|
||||
pages free 1355
|
||||
min 3
|
||||
low 3
|
||||
high 4
|
||||
:
|
||||
:
|
||||
numa_other 0
|
||||
protection: (0, 2004, 2004, 2004)
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
pagesets
|
||||
cpu: 0 pcp: 0
|
||||
:
|
||||
-
|
||||
These protections are added to score to judge whether this zone should be used
|
||||
for page allocation or should be reclaimed.
|
||||
|
||||
In this example, if normal pages (index=2) are required to this DMA zone and
|
||||
pages_high is used for watermark, the kernel judges this zone should not be
|
||||
used because pages_free(1355) is smaller than watermark + protection[2]
|
||||
(4 + 2004 = 2008). If this protection value is 0, this zone would be used for
|
||||
normal page requirement. If requirement is DMA zone(index=0), protection[0]
|
||||
(=0) is used.
|
||||
|
||||
zone[i]'s protection[j] is calculated by following expression.
|
||||
|
||||
(i < j):
|
||||
zone[i]->protection[j]
|
||||
= (total sums of present_pages from zone[i+1] to zone[j] on the node)
|
||||
/ lowmem_reserve_ratio[i];
|
||||
(i = j):
|
||||
(should not be protected. = 0)
|
||||
(i > j):
|
||||
(not necessary, but looks 0)
|
||||
|
||||
The default values of lowmem_reserve_ratio[i] are
|
||||
256 (if zone[i] means DMA or DMA32 zone)
|
||||
32 (others).
|
||||
As above expression, they are reciprocal number of ratio.
|
||||
256 means 1/256. # of protection pages becomes about "0.39%" of total present
|
||||
pages of higher zones on the node.
|
||||
|
||||
If you would like to protect more pages, smaller values are effective.
|
||||
The minimum value is 1 (1/1 -> 100%).
|
||||
|
||||
page-cluster
|
||||
------------
|
||||
|
||||
page-cluster controls the number of pages which are written to swap in
|
||||
a single attempt. The swap I/O size.
|
||||
|
||||
It is a logarithmic value - setting it to zero means "1 page", setting
|
||||
it to 1 means "2 pages", setting it to 2 means "4 pages", etc.
|
||||
|
||||
The default value is three (eight pages at a time). There may be some
|
||||
small benefits in tuning this to a different value if your workload is
|
||||
swap-intensive.
|
||||
|
||||
overcommit_memory
|
||||
-----------------
|
||||
|
||||
Controls overcommit of system memory, possibly allowing processes
|
||||
to allocate (but not use) more memory than is actually available.
|
||||
|
||||
|
||||
0 - Heuristic overcommit handling. Obvious overcommits of
|
||||
address space are refused. Used for a typical system. It
|
||||
ensures a seriously wild allocation fails while allowing
|
||||
overcommit to reduce swap usage. root is allowed to
|
||||
allocate slightly more memory in this mode. This is the
|
||||
default.
|
||||
|
||||
1 - Always overcommit. Appropriate for some scientific
|
||||
applications.
|
||||
|
||||
2 - Don't overcommit. The total address space commit
|
||||
for the system is not permitted to exceed swap plus a
|
||||
configurable percentage (default is 50) of physical RAM.
|
||||
Depending on the percentage you use, in most situations
|
||||
this means a process will not be killed while attempting
|
||||
to use already-allocated memory but will receive errors
|
||||
on memory allocation as appropriate.
|
||||
|
||||
overcommit_ratio
|
||||
----------------
|
||||
|
||||
Percentage of physical memory size to include in overcommit calculations
|
||||
(see above.)
|
||||
|
||||
Memory allocation limit = swapspace + physmem * (overcommit_ratio / 100)
|
||||
|
||||
swapspace = total size of all swap areas
|
||||
physmem = size of physical memory in system
|
||||
|
||||
nr_hugepages and hugetlb_shm_group
|
||||
----------------------------------
|
||||
|
||||
nr_hugepages configures the number of hugetlb pages reserved for the system.
|
||||
|
||||
hugetlb_shm_group contains group id that is allowed to create SysV shared
|
||||
memory segment using hugetlb page.
|
||||
|
||||
hugepages_treat_as_movable
|
||||
--------------------------
|
||||
|
||||
This parameter is only useful when kernelcore= is specified at boot time to
|
||||
create ZONE_MOVABLE for pages that may be reclaimed or migrated. Huge pages
|
||||
are not movable so are not normally allocated from ZONE_MOVABLE. A non-zero
|
||||
value written to hugepages_treat_as_movable allows huge pages to be allocated
|
||||
from ZONE_MOVABLE.
|
||||
|
||||
Once enabled, the ZONE_MOVABLE is treated as an area of memory the huge
|
||||
pages pool can easily grow or shrink within. Assuming that applications are
|
||||
not running that mlock() a lot of memory, it is likely the huge pages pool
|
||||
can grow to the size of ZONE_MOVABLE by repeatedly entering the desired value
|
||||
into nr_hugepages and triggering page reclaim.
|
||||
|
||||
laptop_mode
|
||||
-----------
|
||||
|
||||
laptop_mode is a knob that controls "laptop mode". All the things that are
|
||||
controlled by this knob are discussed in Documentation/laptops/laptop-mode.txt.
|
||||
|
||||
block_dump
|
||||
----------
|
||||
|
||||
block_dump enables block I/O debugging when set to a nonzero value. More
|
||||
information on block I/O debugging is in Documentation/laptops/laptop-mode.txt.
|
||||
|
||||
swap_token_timeout
|
||||
------------------
|
||||
|
||||
This file contains valid hold time of swap out protection token. The Linux
|
||||
VM has token based thrashing control mechanism and uses the token to prevent
|
||||
unnecessary page faults in thrashing situation. The unit of the value is
|
||||
second. The value would be useful to tune thrashing behavior.
|
||||
|
||||
drop_caches
|
||||
-----------
|
||||
|
||||
Writing to this will cause the kernel to drop clean caches, dentries and
|
||||
inodes from memory, causing that memory to become free.
|
||||
|
||||
To free pagecache:
|
||||
echo 1 > /proc/sys/vm/drop_caches
|
||||
To free dentries and inodes:
|
||||
echo 2 > /proc/sys/vm/drop_caches
|
||||
To free pagecache, dentries and inodes:
|
||||
echo 3 > /proc/sys/vm/drop_caches
|
||||
|
||||
As this is a non-destructive operation and dirty objects are not freeable, the
|
||||
user should run `sync' first.
|
||||
Please see: Documentation/sysctls/vm.txt for a description of these
|
||||
entries.
|
||||
|
||||
|
||||
2.5 /proc/sys/dev - Device specific parameters
|
||||
@ -2286,6 +2027,34 @@ increase the likelihood of this process being killed by the oom-killer. Valid
|
||||
values are in the range -16 to +15, plus the special value -17, which disables
|
||||
oom-killing altogether for this process.
|
||||
|
||||
The process to be killed in an out-of-memory situation is selected among all others
|
||||
based on its badness score. This value equals the original memory size of the process
|
||||
and is then updated according to its CPU time (utime + stime) and the
|
||||
run time (uptime - start time). The longer it runs the smaller is the score.
|
||||
Badness score is divided by the square root of the CPU time and then by
|
||||
the double square root of the run time.
|
||||
|
||||
Swapped out tasks are killed first. Half of each child's memory size is added to
|
||||
the parent's score if they do not share the same memory. Thus forking servers
|
||||
are the prime candidates to be killed. Having only one 'hungry' child will make
|
||||
parent less preferable than the child.
|
||||
|
||||
/proc/<pid>/oom_score shows process' current badness score.
|
||||
|
||||
The following heuristics are then applied:
|
||||
* if the task was reniced, its score doubles
|
||||
* superuser or direct hardware access tasks (CAP_SYS_ADMIN, CAP_SYS_RESOURCE
|
||||
or CAP_SYS_RAWIO) have their score divided by 4
|
||||
* if oom condition happened in one cpuset and checked task does not belong
|
||||
to it, its score is divided by 8
|
||||
* the resulting score is multiplied by two to the power of oom_adj, i.e.
|
||||
points <<= oom_adj when it is positive and
|
||||
points >>= -(oom_adj) otherwise
|
||||
|
||||
The task with the highest badness score is then selected and its children
|
||||
are killed, process itself will be killed in an OOM situation when it does
|
||||
not have children or some of them disabled oom like described above.
|
||||
|
||||
2.13 /proc/<pid>/oom_score - Display current oom-killer score
|
||||
-------------------------------------------------------------
|
||||
|
||||
|
225
Documentation/filesystems/squashfs.txt
Normal file
225
Documentation/filesystems/squashfs.txt
Normal file
@ -0,0 +1,225 @@
|
||||
SQUASHFS 4.0 FILESYSTEM
|
||||
=======================
|
||||
|
||||
Squashfs is a compressed read-only filesystem for Linux.
|
||||
It uses zlib compression to compress files, inodes and directories.
|
||||
Inodes in the system are very small and all blocks are packed to minimise
|
||||
data overhead. Block sizes greater than 4K are supported up to a maximum
|
||||
of 1Mbytes (default block size 128K).
|
||||
|
||||
Squashfs is intended for general read-only filesystem use, for archival
|
||||
use (i.e. in cases where a .tar.gz file may be used), and in constrained
|
||||
block device/memory systems (e.g. embedded systems) where low overhead is
|
||||
needed.
|
||||
|
||||
Mailing list: squashfs-devel@lists.sourceforge.net
|
||||
Web site: www.squashfs.org
|
||||
|
||||
1. FILESYSTEM FEATURES
|
||||
----------------------
|
||||
|
||||
Squashfs filesystem features versus Cramfs:
|
||||
|
||||
Squashfs Cramfs
|
||||
|
||||
Max filesystem size: 2^64 16 MiB
|
||||
Max file size: ~ 2 TiB 16 MiB
|
||||
Max files: unlimited unlimited
|
||||
Max directories: unlimited unlimited
|
||||
Max entries per directory: unlimited unlimited
|
||||
Max block size: 1 MiB 4 KiB
|
||||
Metadata compression: yes no
|
||||
Directory indexes: yes no
|
||||
Sparse file support: yes no
|
||||
Tail-end packing (fragments): yes no
|
||||
Exportable (NFS etc.): yes no
|
||||
Hard link support: yes no
|
||||
"." and ".." in readdir: yes no
|
||||
Real inode numbers: yes no
|
||||
32-bit uids/gids: yes no
|
||||
File creation time: yes no
|
||||
Xattr and ACL support: no no
|
||||
|
||||
Squashfs compresses data, inodes and directories. In addition, inode and
|
||||
directory data are highly compacted, and packed on byte boundaries. Each
|
||||
compressed inode is on average 8 bytes in length (the exact length varies on
|
||||
file type, i.e. regular file, directory, symbolic link, and block/char device
|
||||
inodes have different sizes).
|
||||
|
||||
2. USING SQUASHFS
|
||||
-----------------
|
||||
|
||||
As squashfs is a read-only filesystem, the mksquashfs program must be used to
|
||||
create populated squashfs filesystems. This and other squashfs utilities
|
||||
can be obtained from http://www.squashfs.org. Usage instructions can be
|
||||
obtained from this site also.
|
||||
|
||||
|
||||
3. SQUASHFS FILESYSTEM DESIGN
|
||||
-----------------------------
|
||||
|
||||
A squashfs filesystem consists of seven parts, packed together on a byte
|
||||
alignment:
|
||||
|
||||
---------------
|
||||
| superblock |
|
||||
|---------------|
|
||||
| datablocks |
|
||||
| & fragments |
|
||||
|---------------|
|
||||
| inode table |
|
||||
|---------------|
|
||||
| directory |
|
||||
| table |
|
||||
|---------------|
|
||||
| fragment |
|
||||
| table |
|
||||
|---------------|
|
||||
| export |
|
||||
| table |
|
||||
|---------------|
|
||||
| uid/gid |
|
||||
| lookup table |
|
||||
---------------
|
||||
|
||||
Compressed data blocks are written to the filesystem as files are read from
|
||||
the source directory, and checked for duplicates. Once all file data has been
|
||||
written the completed inode, directory, fragment, export and uid/gid lookup
|
||||
tables are written.
|
||||
|
||||
3.1 Inodes
|
||||
----------
|
||||
|
||||
Metadata (inodes and directories) are compressed in 8Kbyte blocks. Each
|
||||
compressed block is prefixed by a two byte length, the top bit is set if the
|
||||
block is uncompressed. A block will be uncompressed if the -noI option is set,
|
||||
or if the compressed block was larger than the uncompressed block.
|
||||
|
||||
Inodes are packed into the metadata blocks, and are not aligned to block
|
||||
boundaries, therefore inodes overlap compressed blocks. Inodes are identified
|
||||
by a 48-bit number which encodes the location of the compressed metadata block
|
||||
containing the inode, and the byte offset into that block where the inode is
|
||||
placed (<block, offset>).
|
||||
|
||||
To maximise compression there are different inodes for each file type
|
||||
(regular file, directory, device, etc.), the inode contents and length
|
||||
varying with the type.
|
||||
|
||||
To further maximise compression, two types of regular file inode and
|
||||
directory inode are defined: inodes optimised for frequently occurring
|
||||
regular files and directories, and extended types where extra
|
||||
information has to be stored.
|
||||
|
||||
3.2 Directories
|
||||
---------------
|
||||
|
||||
Like inodes, directories are packed into compressed metadata blocks, stored
|
||||
in a directory table. Directories are accessed using the start address of
|
||||
the metablock containing the directory and the offset into the
|
||||
decompressed block (<block, offset>).
|
||||
|
||||
Directories are organised in a slightly complex way, and are not simply
|
||||
a list of file names. The organisation takes advantage of the
|
||||
fact that (in most cases) the inodes of the files will be in the same
|
||||
compressed metadata block, and therefore, can share the start block.
|
||||
Directories are therefore organised in a two level list, a directory
|
||||
header containing the shared start block value, and a sequence of directory
|
||||
entries, each of which share the shared start block. A new directory header
|
||||
is written once/if the inode start block changes. The directory
|
||||
header/directory entry list is repeated as many times as necessary.
|
||||
|
||||
Directories are sorted, and can contain a directory index to speed up
|
||||
file lookup. Directory indexes store one entry per metablock, each entry
|
||||
storing the index/filename mapping to the first directory header
|
||||
in each metadata block. Directories are sorted in alphabetical order,
|
||||
and at lookup the index is scanned linearly looking for the first filename
|
||||
alphabetically larger than the filename being looked up. At this point the
|
||||
location of the metadata block the filename is in has been found.
|
||||
The general idea of the index is to ensure only one metadata block needs to be
|
||||
decompressed to do a lookup irrespective of the length of the directory.
|
||||
This scheme has the advantage that it doesn't require extra memory overhead
|
||||
and doesn't require much extra storage on disk.
|
||||
|
||||
3.3 File data
|
||||
-------------
|
||||
|
||||
Regular files consist of a sequence of contiguous compressed blocks, and/or a
|
||||
compressed fragment block (tail-end packed block). The compressed size
|
||||
of each datablock is stored in a block list contained within the
|
||||
file inode.
|
||||
|
||||
To speed up access to datablocks when reading 'large' files (256 Mbytes or
|
||||
larger), the code implements an index cache that caches the mapping from
|
||||
block index to datablock location on disk.
|
||||
|
||||
The index cache allows Squashfs to handle large files (up to 1.75 TiB) while
|
||||
retaining a simple and space-efficient block list on disk. The cache
|
||||
is split into slots, caching up to eight 224 GiB files (128 KiB blocks).
|
||||
Larger files use multiple slots, with 1.75 TiB files using all 8 slots.
|
||||
The index cache is designed to be memory efficient, and by default uses
|
||||
16 KiB.
|
||||
|
||||
3.4 Fragment lookup table
|
||||
-------------------------
|
||||
|
||||
Regular files can contain a fragment index which is mapped to a fragment
|
||||
location on disk and compressed size using a fragment lookup table. This
|
||||
fragment lookup table is itself stored compressed into metadata blocks.
|
||||
A second index table is used to locate these. This second index table for
|
||||
speed of access (and because it is small) is read at mount time and cached
|
||||
in memory.
|
||||
|
||||
3.5 Uid/gid lookup table
|
||||
------------------------
|
||||
|
||||
For space efficiency regular files store uid and gid indexes, which are
|
||||
converted to 32-bit uids/gids using an id look up table. This table is
|
||||
stored compressed into metadata blocks. A second index table is used to
|
||||
locate these. This second index table for speed of access (and because it
|
||||
is small) is read at mount time and cached in memory.
|
||||
|
||||
3.6 Export table
|
||||
----------------
|
||||
|
||||
To enable Squashfs filesystems to be exportable (via NFS etc.) filesystems
|
||||
can optionally (disabled with the -no-exports Mksquashfs option) contain
|
||||
an inode number to inode disk location lookup table. This is required to
|
||||
enable Squashfs to map inode numbers passed in filehandles to the inode
|
||||
location on disk, which is necessary when the export code reinstantiates
|
||||
expired/flushed inodes.
|
||||
|
||||
This table is stored compressed into metadata blocks. A second index table is
|
||||
used to locate these. This second index table for speed of access (and because
|
||||
it is small) is read at mount time and cached in memory.
|
||||
|
||||
|
||||
4. TODOS AND OUTSTANDING ISSUES
|
||||
-------------------------------
|
||||
|
||||
4.1 Todo list
|
||||
-------------
|
||||
|
||||
Implement Xattr and ACL support. The Squashfs 4.0 filesystem layout has hooks
|
||||
for these but the code has not been written. Once the code has been written
|
||||
the existing layout should not require modification.
|
||||
|
||||
4.2 Squashfs internal cache
|
||||
---------------------------
|
||||
|
||||
Blocks in Squashfs are compressed. To avoid repeatedly decompressing
|
||||
recently accessed data Squashfs uses two small metadata and fragment caches.
|
||||
|
||||
The cache is not used for file datablocks, these are decompressed and cached in
|
||||
the page-cache in the normal way. The cache is used to temporarily cache
|
||||
fragment and metadata blocks which have been read as a result of a metadata
|
||||
(i.e. inode or directory) or fragment access. Because metadata and fragments
|
||||
are packed together into blocks (to gain greater compression) the read of a
|
||||
particular piece of metadata or fragment will retrieve other metadata/fragments
|
||||
which have been packed with it, these because of locality-of-reference may be
|
||||
read in the near future. Temporarily caching them ensures they are available
|
||||
for near future access without requiring an additional read and decompress.
|
||||
|
||||
In the future this internal cache may be replaced with an implementation which
|
||||
uses the kernel page cache. Because the page cache operates on page sized
|
||||
units this may introduce additional complexity in terms of locking and
|
||||
associated race conditions.
|
@ -79,13 +79,6 @@ Mount options
|
||||
|
||||
(*) == default.
|
||||
|
||||
norm_unmount (*) commit on unmount; the journal is committed
|
||||
when the file-system is unmounted so that the
|
||||
next mount does not have to replay the journal
|
||||
and it becomes very fast;
|
||||
fast_unmount do not commit on unmount; this option makes
|
||||
unmount faster, but the next mount slower
|
||||
because of the need to replay the journal.
|
||||
bulk_read read more in one go to take advantage of flash
|
||||
media that read faster sequentially
|
||||
no_bulk_read (*) do not bulk-read
|
||||
@ -95,6 +88,9 @@ no_chk_data_crc skip checking of CRCs on data nodes in order to
|
||||
of this option is that corruption of the contents
|
||||
of a file can go unnoticed.
|
||||
chk_data_crc (*) do not skip checking CRCs on data nodes
|
||||
compr=none override default compressor and set it to "none"
|
||||
compr=lzo override default compressor and set it to "lzo"
|
||||
compr=zlib override default compressor and set it to "zlib"
|
||||
|
||||
|
||||
Quick usage instructions
|
||||
|
@ -210,8 +210,8 @@ struct super_operations {
|
||||
void (*put_super) (struct super_block *);
|
||||
void (*write_super) (struct super_block *);
|
||||
int (*sync_fs)(struct super_block *sb, int wait);
|
||||
void (*write_super_lockfs) (struct super_block *);
|
||||
void (*unlockfs) (struct super_block *);
|
||||
int (*freeze_fs) (struct super_block *);
|
||||
int (*unfreeze_fs) (struct super_block *);
|
||||
int (*statfs) (struct dentry *, struct kstatfs *);
|
||||
int (*remount_fs) (struct super_block *, int *, char *);
|
||||
void (*clear_inode) (struct inode *);
|
||||
@ -270,11 +270,11 @@ or bottom half).
|
||||
a superblock. The second parameter indicates whether the method
|
||||
should wait until the write out has been completed. Optional.
|
||||
|
||||
write_super_lockfs: called when VFS is locking a filesystem and
|
||||
freeze_fs: called when VFS is locking a filesystem and
|
||||
forcing it into a consistent state. This method is currently
|
||||
used by the Logical Volume Manager (LVM).
|
||||
|
||||
unlockfs: called when VFS is unlocking a filesystem and making it writable
|
||||
unfreeze_fs: called when VFS is unlocking a filesystem and making it writable
|
||||
again.
|
||||
|
||||
statfs: called when the VFS needs to get filesystem statistics. This
|
||||
@ -733,7 +733,6 @@ struct file_operations {
|
||||
ssize_t (*sendpage) (struct file *, struct page *, int, size_t, loff_t *, int);
|
||||
unsigned long (*get_unmapped_area)(struct file *, unsigned long, unsigned long, unsigned long, unsigned long);
|
||||
int (*check_flags)(int);
|
||||
int (*dir_notify)(struct file *filp, unsigned long arg);
|
||||
int (*flock) (struct file *, int, struct file_lock *);
|
||||
ssize_t (*splice_write)(struct pipe_inode_info *, struct file *, size_t, unsigned int);
|
||||
ssize_t (*splice_read)(struct file *, struct pipe_inode_info *, size_t, unsigned int);
|
||||
@ -800,8 +799,6 @@ otherwise noted.
|
||||
|
||||
check_flags: called by the fcntl(2) system call for F_SETFL command
|
||||
|
||||
dir_notify: called by the fcntl(2) system call for F_NOTIFY command
|
||||
|
||||
flock: called by the flock(2) system call
|
||||
|
||||
splice_write: called by the VFS to splice data from a pipe to a file. This
|
||||
@ -931,7 +928,7 @@ manipulate dentries:
|
||||
d_lookup: look up a dentry given its parent and path name component
|
||||
It looks up the child of that given name from the dcache
|
||||
hash table. If it is found, the reference count is incremented
|
||||
and the dentry is returned. The caller must use d_put()
|
||||
and the dentry is returned. The caller must use dput()
|
||||
to free the dentry when it finishes using it.
|
||||
|
||||
For further information on dentry locking, please refer to the document
|
||||
|
@ -74,7 +74,7 @@ a sensor.
|
||||
Notice that some banks have both a read and a write address this is how the
|
||||
uGuru determines if a read from or a write to the bank is taking place, thus
|
||||
when reading you should always use the read address and when writing the
|
||||
write address. The write address is always one (1) more then the read address.
|
||||
write address. The write address is always one (1) more than the read address.
|
||||
|
||||
|
||||
uGuru ready
|
||||
@ -121,7 +121,7 @@ Once all bytes have been read data will hold 0x09, but there is no reason to
|
||||
test for this. Notice that the number of bytes is bank address dependent see
|
||||
above and below.
|
||||
|
||||
After completing a successfull read it is advised to put the uGuru back in
|
||||
After completing a successful read it is advised to put the uGuru back in
|
||||
ready mode, so that it is ready for the next read / write cycle. This way
|
||||
if your program / driver is unloaded and later loaded again the detection
|
||||
algorithm described above will still work.
|
||||
@ -141,7 +141,7 @@ don't ask why this is the way it is.
|
||||
|
||||
Once DATA holds 0x01 read CMD it should hold 0xAC now.
|
||||
|
||||
After completing a successfull write it is advised to put the uGuru back in
|
||||
After completing a successful write it is advised to put the uGuru back in
|
||||
ready mode, so that it is ready for the next read / write cycle. This way
|
||||
if your program / driver is unloaded and later loaded again the detection
|
||||
algorithm described above will still work.
|
||||
@ -224,7 +224,7 @@ Bit 3: Beep if alarm (RW)
|
||||
Bit 4: 1 if alarm cause measured temp is over the warning threshold (R)
|
||||
Bit 5: 1 if alarm cause measured volt is over the max threshold (R)
|
||||
Bit 6: 1 if alarm cause measured volt is under the min threshold (R)
|
||||
Bit 7: Volt sensor: Shutdown if alarm persist for more then 4 seconds (RW)
|
||||
Bit 7: Volt sensor: Shutdown if alarm persist for more than 4 seconds (RW)
|
||||
Temp sensor: Shutdown if temp is over the shutdown threshold (RW)
|
||||
|
||||
* This bit is only honored/used by the uGuru if a temp sensor is connected
|
||||
@ -293,7 +293,7 @@ Byte 0:
|
||||
Alarm behaviour for the selected sensor. A 1 enables the described behaviour.
|
||||
Bit 0: Give an alarm if measured rpm is under the min threshold (RW)
|
||||
Bit 3: Beep if alarm (RW)
|
||||
Bit 7: Shutdown if alarm persist for more then 4 seconds (RW)
|
||||
Bit 7: Shutdown if alarm persist for more than 4 seconds (RW)
|
||||
|
||||
Byte 1:
|
||||
min threshold (scale as bank 0x26)
|
||||
|
@ -31,15 +31,11 @@ Each of the measured inputs (temperature, fan speed) has corresponding high/low
|
||||
limit values. The ADT7470 will signal an ALARM if any measured value exceeds
|
||||
either limit.
|
||||
|
||||
The ADT7470 DOES NOT sample all inputs continuously. A single pin on the
|
||||
ADT7470 is connected to a multitude of thermal diodes, but the chip must be
|
||||
instructed explicitly to read the multitude of diodes. If you want to use
|
||||
automatic fan control mode, you must manually read any of the temperature
|
||||
sensors or the fan control algorithm will not run. The chip WILL NOT DO THIS
|
||||
AUTOMATICALLY; this must be done from userspace. This may be a bug in the chip
|
||||
design, given that many other AD chips take care of this. The driver will not
|
||||
read the registers more often than once every 5 seconds. Further,
|
||||
configuration data is only read once per minute.
|
||||
The ADT7470 samples all inputs continuously. A kernel thread is started up for
|
||||
the purpose of periodically querying the temperature sensors, thus allowing the
|
||||
automatic fan pwm control to set the fan speed. The driver will not read the
|
||||
registers more often than once every 5 seconds. Further, configuration data is
|
||||
only read once per minute.
|
||||
|
||||
Special Features
|
||||
----------------
|
||||
@ -72,5 +68,6 @@ pwm#_auto_point2_temp.
|
||||
Notes
|
||||
-----
|
||||
|
||||
As stated above, the temperature inputs must be read periodically from
|
||||
userspace in order for the automatic pwm algorithm to run.
|
||||
The temperature inputs no longer need to be read periodically from userspace in
|
||||
order for the automatic pwm algorithm to run. This was the case for earlier
|
||||
versions of the driver.
|
||||
|
87
Documentation/hwmon/adt7475
Normal file
87
Documentation/hwmon/adt7475
Normal file
@ -0,0 +1,87 @@
|
||||
This describes the interface for the ADT7475 driver:
|
||||
|
||||
(there are 4 fans, numbered fan1 to fan4):
|
||||
|
||||
fanX_input Read the current speed of the fan (in RPMs)
|
||||
fanX_min Read/write the minimum speed of the fan. Dropping
|
||||
below this sets an alarm.
|
||||
|
||||
(there are three PWMs, numbered pwm1 to pwm3):
|
||||
|
||||
pwmX Read/write the current duty cycle of the PWM. Writes
|
||||
only have effect when auto mode is turned off (see
|
||||
below). Range is 0 - 255.
|
||||
|
||||
pwmX_enable Fan speed control method:
|
||||
|
||||
0 - No control (fan at full speed)
|
||||
1 - Manual fan speed control (using pwm[1-*])
|
||||
2 - Automatic fan speed control
|
||||
|
||||
pwmX_auto_channels_temp Select which channels affect this PWM
|
||||
|
||||
1 - TEMP1 controls PWM
|
||||
2 - TEMP2 controls PWM
|
||||
4 - TEMP3 controls PWM
|
||||
6 - TEMP2 and TEMP3 control PWM
|
||||
7 - All three inputs control PWM
|
||||
|
||||
pwmX_freq Read/write the PWM frequency in Hz. The number
|
||||
should be one of the following:
|
||||
|
||||
11 Hz
|
||||
14 Hz
|
||||
22 Hz
|
||||
29 Hz
|
||||
35 Hz
|
||||
44 Hz
|
||||
58 Hz
|
||||
88 Hz
|
||||
|
||||
pwmX_auto_point1_pwm Read/write the minimum PWM duty cycle in automatic mode
|
||||
|
||||
pwmX_auto_point2_pwm Read/write the maximum PWM duty cycle in automatic mode
|
||||
|
||||
(there are three temperature settings numbered temp1 to temp3):
|
||||
|
||||
tempX_input Read the current temperature. The value is in milli
|
||||
degrees of Celsius.
|
||||
|
||||
tempX_max Read/write the upper temperature limit - exceeding this
|
||||
will cause an alarm.
|
||||
|
||||
tempX_min Read/write the lower temperature limit - dropping below this
|
||||
will cause an alarm.
|
||||
|
||||
tempX_offset Read/write the temperature adjustment offset
|
||||
|
||||
tempX_crit Read/write the THERM limit for remote1.
|
||||
|
||||
tempX_crit_hyst Set the temperature value below crit where the
|
||||
fans will stay on - this helps drive the temperature
|
||||
low enough so it doesn't stay near the edge and
|
||||
cause THERM to keep tripping.
|
||||
|
||||
tempX_auto_point1_temp Read/write the minimum temperature where the fans will
|
||||
turn on in automatic mode.
|
||||
|
||||
tempX_auto_point2_temp Read/write the maximum temperature over which the fans
|
||||
will run in automatic mode. tempX_auto_point1_temp
|
||||
and tempX_auto_point2_temp together define the
|
||||
range of automatic control.
|
||||
|
||||
tempX_alarm Read a 1 if the max/min alarm is set
|
||||
tempX_fault Read a 1 if either temp1 or temp3 diode has a fault
|
||||
|
||||
(There are two voltage settings, in1 and in2):
|
||||
|
||||
inX_input Read the current voltage on VCC. Value is in
|
||||
millivolts.
|
||||
|
||||
inX_min read/write the minimum voltage limit.
|
||||
Dropping below this causes an alarm.
|
||||
|
||||
inX_max read/write the maximum voltage limit.
|
||||
Exceeding this causes an alarm.
|
||||
|
||||
inX_alarm Read a 1 if the max/min alarm is set.
|
89
Documentation/hwmon/f71882fg
Normal file
89
Documentation/hwmon/f71882fg
Normal file
@ -0,0 +1,89 @@
|
||||
Kernel driver f71882fg
|
||||
======================
|
||||
|
||||
Supported chips:
|
||||
* Fintek F71882FG and F71883FG
|
||||
Prefix: 'f71882fg'
|
||||
Addresses scanned: none, address read from Super I/O config space
|
||||
Datasheet: Available from the Fintek website
|
||||
* Fintek F71862FG and F71863FG
|
||||
Prefix: 'f71862fg'
|
||||
Addresses scanned: none, address read from Super I/O config space
|
||||
Datasheet: Available from the Fintek website
|
||||
* Fintek F8000
|
||||
Prefix: 'f8000'
|
||||
Addresses scanned: none, address read from Super I/O config space
|
||||
Datasheet: Not public
|
||||
|
||||
Author: Hans de Goede <hdegoede@redhat.com>
|
||||
|
||||
|
||||
Description
|
||||
-----------
|
||||
|
||||
Fintek F718xxFG/F8000 Super I/O chips include complete hardware monitoring
|
||||
capabilities. They can monitor up to 9 voltages (3 for the F8000), 4 fans and
|
||||
3 temperature sensors.
|
||||
|
||||
These chips also have fan controlling features, using either DC or PWM, in
|
||||
three different modes (one manual, two automatic).
|
||||
|
||||
The driver assumes that no more than one chip is present, which seems
|
||||
reasonable.
|
||||
|
||||
|
||||
Monitoring
|
||||
----------
|
||||
|
||||
The Voltage, Fan and Temperature Monitoring uses the standard sysfs
|
||||
interface as documented in sysfs-interface, without any exceptions.
|
||||
|
||||
|
||||
Fan Control
|
||||
-----------
|
||||
|
||||
Both PWM (pulse-width modulation) and DC fan speed control methods are
|
||||
supported. The right one to use depends on external circuitry on the
|
||||
motherboard, so the driver assumes that the BIOS set the method
|
||||
properly.
|
||||
|
||||
There are 2 modes to specify the speed of the fan, PWM duty cycle (or DC
|
||||
voltage) mode, where 0-100% duty cycle (0-100% of 12V) is specified. And RPM
|
||||
mode where the actual RPM of the fan (as measured) is controlled and the speed
|
||||
gets specified as 0-100% of the fan#_full_speed file.
|
||||
|
||||
Since both modes work in a 0-100% (mapped to 0-255) scale, there isn't a
|
||||
whole lot of a difference when modifying fan control settings. The only
|
||||
important difference is that in RPM mode the 0-100% controls the fan speed
|
||||
between 0-100% of fan#_full_speed. It is assumed that if the BIOS programs
|
||||
RPM mode, it will also set fan#_full_speed properly, if it does not then
|
||||
fan control will not work properly, unless you set a sane fan#_full_speed
|
||||
value yourself.
|
||||
|
||||
Switching between these modes requires re-initializing a whole bunch of
|
||||
registers, so the mode which the BIOS has set is kept. The mode is
|
||||
printed when loading the driver.
|
||||
|
||||
Three different fan control modes are supported; the mode number is written
|
||||
to the pwm#_enable file. Note that not all modes are supported on all
|
||||
chips, and some modes may only be available in RPM / PWM mode on the F8000.
|
||||
Writing an unsupported mode will result in an invalid parameter error.
|
||||
|
||||
* 1: Manual mode
|
||||
You ask for a specific PWM duty cycle / DC voltage or a specific % of
|
||||
fan#_full_speed by writing to the pwm# file. This mode is only
|
||||
available on the F8000 if the fan channel is in RPM mode.
|
||||
|
||||
* 2: Normal auto mode
|
||||
You can define a number of temperature/fan speed trip points, which % the
|
||||
fan should run at at this temp and which temp a fan should follow using the
|
||||
standard sysfs interface. The number and type of trip points is chip
|
||||
dependent, see which files are available in sysfs.
|
||||
Fan/PWM channel 3 of the F8000 is always in this mode!
|
||||
|
||||
* 3: Thermostat mode (Only available on the F8000 when in duty cycle mode)
|
||||
The fan speed is regulated to keep the temp the fan is mapped to between
|
||||
temp#_auto_point2_temp and temp#_auto_point3_temp.
|
||||
|
||||
Both of the automatic modes require that pwm1 corresponds to fan1, pwm2 to
|
||||
fan2 and pwm3 to fan3.
|
@ -26,6 +26,10 @@ Supported chips:
|
||||
Datasheet: Publicly available at the ITE website
|
||||
http://www.ite.com.tw/product_info/file/pc/IT8718F_V0.2.zip
|
||||
http://www.ite.com.tw/product_info/file/pc/IT8718F_V0%203_(for%20C%20version).zip
|
||||
* IT8720F
|
||||
Prefix: 'it8720'
|
||||
Addresses scanned: from Super I/O config space (8 I/O ports)
|
||||
Datasheet: Not yet publicly available.
|
||||
* SiS950 [clone of IT8705F]
|
||||
Prefix: 'it87'
|
||||
Addresses scanned: from Super I/O config space (8 I/O ports)
|
||||
@ -71,7 +75,7 @@ Description
|
||||
-----------
|
||||
|
||||
This driver implements support for the IT8705F, IT8712F, IT8716F,
|
||||
IT8718F, IT8726F and SiS950 chips.
|
||||
IT8718F, IT8720F, IT8726F and SiS950 chips.
|
||||
|
||||
These chips are 'Super I/O chips', supporting floppy disks, infrared ports,
|
||||
joysticks and other miscellaneous stuff. For hardware monitoring, they
|
||||
@ -84,19 +88,19 @@ the IT8716F and late IT8712F have 6. They are shared with other functions
|
||||
though, so the functionality may not be available on a given system.
|
||||
The driver dumbly assumes it is there.
|
||||
|
||||
The IT8718F also features VID inputs (up to 8 pins) but the value is
|
||||
stored in the Super-I/O configuration space. Due to technical limitations,
|
||||
The IT8718F and IT8720F also feature VID inputs (up to 8 pins) but the value
|
||||
is stored in the Super-I/O configuration space. Due to technical limitations,
|
||||
this value can currently only be read once at initialization time, so
|
||||
the driver won't notice and report changes in the VID value. The two
|
||||
upper VID bits share their pins with voltage inputs (in5 and in6) so you
|
||||
can't have both on a given board.
|
||||
|
||||
The IT8716F, IT8718F and later IT8712F revisions have support for
|
||||
The IT8716F, IT8718F, IT8720F and later IT8712F revisions have support for
|
||||
2 additional fans. The additional fans are supported by the driver.
|
||||
|
||||
The IT8716F and IT8718F, and late IT8712F and IT8705F also have optional
|
||||
16-bit tachometer counters for fans 1 to 3. This is better (no more fan
|
||||
clock divider mess) but not compatible with the older chips and
|
||||
The IT8716F, IT8718F and IT8720F, and late IT8712F and IT8705F also have
|
||||
optional 16-bit tachometer counters for fans 1 to 3. This is better (no more
|
||||
fan clock divider mess) but not compatible with the older chips and
|
||||
revisions. The 16-bit tachometer mode is enabled by the driver when one
|
||||
of the above chips is detected.
|
||||
|
||||
@ -122,7 +126,7 @@ zero'; this is important for negative voltage measurements. All voltage
|
||||
inputs can measure voltages between 0 and 4.08 volts, with a resolution of
|
||||
0.016 volt. The battery voltage in8 does not have limit registers.
|
||||
|
||||
The VID lines (IT8712F/IT8716F/IT8718F) encode the core voltage value:
|
||||
The VID lines (IT8712F/IT8716F/IT8718F/IT8720F) encode the core voltage value:
|
||||
the voltage level your processor should work with. This is hardcoded by
|
||||
the mainboard and/or processor itself. It is a value in volts.
|
||||
|
||||
|
@ -13,18 +13,21 @@ Author:
|
||||
Description
|
||||
-----------
|
||||
|
||||
This driver provides support for the accelerometer found in various HP laptops
|
||||
sporting the feature officially called "HP Mobile Data Protection System 3D" or
|
||||
"HP 3D DriveGuard". It detect automatically laptops with this sensor. Known models
|
||||
(for now the HP 2133, nc6420, nc2510, nc8510, nc84x0, nw9440 and nx9420) will
|
||||
have their axis automatically oriented on standard way (eg: you can directly
|
||||
play neverball). The accelerometer data is readable via
|
||||
This driver provides support for the accelerometer found in various HP
|
||||
laptops sporting the feature officially called "HP Mobile Data
|
||||
Protection System 3D" or "HP 3D DriveGuard". It automatically detects
|
||||
laptops with this sensor. Known models (for now the HP 2133, nc6420,
|
||||
nc2510, nc8510, nc84x0, nw9440 and nx9420) will have their axis
|
||||
automatically oriented in the standard way (e.g. you can directly play
|
||||
neverball). The accelerometer data is readable via
|
||||
/sys/devices/platform/lis3lv02d.
|
||||
|
||||
Sysfs attributes under /sys/devices/platform/lis3lv02d/:
|
||||
position - 3D position that the accelerometer reports. Format: "(x,y,z)"
|
||||
calibrate - read: values (x, y, z) that are used as the base for input class device operation.
|
||||
write: forces the base to be recalibrated with the current position.
|
||||
calibrate - read: values (x, y, z) that are used as the base for input
|
||||
class device operation.
|
||||
write: forces the base to be recalibrated with the current
|
||||
position.
|
||||
rate - reports the sampling rate of the accelerometer device in HZ
|
||||
|
||||
This driver also provides an absolute input class device, allowing
|
||||
@ -39,11 +42,12 @@ the accelerometer are converted into a "standard" organisation of the axes
|
||||
* When the laptop is horizontal the position reported is about 0 for X and Y
|
||||
and a positive value for Z
|
||||
* If the left side is elevated, X increases (becomes positive)
|
||||
* If the front side (where the touchpad is) is elevated, Y decreases (becomes negative)
|
||||
* If the front side (where the touchpad is) is elevated, Y decreases
|
||||
(becomes negative)
|
||||
* If the laptop is put upside-down, Z becomes negative
|
||||
|
||||
If your laptop model is not recognized (cf "dmesg"), you can send an email to the
|
||||
authors to add it to the database. When reporting a new laptop, please include
|
||||
the output of "dmidecode" plus the value of /sys/devices/platform/lis3lv02d/position
|
||||
in these four cases.
|
||||
If your laptop model is not recognized (cf "dmesg"), you can send an
|
||||
email to the authors to add it to the database. When reporting a new
|
||||
laptop, please include the output of "dmidecode" plus the value of
|
||||
/sys/devices/platform/lis3lv02d/position in these four cases.
|
||||
|
||||
|
@ -1,9 +1,11 @@
|
||||
Kernel driver lm70
|
||||
==================
|
||||
|
||||
Supported chip:
|
||||
Supported chips:
|
||||
* National Semiconductor LM70
|
||||
Datasheet: http://www.national.com/pf/LM/LM70.html
|
||||
* Texas Instruments TMP121/TMP123
|
||||
Information: http://focus.ti.com/docs/prod/folders/print/tmp121.html
|
||||
|
||||
Author:
|
||||
Kaiwan N Billimoria <kaiwan@designergraphix.com>
|
||||
@ -25,6 +27,14 @@ complement digital temperature (sent via the SIO line), is available in the
|
||||
driver for interpretation. This driver makes use of the kernel's in-core
|
||||
SPI support.
|
||||
|
||||
As a real (in-tree) example of this "SPI protocol driver" interfacing
|
||||
with a "SPI master controller driver", see drivers/spi/spi_lm70llp.c
|
||||
and its associated documentation.
|
||||
|
||||
The TMP121/TMP123 are very similar; main differences are 4 wire SPI inter-
|
||||
face (read only) and 13-bit temperature data (0.0625 degrees celsius reso-
|
||||
lution).
|
||||
|
||||
Thanks to
|
||||
---------
|
||||
Jean Delvare <khali@linux-fr.org> for mentoring the hwmon-side driver
|
||||
|
@ -164,7 +164,7 @@ configured individually according to the following options.
|
||||
temperature. (PWM value from 0 to 255)
|
||||
|
||||
* pwm#_auto_pwm_minctl - this flag selects for temp#_auto_temp_off temperature
|
||||
the bahaviour of fans. Write 1 to let fans spinning at
|
||||
the behaviour of fans. Write 1 to let fans spinning at
|
||||
pwm#_auto_pwm_min or write 0 to let them off.
|
||||
|
||||
NOTE: It has been reported that there is a bug in the LM85 that causes the flag
|
||||
|
81
Documentation/hwmon/ltc4245
Normal file
81
Documentation/hwmon/ltc4245
Normal file
@ -0,0 +1,81 @@
|
||||
Kernel driver ltc4245
|
||||
=====================
|
||||
|
||||
Supported chips:
|
||||
* Linear Technology LTC4245
|
||||
Prefix: 'ltc4245'
|
||||
Addresses scanned: 0x20-0x3f
|
||||
Datasheet:
|
||||
http://www.linear.com/pc/downloadDocument.do?navId=H0,C1,C1003,C1006,C1140,P19392,D13517
|
||||
|
||||
Author: Ira W. Snyder <iws@ovro.caltech.edu>
|
||||
|
||||
|
||||
Description
|
||||
-----------
|
||||
|
||||
The LTC4245 controller allows a board to be safely inserted and removed
|
||||
from a live backplane in multiple supply systems such as CompactPCI and
|
||||
PCI Express.
|
||||
|
||||
|
||||
Usage Notes
|
||||
-----------
|
||||
|
||||
This driver does not probe for LTC4245 devices, due to the fact that some
|
||||
of the possible addresses are unfriendly to probing. You will need to use
|
||||
the "force" parameter to tell the driver where to find the device.
|
||||
|
||||
Example: the following will load the driver for an LTC4245 at address 0x23
|
||||
on I2C bus #1:
|
||||
$ modprobe ltc4245 force=1,0x23
|
||||
|
||||
|
||||
Sysfs entries
|
||||
-------------
|
||||
|
||||
The LTC4245 has built-in limits for over and under current warnings. This
|
||||
makes it very likely that the reference circuit will be used.
|
||||
|
||||
This driver uses the values in the datasheet to change the register values
|
||||
into the values specified in the sysfs-interface document. The current readings
|
||||
rely on the sense resistors listed in Table 2: "Sense Resistor Values".
|
||||
|
||||
in1_input 12v input voltage (mV)
|
||||
in2_input 5v input voltage (mV)
|
||||
in3_input 3v input voltage (mV)
|
||||
in4_input Vee (-12v) input voltage (mV)
|
||||
|
||||
in1_min_alarm 12v input undervoltage alarm
|
||||
in2_min_alarm 5v input undervoltage alarm
|
||||
in3_min_alarm 3v input undervoltage alarm
|
||||
in4_min_alarm Vee (-12v) input undervoltage alarm
|
||||
|
||||
curr1_input 12v current (mA)
|
||||
curr2_input 5v current (mA)
|
||||
curr3_input 3v current (mA)
|
||||
curr4_input Vee (-12v) current (mA)
|
||||
|
||||
curr1_max_alarm 12v overcurrent alarm
|
||||
curr2_max_alarm 5v overcurrent alarm
|
||||
curr3_max_alarm 3v overcurrent alarm
|
||||
curr4_max_alarm Vee (-12v) overcurrent alarm
|
||||
|
||||
in5_input 12v output voltage (mV)
|
||||
in6_input 5v output voltage (mV)
|
||||
in7_input 3v output voltage (mV)
|
||||
in8_input Vee (-12v) output voltage (mV)
|
||||
|
||||
in5_min_alarm 12v output undervoltage alarm
|
||||
in6_min_alarm 5v output undervoltage alarm
|
||||
in7_min_alarm 3v output undervoltage alarm
|
||||
in8_min_alarm Vee (-12v) output undervoltage alarm
|
||||
|
||||
in9_input GPIO #1 voltage data
|
||||
in10_input GPIO #2 voltage data
|
||||
in11_input GPIO #3 voltage data
|
||||
|
||||
power1_input 12v power usage (mW)
|
||||
power2_input 5v power usage (mW)
|
||||
power3_input 3v power usage (mW)
|
||||
power4_input Vee (-12v) power usage (mW)
|
@ -11,3 +11,8 @@ unplug old device(s) and plug new device(s)
|
||||
# echo -n "1" > /sys/class/ide_port/idex/scan
|
||||
|
||||
done
|
||||
|
||||
NOTE: please make sure that partitions are unmounted and that there are
|
||||
no other active references to devices before doing "delete_devices" step,
|
||||
also do not attempt "scan" step on devices currently in use -- otherwise
|
||||
results may be unpredictable and lead to data loss if you're unlucky
|
||||
|
109
Documentation/input/walkera0701.txt
Normal file
109
Documentation/input/walkera0701.txt
Normal file
@ -0,0 +1,109 @@
|
||||
|
||||
Walkera WK-0701 transmitter is supplied with ready to fly Walkera
|
||||
helicopters such as HM36, HM37, HM60. The walkera0701 module enables the use of
|
||||
this transmitter as a joystick.
|
||||
|
||||
Devel homepage and download:
|
||||
http://zub.fei.tuke.sk/walkera-wk0701/
|
||||
|
||||
or use cogito:
|
||||
cg-clone http://zub.fei.tuke.sk/GIT/walkera0701-joystick
|
||||
|
||||
|
||||
Connecting to PC:
|
||||
|
||||
At the back side of the transmitter an S-video connector can be found. Modulation
|
||||
pulses from processor to HF part can be found at pin 2 of this connector,
|
||||
pin 3 is GND. Between pin 3 and CPU 5k6 resistor can be found. To get
|
||||
modulation pulses to PC, signal pulses must be amplified.
|
||||
|
||||
Cable: (walkera TX to parport)
|
||||
|
||||
Walkera WK-0701 TX S-VIDEO connector:
|
||||
(back side of TX)
|
||||
__ __ S-video: canon25
|
||||
/ |_| \ pin 2 (signal) NPN parport
|
||||
/ O 4 3 O \ pin 3 (GND) LED ________________ 10 ACK
|
||||
( O 2 1 O ) | C
|
||||
\ ___ / 2 ________________________|\|_____|/
|
||||
| [___] | |/| B |\
|
||||
------- 3 __________________________________|________________ 25 GND
|
||||
E
|
||||
|
||||
|
||||
I use green LED and BC109 NPN transistor.
|
||||
|
||||
Software:
|
||||
|
||||
Build kernel with walkera0701 module. Module walkera0701 needs exclusive
|
||||
access to parport, modules like lp must be unloaded before loading
|
||||
walkera0701 module, check dmesg for error messages. Connect TX to PC by
|
||||
cable and run jstest /dev/input/js0 to see values from TX. If no value can
|
||||
be changed by TX "joystick", check output from /proc/interrupts. Value for
|
||||
(usually irq7) parport must increase if TX is on.
|
||||
|
||||
|
||||
|
||||
Technical details:
|
||||
|
||||
Driver uses interrupt from parport ACK input bit to measure pulse length
|
||||
using hrtimers.
|
||||
|
||||
Frame format:
|
||||
Based on walkera WK-0701 PCM Format description by Shaul Eizikovich.
|
||||
(downloaded from http://www.smartpropoplus.com/Docs/Walkera_Wk-0701_PCM.pdf)
|
||||
|
||||
Signal pulses:
|
||||
(ANALOG)
|
||||
SYNC BIN OCT
|
||||
+---------+ +------+
|
||||
| | | |
|
||||
--+ +------+ +---
|
||||
|
||||
Frame:
|
||||
SYNC , BIN1, OCT1, BIN2, OCT2 ... BIN24, OCT24, BIN25, next frame SYNC ..
|
||||
|
||||
pulse length:
|
||||
Binary values: Analog octal values:
|
||||
|
||||
288 uS Binary 0 318 uS 000
|
||||
438 uS Binary 1 398 uS 001
|
||||
478 uS 010
|
||||
558 uS 011
|
||||
638 uS 100
|
||||
1306 uS SYNC 718 uS 101
|
||||
798 uS 110
|
||||
878 uS 111
|
||||
|
||||
24 bin+oct values + 1 bin value = 24*4+1 bits = 97 bits
|
||||
|
||||
(Warning, pulses on ACK are inverted by transistor, irq is raised on sync
|
||||
to bin change or octal value to bin change).
|
||||
|
||||
Binary data representations:
|
||||
|
||||
One binary and octal value can be grouped to nibble. 24 nibbles + one binary
|
||||
value can be sampled between sync pulses.
|
||||
|
||||
Values for first four channels (analog joystick values) can be found in
|
||||
first 10 nibbles. Analog value is represented by one sign bit and 9 bit
|
||||
absolute binary value. (10 bits per channel). Next nibble is checksum for
|
||||
first ten nibbles.
|
||||
|
||||
Next nibbles 12 .. 21 represent four channels (not all channels can be
|
||||
directly controlled from TX). Binary representations are the same as in first
|
||||
four channels. In nibbles 22 and 23 is a special magic number. Nibble 24 is
|
||||
checksum for nibbles 12..23.
|
||||
|
||||
After last octal value for nibble 24 and next sync pulse one additional
|
||||
binary value can be sampled. This bit and magic number are not used in
|
||||
software driver. Some details about this magic number can be found in
|
||||
Walkera_Wk-0701_PCM.pdf.
|
||||
|
||||
Checksum calculation:
|
||||
|
||||
Sum of octal values in nibbles must be the same as octal value in checksum
|
||||
nibble (only first 3 bits are used). Binary value for checksum nibble is
|
||||
calculated by sum of binary values in checked nibbles + sum of octal values
|
||||
in checked nibbles divided by 8. Only bit 0 of this sum is used.
|
||||
|
@ -84,7 +84,7 @@ Code Seq# Include File Comments
|
||||
'B' C0-FF advanced bbus
|
||||
<mailto:maassen@uni-freiburg.de>
|
||||
'C' all linux/soundcard.h
|
||||
'D' all asm-s390/dasd.h
|
||||
'D' all arch/s390/include/asm/dasd.h
|
||||
'E' all linux/input.h
|
||||
'F' all linux/fb.h
|
||||
'H' all linux/hiddev.h
|
||||
@ -97,6 +97,7 @@ Code Seq# Include File Comments
|
||||
<http://linux01.gwdg.de/~alatham/ppdd.html>
|
||||
'M' all linux/soundcard.h
|
||||
'N' 00-1F drivers/usb/scanner.h
|
||||
'O' 00-02 include/mtd/ubi-user.h UBI
|
||||
'P' all linux/soundcard.h
|
||||
'Q' all linux/soundcard.h
|
||||
'R' 00-1F linux/random.h
|
||||
@ -104,7 +105,7 @@ Code Seq# Include File Comments
|
||||
'S' 80-81 scsi/scsi_ioctl.h conflict!
|
||||
'S' 82-FF scsi/scsi.h conflict!
|
||||
'T' all linux/soundcard.h conflict!
|
||||
'T' all asm-i386/ioctls.h conflict!
|
||||
'T' all arch/x86/include/asm/ioctls.h conflict!
|
||||
'U' 00-EF linux/drivers/usb/usb.h
|
||||
'V' all linux/vt.h
|
||||
'W' 00-1F linux/watchdog.h conflict!
|
||||
@ -119,7 +120,7 @@ Code Seq# Include File Comments
|
||||
<mailto:natalia@nikhefk.nikhef.nl>
|
||||
'c' 00-7F linux/comstats.h conflict!
|
||||
'c' 00-7F linux/coda.h conflict!
|
||||
'c' 80-9F asm-s390/chsc.h
|
||||
'c' 80-9F arch/s390/include/asm/chsc.h
|
||||
'd' 00-FF linux/char/drm/drm.h conflict!
|
||||
'd' 00-DF linux/video_decoder.h conflict!
|
||||
'd' F0-FF linux/digi1.h
|
||||
@ -142,6 +143,9 @@ Code Seq# Include File Comments
|
||||
'n' 00-7F linux/ncp_fs.h
|
||||
'n' E0-FF video/matrox.h matroxfb
|
||||
'o' 00-1F fs/ocfs2/ocfs2_fs.h OCFS2
|
||||
'o' 00-03 include/mtd/ubi-user.h conflict! (OCFS2 and UBI overlap)
|
||||
'o' 40-41 include/mtd/ubi-user.h UBI
|
||||
'o' 01-A1 include/linux/dvb/*.h DVB
|
||||
'p' 00-0F linux/phantom.h conflict! (OpenHaptics needs this)
|
||||
'p' 00-3F linux/mc146818rtc.h conflict!
|
||||
'p' 40-7F linux/nvram.h
|
||||
@ -166,7 +170,7 @@ Code Seq# Include File Comments
|
||||
<mailto:oe@port.de>
|
||||
0x80 00-1F linux/fb.h
|
||||
0x81 00-1F linux/videotext.h
|
||||
0x89 00-06 asm-i386/sockios.h
|
||||
0x89 00-06 arch/x86/include/asm/sockios.h
|
||||
0x89 0B-DF linux/sockios.h
|
||||
0x89 E0-EF linux/sockios.h SIOCPROTOPRIVATE range
|
||||
0x89 F0-FF linux/sockios.h SIOCDEVPRIVATE range
|
||||
|
@ -12,11 +12,11 @@ file at first.
|
||||
|
||||
==================================
|
||||
これは、
|
||||
linux-2.6.24/Documentation/stable_kernel_rules.txt
|
||||
linux-2.6.29/Documentation/stable_kernel_rules.txt
|
||||
の和訳です。
|
||||
|
||||
翻訳団体: JF プロジェクト < http://www.linux.or.jp/JF/ >
|
||||
翻訳日: 2007/12/30
|
||||
翻訳日: 2009/1/14
|
||||
翻訳者: Tsugikazu Shibata <tshibata at ab dot jp dot nec dot com>
|
||||
校正者: 武井伸光さん、<takei at webmasters dot gr dot jp>
|
||||
かねこさん (Seiji Kaneko) <skaneko at a2 dot mbn dot or dot jp>
|
||||
@ -38,12 +38,15 @@ linux-2.6.24/Documentation/stable_kernel_rules.txt
|
||||
- ビルドエラー(CONFIG_BROKENになっているものを除く), oops, ハング、デー
|
||||
タ破壊、現実のセキュリティ問題、その他 "ああ、これはダメだね"という
|
||||
ようなものを修正しなければならない。短く言えば、重大な問題。
|
||||
- 新しい device ID とクオークも受け入れられる。
|
||||
- どのように競合状態が発生するかの説明も一緒に書かれていない限り、
|
||||
"理論的には競合状態になる"ようなものは不可。
|
||||
- いかなる些細な修正も含めることはできない。(スペルの修正、空白のクリー
|
||||
ンアップなど)
|
||||
- 対応するサブシステムメンテナが受け入れたものでなければならない。
|
||||
- Documentation/SubmittingPatches の規則に従ったものでなければならない。
|
||||
- パッチ自体か同等の修正が Linus のツリーに既に存在しなければならない。
|
||||
Linus のツリーでのコミットID を -stable へのパッチ投稿の際に引用す
|
||||
ること。
|
||||
|
||||
-stable ツリーにパッチを送付する手続き-
|
||||
|
||||
@ -52,8 +55,10 @@ linux-2.6.24/Documentation/stable_kernel_rules.txt
|
||||
- 送信者はパッチがキューに受け付けられた際には ACK を、却下された場合
|
||||
には NAK を受け取る。この反応は開発者たちのスケジュールによって、数
|
||||
日かかる場合がある。
|
||||
- もし受け取られたら、パッチは他の開発者たちのレビューのために
|
||||
-stable キューに追加される。
|
||||
- もし受け取られたら、パッチは他の開発者たちと関連するサブシステムの
|
||||
メンテナーによるレビューのために -stable キューに追加される。
|
||||
- パッチに stable@kernel.org のアドレスが付加されているときには、それ
|
||||
が Linus のツリーに入る時に自動的に stable チームに email される。
|
||||
- セキュリティパッチはこのエイリアス (stable@kernel.org) に送られるべ
|
||||
きではなく、代わりに security@kernel.org のアドレスに送られる。
|
||||
|
||||
|
@ -1,5 +1,9 @@
|
||||
00-INDEX
|
||||
- this file: info on the kernel build process
|
||||
- this file: info on the kernel build process
|
||||
kbuild.txt
|
||||
- developer information on kbuild
|
||||
kconfig.txt
|
||||
- usage help for make *config
|
||||
kconfig-language.txt
|
||||
- specification of Config Language, the language in Kconfig files
|
||||
makefiles.txt
|
||||
|
134
Documentation/kbuild/kbuild.txt
Normal file
134
Documentation/kbuild/kbuild.txt
Normal file
@ -0,0 +1,134 @@
|
||||
Environment variables
|
||||
|
||||
KCPPFLAGS
|
||||
--------------------------------------------------
|
||||
Additional options to pass when preprocessing. The preprocessing options
|
||||
will be used in all cases where kbuild does preprocessing including
|
||||
building C files and assembler files.
|
||||
|
||||
KAFLAGS
|
||||
--------------------------------------------------
|
||||
Additional options to the assembler.
|
||||
|
||||
KCFLAGS
|
||||
--------------------------------------------------
|
||||
Additional options to the C compiler.
|
||||
|
||||
KBUILD_VERBOSE
|
||||
--------------------------------------------------
|
||||
Set the kbuild verbosity. Can be assigned same values as "V=...".
|
||||
See make help for the full list.
|
||||
Setting "V=..." takes precedence over KBUILD_VERBOSE.
|
||||
|
||||
KBUILD_EXTMOD
|
||||
--------------------------------------------------
|
||||
Set the directory to look for the kernel source when building external
|
||||
modules.
|
||||
The directory can be specified in several ways:
|
||||
1) Use "M=..." on the command line
|
||||
2) Environment variable KBUILD_EXTMOD
|
||||
3) Environment variable SUBDIRS
|
||||
The possibilities are listed in the order they take precedence.
|
||||
Using "M=..." will always override the others.
|
||||
|
||||
KBUILD_OUTPUT
|
||||
--------------------------------------------------
|
||||
Specify the output directory when building the kernel.
|
||||
The output directory can also be specified using "O=...".
|
||||
Setting "O=..." takes precedence over KBUILD_OUTPUT.
|
||||
|
||||
ARCH
|
||||
--------------------------------------------------
|
||||
Set ARCH to the architecture to be built.
|
||||
In most cases the name of the architecture is the same as the
|
||||
directory name found in the arch/ directory.
|
||||
But some architectures such as x86 and sparc have aliases.
|
||||
x86: i386 for 32 bit, x86_64 for 64 bit
|
||||
sparc: sparc for 32 bit, sparc64 for 64 bit
|
||||
|
||||
CROSS_COMPILE
|
||||
--------------------------------------------------
|
||||
Specify an optional fixed part of the binutils filename.
|
||||
CROSS_COMPILE can be a part of the filename or the full path.
|
||||
|
||||
CROSS_COMPILE is also used for ccache in some setups.
|
||||
|
||||
CF
|
||||
--------------------------------------------------
|
||||
Additional options for sparse.
|
||||
CF is often used on the command-line like this:
|
||||
|
||||
make CF=-Wbitwise C=2
|
||||
|
||||
INSTALL_PATH
|
||||
--------------------------------------------------
|
||||
INSTALL_PATH specifies where to place the updated kernel and system map
|
||||
images. Default is /boot, but you can set it to other values.
|
||||
|
||||
|
||||
MODLIB
|
||||
--------------------------------------------------
|
||||
Specify where to install modules.
|
||||
The default value is:
|
||||
|
||||
$(INSTALL_MOD_PATH)/lib/modules/$(KERNELRELEASE)
|
||||
|
||||
The value can be overridden in which case the default value is ignored.
|
||||
|
||||
INSTALL_MOD_PATH
|
||||
--------------------------------------------------
|
||||
INSTALL_MOD_PATH specifies a prefix to MODLIB for module directory
|
||||
relocations required by build roots. This is not defined in the
|
||||
makefile but the argument can be passed to make if needed.
|
||||
|
||||
INSTALL_MOD_STRIP
|
||||
--------------------------------------------------
|
||||
INSTALL_MOD_STRIP, if defined, will cause modules to be
|
||||
stripped after they are installed. If INSTALL_MOD_STRIP is '1', then
|
||||
the default option --strip-debug will be used. Otherwise,
|
||||
INSTALL_MOD_STRIP will be used as the options to the strip command.
|
||||
|
||||
INSTALL_FW_PATH
|
||||
--------------------------------------------------
|
||||
INSTALL_FW_PATH specifies where to install the firmware blobs.
|
||||
The default value is:
|
||||
|
||||
$(INSTALL_MOD_PATH)/lib/firmware
|
||||
|
||||
The value can be overridden in which case the default value is ignored.
|
||||
|
||||
INSTALL_HDR_PATH
|
||||
--------------------------------------------------
|
||||
INSTALL_HDR_PATH specifies where to install user space headers when
|
||||
executing "make headers_*".
|
||||
The default value is:
|
||||
|
||||
$(objtree)/usr
|
||||
|
||||
$(objtree) is the directory where output files are saved.
|
||||
The output directory is often set using "O=..." on the commandline.
|
||||
|
||||
The value can be overridden in which case the default value is ignored.
|
||||
|
||||
KBUILD_MODPOST_WARN
|
||||
--------------------------------------------------
|
||||
KBUILD_MODPOST_WARN can be set to avoid errors in case of undefined
|
||||
symbols in the final module linking stage. It changes such errors
|
||||
into warnings.
|
||||
|
||||
KBUILD_MODPOST_NOFINAL
|
||||
--------------------------------------------------
|
||||
KBUILD_MODPOST_NOFINAL can be set to skip the final link of modules.
|
||||
This is solely useful to speed up test compiles.
|
||||
|
||||
KBUILD_EXTRA_SYMBOLS
|
||||
--------------------------------------------------
|
||||
For modules that use symbols from other modules.
|
||||
See more details in modules.txt.
|
||||
|
||||
ALLSOURCE_ARCHS
|
||||
--------------------------------------------------
|
||||
For tags/TAGS/cscope targets, you can specify more than one arch
|
||||
to be included in the databases, separated by blank space. E.g.:
|
||||
|
||||
$ make ALLSOURCE_ARCHS="x86 mips arm" tags
|
188
Documentation/kbuild/kconfig.txt
Normal file
188
Documentation/kbuild/kconfig.txt
Normal file
@ -0,0 +1,188 @@
|
||||
This file contains some assistance for using "make *config".
|
||||
|
||||
Use "make help" to list all of the possible configuration targets.
|
||||
|
||||
The xconfig ('qconf') and menuconfig ('mconf') programs also
|
||||
have embedded help text. Be sure to check it for navigation,
|
||||
search, and other general help text.
|
||||
|
||||
======================================================================
|
||||
General
|
||||
--------------------------------------------------
|
||||
|
||||
New kernel releases often introduce new config symbols. Often more
|
||||
important, new kernel releases may rename config symbols. When
|
||||
this happens, using a previously working .config file and running
|
||||
"make oldconfig" won't necessarily produce a working new kernel
|
||||
for you, so you may find that you need to see what NEW kernel
|
||||
symbols have been introduced.
|
||||
|
||||
To see a list of new config symbols when using "make oldconfig", use
|
||||
|
||||
cp user/some/old.config .config
|
||||
yes "" | make oldconfig >conf.new
|
||||
|
||||
and the config program will list as (NEW) any new symbols that have
|
||||
unknown values. Of course, the .config file is also updated with
|
||||
new (default) values, so you can use:
|
||||
|
||||
grep "(NEW)" conf.new
|
||||
|
||||
to see the new config symbols or you can 'diff' the previous and
|
||||
new .config files to see the differences:
|
||||
|
||||
diff .config.old .config | less
|
||||
|
||||
(Yes, we need something better here.)
|
||||
|
||||
|
||||
======================================================================
|
||||
menuconfig
|
||||
--------------------------------------------------
|
||||
|
||||
SEARCHING for CONFIG symbols
|
||||
|
||||
Searching in menuconfig:
|
||||
|
||||
The Search function searches for kernel configuration symbol
|
||||
names, so you have to know something close to what you are
|
||||
looking for.
|
||||
|
||||
Example:
|
||||
/hotplug
|
||||
This lists all config symbols that contain "hotplug",
|
||||
e.g., HOTPLUG, HOTPLUG_CPU, MEMORY_HOTPLUG.
|
||||
|
||||
For search help, enter / followed by TAB-TAB-TAB (to highlight
|
||||
<Help>) and Enter. This will tell you that you can also use
|
||||
regular expressions (regexes) in the search string, so if you
|
||||
are not interested in MEMORY_HOTPLUG, you could try
|
||||
|
||||
/^hotplug
|
||||
|
||||
|
||||
______________________________________________________________________
|
||||
Color Themes for 'menuconfig'
|
||||
|
||||
It is possible to select different color themes using the variable
|
||||
MENUCONFIG_COLOR. To select a theme use:
|
||||
|
||||
make MENUCONFIG_COLOR=<theme> menuconfig
|
||||
|
||||
Available themes are:
|
||||
mono => selects colors suitable for monochrome displays
|
||||
blackbg => selects a color scheme with black background
|
||||
classic => theme with blue background. The classic look
|
||||
bluetitle => a LCD friendly version of classic. (default)
|
||||
|
||||
______________________________________________________________________
|
||||
Environment variables in 'menuconfig'
|
||||
|
||||
KCONFIG_ALLCONFIG
|
||||
--------------------------------------------------
|
||||
(partially based on lkml email from/by Rob Landley, re: miniconfig)
|
||||
--------------------------------------------------
|
||||
The allyesconfig/allmodconfig/allnoconfig/randconfig variants can
|
||||
also use the environment variable KCONFIG_ALLCONFIG as a flag or a
|
||||
filename that contains config symbols that the user requires to be
|
||||
set to a specific value. If KCONFIG_ALLCONFIG is used without a
|
||||
filename, "make *config" checks for a file named
|
||||
"all{yes/mod/no/random}.config" (corresponding to the *config command
|
||||
that was used) for symbol values that are to be forced. If this file
|
||||
is not found, it checks for a file named "all.config" to contain forced
|
||||
values.
|
||||
|
||||
This enables you to create "miniature" config (miniconfig) or custom
|
||||
config files containing just the config symbols that you are interested
|
||||
in. Then the kernel config system generates the full .config file,
|
||||
including dependencies of your miniconfig file, based on the miniconfig
|
||||
file.
|
||||
|
||||
This 'KCONFIG_ALLCONFIG' file is a config file which contains
|
||||
(usually a subset of all) preset config symbols. These variable
|
||||
settings are still subject to normal dependency checks.
|
||||
|
||||
Examples:
|
||||
KCONFIG_ALLCONFIG=custom-notebook.config make allnoconfig
|
||||
or
|
||||
KCONFIG_ALLCONFIG=mini.config make allnoconfig
|
||||
or
|
||||
make KCONFIG_ALLCONFIG=mini.config allnoconfig
|
||||
|
||||
These examples will disable most options (allnoconfig) but enable or
|
||||
disable the options that are explicitly listed in the specified
|
||||
mini-config files.
|
||||
|
||||
KCONFIG_NOSILENTUPDATE
|
||||
--------------------------------------------------
|
||||
If this variable has a non-blank value, it prevents silent kernel
|
||||
config updates (requires explicit updates).
|
||||
|
||||
KCONFIG_CONFIG
|
||||
--------------------------------------------------
|
||||
This environment variable can be used to specify a default kernel config
|
||||
file name to override the default name of ".config".
|
||||
|
||||
KCONFIG_OVERWRITECONFIG
|
||||
--------------------------------------------------
|
||||
If you set KCONFIG_OVERWRITECONFIG in the environment, Kconfig will not
|
||||
break symlinks when .config is a symlink to somewhere else.
|
||||
|
||||
KCONFIG_NOTIMESTAMP
|
||||
--------------------------------------------------
|
||||
If this environment variable exists and is non-null, the timestamp line
|
||||
in generated .config files is omitted.
|
||||
|
||||
KCONFIG_AUTOCONFIG
|
||||
--------------------------------------------------
|
||||
This environment variable can be set to specify the path & name of the
|
||||
"auto.conf" file. Its default value is "include/config/auto.conf".
|
||||
|
||||
KCONFIG_AUTOHEADER
|
||||
--------------------------------------------------
|
||||
This environment variable can be set to specify the path & name of the
|
||||
"autoconf.h" (header) file. Its default value is "include/linux/autoconf.h".
|
||||
|
||||
______________________________________________________________________
|
||||
menuconfig User Interface Options
|
||||
----------------------------------------------------------------------
|
||||
MENUCONFIG_MODE
|
||||
--------------------------------------------------
|
||||
This mode shows all sub-menus in one large tree.
|
||||
|
||||
Example:
|
||||
MENUCONFIG_MODE=single_menu make menuconfig
|
||||
|
||||
======================================================================
|
||||
xconfig
|
||||
--------------------------------------------------
|
||||
|
||||
Searching in xconfig:
|
||||
|
||||
The Search function searches for kernel configuration symbol
|
||||
names, so you have to know something close to what you are
|
||||
looking for.
|
||||
|
||||
Example:
|
||||
Ctrl-F hotplug
|
||||
or
|
||||
Menu: File, Search, hotplug
|
||||
|
||||
lists all config symbol entries that contain "hotplug" in
|
||||
the symbol name. In this Search dialog, you may change the
|
||||
config setting for any of the entries that are not grayed out.
|
||||
You can also enter a different search string without having
|
||||
to return to the main menu.
|
||||
|
||||
|
||||
======================================================================
|
||||
gconfig
|
||||
--------------------------------------------------
|
||||
|
||||
Searching in gconfig:
|
||||
|
||||
None (gconfig isn't maintained as well as xconfig or menuconfig);
|
||||
however, gconfig does have a few more viewing choices than
|
||||
xconfig does.
|
||||
|
||||
###
|
@ -253,7 +253,7 @@ following files:
|
||||
|
||||
# Module specific targets
|
||||
genbin:
|
||||
echo "X" > 8123_bin_shipped
|
||||
echo "X" > 8123_bin.o_shipped
|
||||
|
||||
|
||||
In example 2, we are down to two fairly simple files and for simple
|
||||
@ -279,7 +279,7 @@ following files:
|
||||
|
||||
# Module specific targets
|
||||
genbin:
|
||||
echo "X" > 8123_bin_shipped
|
||||
echo "X" > 8123_bin.o_shipped
|
||||
|
||||
endif
|
||||
|
||||
|
@ -71,6 +71,11 @@ The @argument descriptions must begin on the very next line following
|
||||
this opening short function description line, with no intervening
|
||||
empty comment lines.
|
||||
|
||||
If a function parameter is "..." (varargs), it should be listed in
|
||||
kernel-doc notation as:
|
||||
* @...: description
|
||||
|
||||
|
||||
Example kernel-doc data structure comment.
|
||||
|
||||
/**
|
||||
@ -282,6 +287,32 @@ struct my_struct {
|
||||
};
|
||||
|
||||
|
||||
Including documentation blocks in source files
|
||||
----------------------------------------------
|
||||
|
||||
To facilitate having source code and comments close together, you can
|
||||
include kernel-doc documentation blocks that are free-form comments
|
||||
instead of being kernel-doc for functions, structures, unions,
|
||||
enums, or typedefs. This could be used for something like a
|
||||
theory of operation for a driver or library code, for example.
|
||||
|
||||
This is done by using a DOC: section keyword with a section title. E.g.:
|
||||
|
||||
/**
|
||||
* DOC: Theory of Operation
|
||||
*
|
||||
* The whizbang foobar is a dilly of a gizmo. It can do whatever you
|
||||
* want it to do, at any time. It reads your mind. Here's how it works.
|
||||
*
|
||||
* foo bar splat
|
||||
*
|
||||
* The only drawback to this gizmo is that it can sometimes damage
|
||||
* hardware, software, or its subject(s).
|
||||
*/
|
||||
|
||||
DOC: sections are used in SGML template files as indicated below.
|
||||
|
||||
|
||||
How to make new SGML template files
|
||||
-----------------------------------
|
||||
|
||||
@ -302,6 +333,9 @@ exported using EXPORT_SYMBOL.
|
||||
!F<filename> <function [functions...]> is replaced by the
|
||||
documentation, in <filename>, for the functions listed.
|
||||
|
||||
!P<filename> <section title> is replaced by the contents of the DOC:
|
||||
section titled <section title> from <filename>.
|
||||
Spaces are allowed in <section title>; do not quote the <section title>.
|
||||
|
||||
Tim.
|
||||
*/ <twaugh@redhat.com>
|
||||
|
@ -92,6 +92,7 @@ parameter is applicable:
|
||||
SUSPEND System suspend states are enabled.
|
||||
FTRACE Function tracing enabled.
|
||||
TS Appropriate touchscreen support is enabled.
|
||||
UMS USB Mass Storage support is enabled.
|
||||
USB USB support is enabled.
|
||||
USBHID USB Human Interface Device support is enabled.
|
||||
V4L Video For Linux support is enabled.
|
||||
@ -141,6 +142,7 @@ and is between 256 and 4096 characters. It is defined in the file
|
||||
ht -- run only enough ACPI to enable Hyper Threading
|
||||
strict -- Be less tolerant of platforms that are not
|
||||
strictly ACPI specification compliant.
|
||||
rsdt -- prefer RSDT over (default) XSDT
|
||||
|
||||
See also Documentation/power/pm.txt, pci=noacpi
|
||||
|
||||
@ -151,16 +153,20 @@ and is between 256 and 4096 characters. It is defined in the file
|
||||
default: 0
|
||||
|
||||
acpi_sleep= [HW,ACPI] Sleep options
|
||||
Format: { s3_bios, s3_mode, s3_beep, s4_nohwsig, old_ordering }
|
||||
See Documentation/power/video.txt for s3_bios and s3_mode.
|
||||
Format: { s3_bios, s3_mode, s3_beep, s4_nohwsig,
|
||||
old_ordering, s4_nonvs }
|
||||
See Documentation/power/video.txt for information on
|
||||
s3_bios and s3_mode.
|
||||
s3_beep is for debugging; it makes the PC's speaker beep
|
||||
as soon as the kernel's real-mode entry point is called.
|
||||
s4_nohwsig prevents ACPI hardware signature from being
|
||||
used during resume from hibernation.
|
||||
old_ordering causes the ACPI 1.0 ordering of the _PTS
|
||||
control method, wrt putting devices into low power
|
||||
states, to be enforced (the ACPI 2.0 ordering of _PTS is
|
||||
used by default).
|
||||
control method, with respect to putting devices into
|
||||
low power states, to be enforced (the ACPI 2.0 ordering
|
||||
of _PTS is used by default).
|
||||
s4_nonvs prevents the kernel from saving/restoring the
|
||||
ACPI NVS memory during hibernation.
|
||||
|
||||
acpi_sci= [HW,ACPI] ACPI System Control Interrupt trigger mode
|
||||
Format: { level | edge | high | low }
|
||||
@ -195,7 +201,7 @@ and is between 256 and 4096 characters. It is defined in the file
|
||||
acpi_skip_timer_override [HW,ACPI]
|
||||
Recognize and ignore IRQ0/pin2 Interrupt Override.
|
||||
For broken nForce2 BIOS resulting in XT-PIC timer.
|
||||
acpi_use_timer_override [HW,ACPI}
|
||||
acpi_use_timer_override [HW,ACPI]
|
||||
Use timer override. For some broken Nvidia NF5 boards
|
||||
that require a timer override, but don't have
|
||||
HPET
|
||||
@ -470,8 +476,8 @@ and is between 256 and 4096 characters. It is defined in the file
|
||||
|
||||
clearcpuid=BITNUM [X86]
|
||||
Disable CPUID feature X for the kernel. See
|
||||
include/asm-x86/cpufeature.h for the valid bit numbers.
|
||||
Note the Linux specific bits are not necessarily
|
||||
arch/x86/include/asm/cpufeature.h for the valid bit
|
||||
numbers. Note the Linux specific bits are not necessarily
|
||||
stable over kernel options, but the vendor specific
|
||||
ones should be.
|
||||
Also note that user programs calling CPUID directly
|
||||
@ -552,6 +558,11 @@ and is between 256 and 4096 characters. It is defined in the file
|
||||
not work reliably with all consoles, but is known
|
||||
to work with serial and VGA consoles.
|
||||
|
||||
coredump_filter=
|
||||
[KNL] Change the default value for
|
||||
/proc/<pid>/coredump_filter.
|
||||
See also Documentation/filesystems/proc.txt.
|
||||
|
||||
cpcihp_generic= [HW,PCI] Generic port I/O CompactPCI driver
|
||||
Format:
|
||||
<first_slot>,<last_slot>,<port>,<enum_bit>[,<debug>]
|
||||
@ -567,9 +578,6 @@ and is between 256 and 4096 characters. It is defined in the file
|
||||
a memory unit (amount[KMG]). See also
|
||||
Documentation/kdump/kdump.txt for an example.
|
||||
|
||||
cs4232= [HW,OSS]
|
||||
Format: <io>,<irq>,<dma>,<dma2>,<mpuio>,<mpuirq>
|
||||
|
||||
cs89x0_dma= [HW,NET]
|
||||
Format: <dma>
|
||||
|
||||
@ -722,10 +730,6 @@ and is between 256 and 4096 characters. It is defined in the file
|
||||
Default value is 0.
|
||||
Value can be changed at runtime via /selinux/enforce.
|
||||
|
||||
es1371= [HW,OSS]
|
||||
Format: <spdif>,[<nomix>,[<amplifier>]]
|
||||
See also header of sound/oss/es1371.c.
|
||||
|
||||
ether= [HW,NET] Ethernet cards parameters
|
||||
This option is obsoleted by the "netdev=" option, which
|
||||
has equivalent usage. See its documentation for details.
|
||||
@ -824,8 +828,8 @@ and is between 256 and 4096 characters. It is defined in the file
|
||||
|
||||
hlt [BUGS=ARM,SH]
|
||||
|
||||
hvc_iucv= [S390] Number of z/VM IUCV Hypervisor console (HVC)
|
||||
back-ends. Valid parameters: 0..8
|
||||
hvc_iucv= [S390] Number of z/VM IUCV hypervisor console (HVC)
|
||||
terminal devices. Valid values: 0..8
|
||||
|
||||
i8042.debug [HW] Toggle i8042 debug mode
|
||||
i8042.direct [HW] Put keyboard port into non-translated mode
|
||||
@ -873,17 +877,19 @@ and is between 256 and 4096 characters. It is defined in the file
|
||||
See Documentation/ide/ide.txt.
|
||||
|
||||
idle= [X86]
|
||||
Format: idle=poll or idle=mwait, idle=halt, idle=nomwait
|
||||
Poll forces a polling idle loop that can slightly improves the performance
|
||||
of waking up a idle CPU, but will use a lot of power and make the system
|
||||
run hot. Not recommended.
|
||||
idle=mwait. On systems which support MONITOR/MWAIT but the kernel chose
|
||||
to not use it because it doesn't save as much power as a normal idle
|
||||
loop use the MONITOR/MWAIT idle loop anyways. Performance should be the same
|
||||
as idle=poll.
|
||||
idle=halt. Halt is forced to be used for CPU idle.
|
||||
Format: idle=poll, idle=mwait, idle=halt, idle=nomwait
|
||||
Poll forces a polling idle loop that can slightly
|
||||
improve the performance of waking up an idle CPU, but
|
||||
will use a lot of power and make the system run hot.
|
||||
Not recommended.
|
||||
idle=mwait: On systems which support MONITOR/MWAIT but
|
||||
the kernel chose to not use it because it doesn't save
|
||||
as much power as a normal idle loop, use the
|
||||
MONITOR/MWAIT idle loop anyways. Performance should be
|
||||
the same as idle=poll.
|
||||
idle=halt: Halt is forced to be used for CPU idle.
|
||||
In such case C2/C3 won't be used again.
|
||||
idle=nomwait. Disable mwait for CPU C-states
|
||||
idle=nomwait: Disable mwait for CPU C-states
|
||||
|
||||
ide-pci-generic.all-generic-ide [HW] (E)IDE subsystem
|
||||
Claim all unknown PCI IDE storage controllers.
|
||||
@ -923,6 +929,10 @@ and is between 256 and 4096 characters. It is defined in the file
|
||||
|
||||
inttest= [IA64]
|
||||
|
||||
iomem= Disable strict checking of access to MMIO memory
|
||||
strict regions from userspace.
|
||||
relaxed
|
||||
|
||||
iommu= [x86]
|
||||
off
|
||||
force
|
||||
@ -1074,8 +1084,8 @@ and is between 256 and 4096 characters. It is defined in the file
|
||||
lapic [X86-32,APIC] Enable the local APIC even if BIOS
|
||||
disabled it.
|
||||
|
||||
lapic_timer_c2_ok [X86-32,x86-64,APIC] trust the local apic timer in
|
||||
C2 power state.
|
||||
lapic_timer_c2_ok [X86-32,x86-64,APIC] trust the local apic timer
|
||||
in C2 power state.
|
||||
|
||||
libata.dma= [LIBATA] DMA control
|
||||
libata.dma=0 Disable all PATA and SATA DMA
|
||||
@ -1127,6 +1137,8 @@ and is between 256 and 4096 characters. It is defined in the file
|
||||
If there are multiple matching configurations changing
|
||||
the same attribute, the last one is used.
|
||||
|
||||
lmb=debug [KNL] Enable lmb debug messages.
|
||||
|
||||
load_ramdisk= [RAM] List of ramdisks to load from floppy
|
||||
See Documentation/blockdev/ramdisk.txt.
|
||||
|
||||
@ -1560,6 +1572,9 @@ and is between 256 and 4096 characters. It is defined in the file
|
||||
|
||||
nosoftlockup [KNL] Disable the soft-lockup detector.
|
||||
|
||||
noswapaccount [KNL] Disable accounting of swap in memory resource
|
||||
controller. (See Documentation/controllers/memory.txt)
|
||||
|
||||
nosync [HW,M68K] Disables sync negotiation for all devices.
|
||||
|
||||
notsc [BUGS=X86-32] Disable Time Stamp Counter
|
||||
@ -1579,6 +1594,10 @@ and is between 256 and 4096 characters. It is defined in the file
|
||||
|
||||
nr_uarts= [SERIAL] maximum number of UARTs to be registered.
|
||||
|
||||
ohci1394_dma=early [HW] enable debugging via the ohci1394 driver.
|
||||
See Documentation/debugging-via-ohci1394.txt for more
|
||||
info.
|
||||
|
||||
olpc_ec_timeout= [OLPC] ms delay when issuing EC commands
|
||||
Rather than timing out after 20 ms if an EC
|
||||
command is not properly ACKed, override the length
|
||||
@ -1803,10 +1822,10 @@ and is between 256 and 4096 characters. It is defined in the file
|
||||
autoconfiguration.
|
||||
Ranges are in pairs (memory base and size).
|
||||
|
||||
dynamic_printk
|
||||
Enables pr_debug()/dev_dbg() calls if
|
||||
CONFIG_DYNAMIC_PRINTK_DEBUG has been enabled. These can also
|
||||
be switched on/off via <debugfs>/dynamic_printk/modules
|
||||
dynamic_printk Enables pr_debug()/dev_dbg() calls if
|
||||
CONFIG_DYNAMIC_PRINTK_DEBUG has been enabled.
|
||||
These can also be switched on/off via
|
||||
<debugfs>/dynamic_printk/modules
|
||||
|
||||
print-fatal-signals=
|
||||
[KNL] debug: print fatal signals
|
||||
@ -1894,7 +1913,7 @@ and is between 256 and 4096 characters. It is defined in the file
|
||||
|
||||
reboot= [BUGS=X86-32,BUGS=ARM,BUGS=IA-64] Rebooting mode
|
||||
Format: <reboot_mode>[,<reboot_mode2>[,...]]
|
||||
See arch/*/kernel/reboot.c or arch/*/kernel/process.c
|
||||
See arch/*/kernel/reboot.c or arch/*/kernel/process.c
|
||||
|
||||
relax_domain_level=
|
||||
[KNL, SMP] Set scheduler's default relax_domain_level.
|
||||
@ -2294,7 +2313,8 @@ and is between 256 and 4096 characters. It is defined in the file
|
||||
|
||||
thermal.psv= [HW,ACPI]
|
||||
-1: disable all passive trip points
|
||||
<degrees C>: override all passive trip points to this value
|
||||
<degrees C>: override all passive trip points to this
|
||||
value
|
||||
|
||||
thermal.tzp= [HW,ACPI]
|
||||
Specify global default ACPI thermal zone polling rate
|
||||
@ -2382,6 +2402,41 @@ and is between 256 and 4096 characters. It is defined in the file
|
||||
usbhid.mousepoll=
|
||||
[USBHID] The interval which mice are to be polled at.
|
||||
|
||||
usb-storage.delay_use=
|
||||
[UMS] The delay in seconds before a new device is
|
||||
scanned for Logical Units (default 5).
|
||||
|
||||
usb-storage.quirks=
|
||||
[UMS] A list of quirks entries to supplement or
|
||||
override the built-in unusual_devs list. List
|
||||
entries are separated by commas. Each entry has
|
||||
the form VID:PID:Flags where VID and PID are Vendor
|
||||
and Product ID values (4-digit hex numbers) and
|
||||
Flags is a set of characters, each corresponding
|
||||
to a common usb-storage quirk flag as follows:
|
||||
a = SANE_SENSE (collect more than 18 bytes
|
||||
of sense data);
|
||||
c = FIX_CAPACITY (decrease the reported
|
||||
device capacity by one sector);
|
||||
h = CAPACITY_HEURISTICS (decrease the
|
||||
reported device capacity by one
|
||||
sector if the number is odd);
|
||||
i = IGNORE_DEVICE (don't bind to this
|
||||
device);
|
||||
l = NOT_LOCKABLE (don't try to lock and
|
||||
unlock ejectable media);
|
||||
m = MAX_SECTORS_64 (don't transfer more
|
||||
than 64 sectors = 32 KB at a time);
|
||||
o = CAPACITY_OK (accept the capacity
|
||||
reported by the device);
|
||||
r = IGNORE_RESIDUE (the device reports
|
||||
bogus residue values);
|
||||
s = SINGLE_LUN (the device has only one
|
||||
Logical Unit);
|
||||
w = NO_WP_DETECT (don't test whether the
|
||||
medium is write-protected).
|
||||
Example: quirks=0419:aaf5:rl,0421:0433:rc
|
||||
|
||||
add_efi_memmap [EFI; x86-32,X86-64] Include EFI memory map in
|
||||
kernel's map of available physical RAM.
|
||||
|
||||
@ -2442,8 +2497,8 @@ and is between 256 and 4096 characters. It is defined in the file
|
||||
Format:
|
||||
<irq>,<irq_mask>,<io>,<full_duplex>,<do_sound>,<lockup_hack>[,<irq2>[,<irq3>[,<irq4>]]]
|
||||
|
||||
norandmaps Don't use address space randomization
|
||||
Equivalent to echo 0 > /proc/sys/kernel/randomize_va_space
|
||||
norandmaps Don't use address space randomization. Equivalent to
|
||||
echo 0 > /proc/sys/kernel/randomize_va_space
|
||||
|
||||
______________________________________________________________________
|
||||
|
||||
|
@ -118,8 +118,8 @@ the name of the kobject, call kobject_rename():
|
||||
|
||||
int kobject_rename(struct kobject *kobj, const char *new_name);
|
||||
|
||||
Note kobject_rename does perform any locking or have a solid notion of
|
||||
what names are valid so the provide must provide their own sanity checking
|
||||
kobject_rename does not perform any locking or have a solid notion of
|
||||
what names are valid so the caller must provide their own sanity checking
|
||||
and serialization.
|
||||
|
||||
There is a function called kobject_set_name() but that is legacy cruft and
|
||||
|
@ -497,7 +497,10 @@ The first column provides the kernel address where the probe is inserted.
|
||||
The second column identifies the type of probe (k - kprobe, r - kretprobe
|
||||
and j - jprobe), while the third column specifies the symbol+offset of
|
||||
the probe. If the probed function belongs to a module, the module name
|
||||
is also specified.
|
||||
is also specified. Following columns show probe status. If the probe is on
|
||||
a virtual address that is no longer valid (module init sections, module
|
||||
virtual addresses that correspond to modules that've been unloaded),
|
||||
such probes are marked with [GONE].
|
||||
|
||||
/debug/kprobes/enabled: Turn kprobes ON/OFF
|
||||
|
||||
|
@ -1,7 +1,7 @@
|
||||
ThinkPad ACPI Extras Driver
|
||||
|
||||
Version 0.21
|
||||
May 29th, 2008
|
||||
Version 0.22
|
||||
November 23rd, 2008
|
||||
|
||||
Borislav Deianov <borislav@users.sf.net>
|
||||
Henrique de Moraes Holschuh <hmh@hmh.eng.br>
|
||||
@ -16,7 +16,8 @@ supported by the generic Linux ACPI drivers.
|
||||
This driver used to be named ibm-acpi until kernel 2.6.21 and release
|
||||
0.13-20070314. It used to be in the drivers/acpi tree, but it was
|
||||
moved to the drivers/misc tree and renamed to thinkpad-acpi for kernel
|
||||
2.6.22, and release 0.14.
|
||||
2.6.22, and release 0.14. It was moved to drivers/platform/x86 for
|
||||
kernel 2.6.29 and release 0.22.
|
||||
|
||||
The driver is named "thinkpad-acpi". In some places, like module
|
||||
names, "thinkpad_acpi" is used because of userspace issues.
|
||||
@ -1412,6 +1413,24 @@ Sysfs notes:
|
||||
rfkill controller switch "tpacpi_wwan_sw": refer to
|
||||
Documentation/rfkill.txt for details.
|
||||
|
||||
EXPERIMENTAL: UWB
|
||||
-----------------
|
||||
|
||||
This feature is marked EXPERIMENTAL because it has not been extensively
|
||||
tested and validated in various ThinkPad models yet. The feature may not
|
||||
work as expected. USE WITH CAUTION! To use this feature, you need to supply
|
||||
the experimental=1 parameter when loading the module.
|
||||
|
||||
sysfs rfkill class: switch "tpacpi_uwb_sw"
|
||||
|
||||
This feature exports an rfkill controller for the UWB device, if one is
|
||||
present and enabled in the BIOS.
|
||||
|
||||
Sysfs notes:
|
||||
|
||||
rfkill controller switch "tpacpi_uwb_sw": refer to
|
||||
Documentation/rfkill.txt for details.
|
||||
|
||||
Multiple Commands, Module Parameters
|
||||
------------------------------------
|
||||
|
||||
@ -1475,7 +1494,7 @@ Sysfs interface changelog:
|
||||
|
||||
0x020100: Marker for thinkpad-acpi with hot key NVRAM polling
|
||||
support. If you must, use it to know you should not
|
||||
start an userspace NVRAM poller (allows to detect when
|
||||
start a userspace NVRAM poller (allows to detect when
|
||||
NVRAM is compiled out by the user because it is
|
||||
unneeded/undesired in the first place).
|
||||
0x020101: Marker for thinkpad-acpi with hot key NVRAM polling
|
||||
|
@ -1,5 +1,5 @@
|
||||
# This creates the demonstration utility "lguest" which runs a Linux guest.
|
||||
CFLAGS:=-Wall -Wmissing-declarations -Wmissing-prototypes -O3 -I../../include -I../../arch/x86/include
|
||||
CFLAGS:=-Wall -Wmissing-declarations -Wmissing-prototypes -O3 -I../../include -I../../arch/x86/include -U_FORTIFY_SOURCE
|
||||
LDLIBS:=-lz
|
||||
|
||||
all: lguest
|
||||
|
@ -125,14 +125,14 @@ TRIDENT_CARD_MAGIC 0x5072696E trident_card sound/oss/trident.c
|
||||
ROUTER_MAGIC 0x524d4157 wan_device include/linux/wanrouter.h
|
||||
SCC_MAGIC 0x52696368 gs_port drivers/char/scc.h
|
||||
SAVEKMSG_MAGIC1 0x53415645 savekmsg arch/*/amiga/config.c
|
||||
GDA_MAGIC 0x58464552 gda include/asm-mips64/sn/gda.h
|
||||
GDA_MAGIC 0x58464552 gda arch/mips/include/asm/sn/gda.h
|
||||
RED_MAGIC1 0x5a2cf071 (any) mm/slab.c
|
||||
STL_PORTMAGIC 0x5a7182c9 stlport include/linux/stallion.h
|
||||
EEPROM_MAGIC_VALUE 0x5ab478d2 lanai_dev drivers/atm/lanai.c
|
||||
HDLCDRV_MAGIC 0x5ac6e778 hdlcdrv_state include/linux/hdlcdrv.h
|
||||
EPCA_MAGIC 0x5c6df104 channel include/linux/epca.h
|
||||
PCXX_MAGIC 0x5c6df104 channel drivers/char/pcxx.h
|
||||
KV_MAGIC 0x5f4b565f kernel_vars_s include/asm-mips64/sn/klkernvars.h
|
||||
KV_MAGIC 0x5f4b565f kernel_vars_s arch/mips/include/asm/sn/klkernvars.h
|
||||
I810_STATE_MAGIC 0x63657373 i810_state sound/oss/i810_audio.c
|
||||
TRIDENT_STATE_MAGIC 0x63657373 trident_state sound/oss/trident.c
|
||||
M3_CARD_MAGIC 0x646e6f50 m3_card sound/oss/maestro3.c
|
||||
@ -158,7 +158,7 @@ CCB_MAGIC 0xf2691ad2 ccb drivers/scsi/ncr53c8xx.c
|
||||
QUEUE_MAGIC_FREE 0xf7e1c9a3 queue_entry drivers/scsi/arm/queue.c
|
||||
QUEUE_MAGIC_USED 0xf7e1cc33 queue_entry drivers/scsi/arm/queue.c
|
||||
HTB_CMAGIC 0xFEFAFEF1 htb_class net/sched/sch_htb.c
|
||||
NMI_MAGIC 0x48414d4d455201 nmi_s include/asm-mips64/sn/nmi.h
|
||||
NMI_MAGIC 0x48414d4d455201 nmi_s arch/mips/include/asm/sn/nmi.h
|
||||
|
||||
Note that there are also defined special per-driver magic numbers in sound
|
||||
memory management. See include/sound/sndmagic.h for complete list of them. Many
|
||||
|
@ -124,7 +124,7 @@ config options.
|
||||
This option can be kernel module too.
|
||||
|
||||
--------------------------------
|
||||
3 sysfs files for memory hotplug
|
||||
4 sysfs files for memory hotplug
|
||||
--------------------------------
|
||||
All sections have their device information under /sys/devices/system/memory as
|
||||
|
||||
@ -138,11 +138,12 @@ For example, assume 1GiB section size. A device for a memory starting at
|
||||
(0x100000000 / 1Gib = 4)
|
||||
This device covers address range [0x100000000 ... 0x140000000)
|
||||
|
||||
Under each section, you can see 3 files.
|
||||
Under each section, you can see 4 files.
|
||||
|
||||
/sys/devices/system/memory/memoryXXX/phys_index
|
||||
/sys/devices/system/memory/memoryXXX/phys_device
|
||||
/sys/devices/system/memory/memoryXXX/state
|
||||
/sys/devices/system/memory/memoryXXX/removable
|
||||
|
||||
'phys_index' : read-only and contains section id, same as XXX.
|
||||
'state' : read-write
|
||||
@ -150,10 +151,20 @@ Under each section, you can see 3 files.
|
||||
at write: user can specify "online", "offline" command
|
||||
'phys_device': read-only: designed to show the name of physical memory device.
|
||||
This is not well implemented now.
|
||||
'removable' : read-only: contains an integer value indicating
|
||||
whether the memory section is removable or not
|
||||
removable. A value of 1 indicates that the memory
|
||||
section is removable and a value of 0 indicates that
|
||||
it is not removable.
|
||||
|
||||
NOTE:
|
||||
These directories/files appear after physical memory hotplug phase.
|
||||
|
||||
If CONFIG_NUMA is enabled the
|
||||
/sys/devices/system/memory/memoryXXX memory section
|
||||
directories can also be accessed via symbolic links located in
|
||||
the /sys/devices/system/node/node* directories. For example:
|
||||
/sys/devices/system/node/node0/memory9 -> ../../memory/memory9
|
||||
|
||||
--------------------------------
|
||||
4. Physical memory hot-add phase
|
||||
@ -365,7 +376,6 @@ node if necessary.
|
||||
- allowing memory hot-add to ZONE_MOVABLE. maybe we need some switch like
|
||||
sysctl or new control file.
|
||||
- showing memory section and physical device relationship.
|
||||
- showing memory section and node relationship (maybe good for NUMA)
|
||||
- showing memory section is under ZONE_MOVABLE or not
|
||||
- test and improve memory offlining.
|
||||
- support HugeTLB page migration and offlining.
|
||||
|
@ -44,7 +44,7 @@ FILES, CONFIGS AND COMPATABILITY
|
||||
|
||||
Two files are introduced:
|
||||
|
||||
a) 'include/asm-mips/mach-au1x00/au1xxx_ide.h'
|
||||
a) 'arch/mips/include/asm/mach-au1x00/au1xxx_ide.h'
|
||||
contains : struct _auide_hwif
|
||||
timing parameters for PIO mode 0/1/2/3/4
|
||||
timing parameters for MWDMA 0/1/2
|
||||
@ -52,14 +52,12 @@ Two files are introduced:
|
||||
b) 'drivers/ide/mips/au1xxx-ide.c'
|
||||
contains the functionality of the AU1XXX IDE driver
|
||||
|
||||
Four configs variables are introduced:
|
||||
The following extra config variables are introduced:
|
||||
|
||||
CONFIG_BLK_DEV_IDE_AU1XXX_PIO_DBDMA - enable the PIO+DBDMA mode
|
||||
CONFIG_BLK_DEV_IDE_AU1XXX_MDMA2_DBDMA - enable the MWDMA mode
|
||||
CONFIG_BLK_DEV_IDE_AU1XXX_BURSTABLE_ON - set Burstable FIFO in DBDMA
|
||||
controller
|
||||
CONFIG_BLK_DEV_IDE_AU1XXX_SEQTS_PER_RQ - maximum transfer size
|
||||
per descriptor
|
||||
|
||||
|
||||
SUPPORTED IDE MODES
|
||||
@ -87,7 +85,6 @@ CONFIG_BLK_DEV_IDEDMA_PCI=y
|
||||
CONFIG_IDEDMA_PCI_AUTO=y
|
||||
CONFIG_BLK_DEV_IDE_AU1XXX=y
|
||||
CONFIG_BLK_DEV_IDE_AU1XXX_MDMA2_DBDMA=y
|
||||
CONFIG_BLK_DEV_IDE_AU1XXX_SEQTS_PER_RQ=128
|
||||
CONFIG_BLK_DEV_IDEDMA=y
|
||||
CONFIG_IDEDMA_AUTO=y
|
||||
|
||||
@ -105,7 +102,6 @@ CONFIG_BLK_DEV_IDEDMA_PCI=y
|
||||
CONFIG_IDEDMA_PCI_AUTO=y
|
||||
CONFIG_BLK_DEV_IDE_AU1XXX=y
|
||||
CONFIG_BLK_DEV_IDE_AU1XXX_MDMA2_DBDMA=y
|
||||
CONFIG_BLK_DEV_IDE_AU1XXX_SEQTS_PER_RQ=128
|
||||
CONFIG_BLK_DEV_IDEDMA=y
|
||||
CONFIG_IDEDMA_AUTO=y
|
||||
|
||||
|
@ -2,14 +2,14 @@
|
||||
IP-Aliasing:
|
||||
============
|
||||
|
||||
IP-aliases are additional IP-addresses/masks hooked up to a base
|
||||
interface by adding a colon and a string when running ifconfig.
|
||||
IP-aliases are an obsolete way to manage multiple IP-addresses/masks
|
||||
per interface. Newer tools such as iproute2 support multiple
|
||||
address/prefixes per interface, but aliases are still supported
|
||||
for backwards compatibility.
|
||||
|
||||
An alias is formed by adding a colon and a string when running ifconfig.
|
||||
This string is usually numeric, but this is not a must.
|
||||
|
||||
IP-Aliases are available if CONFIG_INET (`standard' IPv4 networking)
|
||||
is configured in the kernel.
|
||||
|
||||
|
||||
o Alias creation.
|
||||
Alias creation is done by 'magic' interface naming: eg. to create a
|
||||
200.1.1.1 alias for eth0 ...
|
||||
@ -38,16 +38,3 @@ o Relationship with main device
|
||||
|
||||
If the base device is shut down the added aliases will be deleted
|
||||
too.
|
||||
|
||||
|
||||
Contact
|
||||
-------
|
||||
Please finger or e-mail me:
|
||||
Juan Jose Ciarlante <jjciarla@raiz.uncu.edu.ar>
|
||||
|
||||
Updated by Erik Schoenfelder <schoenfr@gaertner.DE>
|
||||
|
||||
; local variables:
|
||||
; mode: indented-text
|
||||
; mode: auto-fill
|
||||
; end:
|
||||
|
@ -51,7 +51,8 @@ Built-in netconsole starts immediately after the TCP stack is
|
||||
initialized and attempts to bring up the supplied dev at the supplied
|
||||
address.
|
||||
|
||||
The remote host can run either 'netcat -u -l -p <port>' or syslogd.
|
||||
The remote host can run either 'netcat -u -l -p <port>',
|
||||
'nc -l -u <port>' or syslogd.
|
||||
|
||||
Dynamic reconfiguration:
|
||||
========================
|
||||
|
@ -540,7 +540,7 @@ A client would issue an operation by:
|
||||
MSG_MORE should be set in msghdr::msg_flags on all but the last part of
|
||||
the request. Multiple requests may be made simultaneously.
|
||||
|
||||
If a call is intended to go to a destination other then the default
|
||||
If a call is intended to go to a destination other than the default
|
||||
specified through connect(), then msghdr::msg_name should be set on the
|
||||
first request message of that call.
|
||||
|
||||
|
@ -118,7 +118,7 @@ As mentioned above, main purpose of TUN/TAP driver is tunneling.
|
||||
It is used by VTun (http://vtun.sourceforge.net).
|
||||
|
||||
Another interesting application using TUN/TAP is pipsecd
|
||||
(http://perso.enst.fr/~beyssac/pipsec/), an userspace IPSec
|
||||
(http://perso.enst.fr/~beyssac/pipsec/), a userspace IPSec
|
||||
implementation that can use complete kernel routing (unlike FreeS/WAN).
|
||||
|
||||
3. How does Virtual network device actually work ?
|
||||
|
@ -109,12 +109,18 @@ and it's also much more restricted in the latter case:
|
||||
FURTHER NOTES ON NO-MMU MMAP
|
||||
============================
|
||||
|
||||
(*) A request for a private mapping of less than a page in size may not return
|
||||
a page-aligned buffer. This is because the kernel calls kmalloc() to
|
||||
allocate the buffer, not get_free_page().
|
||||
(*) A request for a private mapping of a file may return a buffer that is not
|
||||
page-aligned. This is because XIP may take place, and the data may not be
|
||||
page aligned in the backing store.
|
||||
|
||||
(*) A list of all the mappings on the system is visible through /proc/maps in
|
||||
no-MMU mode.
|
||||
(*) A request for an anonymous mapping will always be page aligned. If
|
||||
possible the size of the request should be a power of two otherwise some
|
||||
of the space may be wasted as the kernel must allocate a power-of-2
|
||||
granule but will only discard the excess if appropriately configured as
|
||||
this has an effect on fragmentation.
|
||||
|
||||
(*) A list of all the private copy and anonymous mappings on the system is
|
||||
visible through /proc/maps in no-MMU mode.
|
||||
|
||||
(*) A list of all the mappings in use by a process is visible through
|
||||
/proc/<pid>/maps in no-MMU mode.
|
||||
@ -242,3 +248,18 @@ PROVIDING SHAREABLE BLOCK DEVICE SUPPORT
|
||||
Provision of shared mappings on block device files is exactly the same as for
|
||||
character devices. If there isn't a real device underneath, then the driver
|
||||
should allocate sufficient contiguous memory to honour any supported mapping.
|
||||
|
||||
|
||||
=================================
|
||||
ADJUSTING PAGE TRIMMING BEHAVIOUR
|
||||
=================================
|
||||
|
||||
NOMMU mmap automatically rounds up to the nearest power-of-2 number of pages
|
||||
when performing an allocation. This can have adverse effects on memory
|
||||
fragmentation, and as such, is left configurable. The default behaviour is to
|
||||
aggressively trim allocations and discard any excess pages back in to the page
|
||||
allocator. In order to retain finer-grained control over fragmentation, this
|
||||
behaviour can either be disabled completely, or bumped up to a higher page
|
||||
watermark where trimming begins.
|
||||
|
||||
Page trimming behaviour is configurable via the sysctl `vm.nr_trim_pages'.
|
||||
|
@ -31,7 +31,7 @@ anyways).
|
||||
|
||||
After detecting the processor type, the kernel patches out sections of code
|
||||
that shouldn't be used by writing nop's over it. Using cpufeatures requires
|
||||
just 2 macros (found in include/asm-ppc/cputable.h), as seen in head.S
|
||||
just 2 macros (found in arch/powerpc/include/asm/cputable.h), as seen in head.S
|
||||
transfer_to_handler:
|
||||
|
||||
#ifdef CONFIG_ALTIVEC
|
||||
|
39
Documentation/powerpc/dts-bindings/4xx/ndfc.txt
Normal file
39
Documentation/powerpc/dts-bindings/4xx/ndfc.txt
Normal file
@ -0,0 +1,39 @@
|
||||
AMCC NDFC (NanD Flash Controller)
|
||||
|
||||
Required properties:
|
||||
- compatible : "ibm,ndfc".
|
||||
- reg : should specify chip select and size used for the chip (0x2000).
|
||||
|
||||
Optional properties:
|
||||
- ccr : NDFC config and control register value (default 0).
|
||||
- bank-settings : NDFC bank configuration register value (default 0).
|
||||
|
||||
Notes:
|
||||
- partition(s) - follows the OF MTD standard for partitions
|
||||
|
||||
Example:
|
||||
|
||||
ndfc@1,0 {
|
||||
compatible = "ibm,ndfc";
|
||||
reg = <0x00000001 0x00000000 0x00002000>;
|
||||
ccr = <0x00001000>;
|
||||
bank-settings = <0x80002222>;
|
||||
#address-cells = <1>;
|
||||
#size-cells = <1>;
|
||||
|
||||
nand {
|
||||
#address-cells = <1>;
|
||||
#size-cells = <1>;
|
||||
|
||||
partition@0 {
|
||||
label = "kernel";
|
||||
reg = <0x00000000 0x00200000>;
|
||||
};
|
||||
partition@200000 {
|
||||
label = "root";
|
||||
reg = <0x00200000 0x03E00000>;
|
||||
};
|
||||
};
|
||||
};
|
||||
|
||||
|
@ -18,7 +18,7 @@ This is the memory-mapped registers for on board FPGA.
|
||||
|
||||
Required properties:
|
||||
- compatible : should be "fsl,fpga-pixis".
|
||||
- reg : should contain the address and the lenght of the FPPGA register
|
||||
- reg : should contain the address and the length of the FPGA register
|
||||
set.
|
||||
|
||||
Example (MPC8610HPCD):
|
||||
@ -27,3 +27,33 @@ Example (MPC8610HPCD):
|
||||
compatible = "fsl,fpga-pixis";
|
||||
reg = <0xe8000000 32>;
|
||||
};
|
||||
|
||||
* Freescale BCSR GPIO banks
|
||||
|
||||
Some BCSR registers act as simple GPIO controllers, each such
|
||||
register can be represented by the gpio-controller node.
|
||||
|
||||
Required properties:
|
||||
- compatible : Should be "fsl,<board>-bcsr-gpio".
|
||||
- reg : Should contain the address and the length of the GPIO bank
|
||||
register.
|
||||
- #gpio-cells : Should be two. The first cell is the pin number and the
|
||||
second cell is used to specify optional parameters (currently unused).
|
||||
- gpio-controller : Marks the port as GPIO controller.
|
||||
|
||||
Example:
|
||||
|
||||
bcsr@1,0 {
|
||||
#address-cells = <1>;
|
||||
#size-cells = <1>;
|
||||
compatible = "fsl,mpc8360mds-bcsr";
|
||||
reg = <1 0 0x8000>;
|
||||
ranges = <0 1 0 0x8000>;
|
||||
|
||||
bcsr13: gpio-controller@d {
|
||||
#gpio-cells = <2>;
|
||||
compatible = "fsl,mpc8360mds-bcsr-gpio";
|
||||
reg = <0xd 1>;
|
||||
gpio-controller;
|
||||
};
|
||||
};
|
||||
|
180
Documentation/powerpc/dts-bindings/fsl/mpc5200.txt
Normal file
180
Documentation/powerpc/dts-bindings/fsl/mpc5200.txt
Normal file
@ -0,0 +1,180 @@
|
||||
MPC5200 Device Tree Bindings
|
||||
----------------------------
|
||||
|
||||
(c) 2006-2009 Secret Lab Technologies Ltd
|
||||
Grant Likely <grant.likely@secretlab.ca>
|
||||
|
||||
Naming conventions
|
||||
------------------
|
||||
For mpc5200 on-chip devices, the format for each compatible value is
|
||||
<chip>-<device>[-<mode>]. The OS should be able to match a device driver
|
||||
to the device based solely on the compatible value. If two drivers
|
||||
match on the compatible list; the 'most compatible' driver should be
|
||||
selected.
|
||||
|
||||
The split between the MPC5200 and the MPC5200B leaves a bit of a
|
||||
conundrum. How should the compatible property be set up to provide
|
||||
maximum compatibility information; but still accurately describe the
|
||||
chip? For the MPC5200; the answer is easy. Most of the SoC devices
|
||||
originally appeared on the MPC5200. Since they didn't exist anywhere
|
||||
else; the 5200 compatible properties will contain only one item;
|
||||
"fsl,mpc5200-<device>".
|
||||
|
||||
The 5200B is almost the same as the 5200, but not quite. It fixes
|
||||
silicon bugs and it adds a small number of enhancements. Most of the
|
||||
devices provide exactly the same interface as on the 5200. A few
|
||||
devices have extra functions but still have a backwards compatible mode.
|
||||
To express this information as completely as possible, 5200B device trees
|
||||
should have two items in the compatible list:
|
||||
compatible = "fsl,mpc5200b-<device>","fsl,mpc5200-<device>";
|
||||
|
||||
It is *strongly* recommended that 5200B device trees follow this convention
|
||||
(instead of only listing the base mpc5200 item).
|
||||
|
||||
ie. ethernet on mpc5200: compatible = "fsl,mpc5200-fec";
|
||||
ethernet on mpc5200b: compatible = "fsl,mpc5200b-fec", "fsl,mpc5200-fec";
|
||||
|
||||
Modal devices, like PSCs, also append the configured function to the
|
||||
end of the compatible field. ie. A PSC in i2s mode would specify
|
||||
"fsl,mpc5200-psc-i2s", not "fsl,mpc5200-i2s". This convention is chosen to
|
||||
avoid naming conflicts with non-psc devices providing the same
|
||||
function. For example, "fsl,mpc5200-spi" and "fsl,mpc5200-psc-spi" describe
|
||||
the mpc5200 simple spi device and a PSC spi mode respectively.
|
||||
|
||||
At the time of writing, exact chip may be either 'fsl,mpc5200' or
|
||||
'fsl,mpc5200b'.
|
||||
|
||||
The soc node
|
||||
------------
|
||||
This node describes the on chip SOC peripherals. Every mpc5200 based
|
||||
board will have this node, and as such there is a common naming
|
||||
convention for SOC devices.
|
||||
|
||||
Required properties:
|
||||
name description
|
||||
---- -----------
|
||||
ranges Memory range of the internal memory mapped registers.
|
||||
Should be <0 [baseaddr] 0xc000>
|
||||
reg Should be <[baseaddr] 0x100>
|
||||
compatible mpc5200: "fsl,mpc5200-immr"
|
||||
mpc5200b: "fsl,mpc5200b-immr"
|
||||
system-frequency 'fsystem' frequency in Hz; XLB, IPB, USB and PCI
|
||||
clocks are derived from the fsystem clock.
|
||||
bus-frequency IPB bus frequency in Hz. Clock rate
|
||||
used by most of the soc devices.
|
||||
|
||||
soc child nodes
|
||||
---------------
|
||||
Any on chip SOC devices available to Linux must appear as soc5200 child nodes.
|
||||
|
||||
Note: The tables below show the value for the mpc5200. A mpc5200b device
|
||||
tree should use the "fsl,mpc5200b-<device>","fsl,mpc5200-<device>" form.
|
||||
|
||||
Required soc5200 child nodes:
|
||||
name compatible Description
|
||||
---- ---------- -----------
|
||||
cdm@<addr> fsl,mpc5200-cdm Clock Distribution
|
||||
interrupt-controller@<addr> fsl,mpc5200-pic need an interrupt
|
||||
controller to boot
|
||||
bestcomm@<addr> fsl,mpc5200-bestcomm Bestcomm DMA controller
|
||||
|
||||
Recommended soc5200 child nodes; populate as needed for your board
|
||||
name compatible Description
|
||||
---- ---------- -----------
|
||||
timer@<addr> fsl,mpc5200-gpt General purpose timers
|
||||
gpio@<addr> fsl,mpc5200-gpio MPC5200 simple gpio controller
|
||||
gpio@<addr> fsl,mpc5200-gpio-wkup MPC5200 wakeup gpio controller
|
||||
rtc@<addr> fsl,mpc5200-rtc Real time clock
|
||||
mscan@<addr> fsl,mpc5200-mscan CAN bus controller
|
||||
pci@<addr> fsl,mpc5200-pci PCI bridge
|
||||
serial@<addr> fsl,mpc5200-psc-uart PSC in serial mode
|
||||
i2s@<addr> fsl,mpc5200-psc-i2s PSC in i2s mode
|
||||
ac97@<addr> fsl,mpc5200-psc-ac97 PSC in ac97 mode
|
||||
spi@<addr> fsl,mpc5200-psc-spi PSC in spi mode
|
||||
irda@<addr> fsl,mpc5200-psc-irda PSC in IrDA mode
|
||||
spi@<addr> fsl,mpc5200-spi MPC5200 spi device
|
||||
ethernet@<addr> fsl,mpc5200-fec MPC5200 ethernet device
|
||||
ata@<addr> fsl,mpc5200-ata IDE ATA interface
|
||||
i2c@<addr> fsl,mpc5200-i2c I2C controller
|
||||
usb@<addr> fsl,mpc5200-ohci,ohci-be USB controller
|
||||
xlb@<addr> fsl,mpc5200-xlb XLB arbitrator
|
||||
|
||||
fsl,mpc5200-gpt nodes
|
||||
---------------------
|
||||
On the mpc5200 and 5200b, GPT0 has a watchdog timer function. If the board
|
||||
design supports the internal wdt, then the device node for GPT0 should
|
||||
include the empty property 'fsl,has-wdt'.
|
||||
|
||||
An mpc5200-gpt can be used as a single line GPIO controller. To do so,
|
||||
add the following properties to the gpt node:
|
||||
gpio-controller;
|
||||
#gpio-cells = <2>;
|
||||
When referencing the GPIO line from another node, the first cell must always
|
||||
be zero and the second cell represents the gpio flags as described in the
|
||||
gpio device tree binding.
|
||||
|
||||
An mpc5200-gpt can be used as a single line edge sensitive interrupt
|
||||
controller. To do so, add the following properties to the gpt node:
|
||||
interrupt-controller;
|
||||
#interrupt-cells = <1>;
|
||||
When referencing the IRQ line from another node, the cell represents the
|
||||
sense mode; 1 for edge rising, 2 for edge falling.
|
||||
|
||||
fsl,mpc5200-psc nodes
|
||||
---------------------
|
||||
The PSCs should include a cell-index which is the index of the PSC in
|
||||
hardware. cell-index is used to determine which shared SoC registers to
|
||||
use when setting up PSC clocking. cell-index number starts at '0'. ie:
|
||||
PSC1 has 'cell-index = <0>'
|
||||
PSC4 has 'cell-index = <3>'
|
||||
|
||||
PSC in i2s mode: The mpc5200 and mpc5200b PSCs are not compatible when in
|
||||
i2s mode. An 'mpc5200b-psc-i2s' node cannot include 'mpc5200-psc-i2s' in the
|
||||
compatible field.
|
||||
|
||||
|
||||
fsl,mpc5200-gpio and fsl,mpc5200-gpio-wkup nodes
|
||||
------------------------------------------------
|
||||
Each GPIO controller node should have the empty property gpio-controller and
|
||||
#gpio-cells set to 2. First cell is the GPIO number which is interpreted
|
||||
according to the bit numbers in the GPIO control registers. The second cell
|
||||
is for flags which is currently unused.
|
||||
|
||||
fsl,mpc5200-fec nodes
|
||||
---------------------
|
||||
The FEC node can specify one of the following properties to configure
|
||||
the MII link:
|
||||
- fsl,7-wire-mode - An empty property that specifies the link uses 7-wire
|
||||
mode instead of MII
|
||||
- current-speed - Specifies that the MII should be configured for a fixed
|
||||
speed. This property should contain two cells. The
|
||||
first cell specifies the speed in Mbps and the second
|
||||
should be '0' for half duplex and '1' for full duplex
|
||||
- phy-handle - Contains a phandle to an Ethernet PHY.
|
||||
|
||||
Interrupt controller (fsl,mpc5200-pic) node
|
||||
-------------------------------------------
|
||||
The mpc5200 pic binding splits hardware IRQ numbers into two levels. The
|
||||
split reflects the layout of the PIC hardware itself, which groups
|
||||
interrupts into one of three groups; CRIT, MAIN or PERP. Also, the
|
||||
Bestcomm dma engine has its own set of interrupt sources which are
|
||||
cascaded off of peripheral interrupt 0, which the driver interprets as a
|
||||
fourth group, SDMA.
|
||||
|
||||
The interrupts property for device nodes using the mpc5200 pic consists
|
||||
of three cells; <L1 L2 level>
|
||||
|
||||
L1 := [CRIT=0, MAIN=1, PERP=2, SDMA=3]
|
||||
L2 := interrupt number; directly mapped from the value in the
|
||||
"ICTL PerStat, MainStat, CritStat Encoded Register"
|
||||
level := [LEVEL_HIGH=0, EDGE_RISING=1, EDGE_FALLING=2, LEVEL_LOW=3]
|
||||
|
||||
For external IRQs, use the following interrupt property values (how to
|
||||
specify external interrupts is a frequently asked question):
|
||||
External interrupts:
|
||||
external irq0: interrupts = <0 0 n>;
|
||||
external irq1: interrupts = <1 1 n>;
|
||||
external irq2: interrupts = <1 2 n>;
|
||||
external irq3: interrupts = <1 3 n>;
|
||||
'n' is sense (0: level high, 1: edge rising, 2: edge falling, 3: level low)
|
||||
|
@ -1,277 +0,0 @@
|
||||
MPC5200 Device Tree Bindings
|
||||
----------------------------
|
||||
|
||||
(c) 2006-2007 Secret Lab Technologies Ltd
|
||||
Grant Likely <grant.likely at secretlab.ca>
|
||||
|
||||
********** DRAFT ***********
|
||||
* WARNING: Do not depend on the stability of these bindings just yet.
|
||||
* The MPC5200 device tree conventions are still in flux
|
||||
* Keep an eye on the linuxppc-dev mailing list for more details
|
||||
********** DRAFT ***********
|
||||
|
||||
I - Introduction
|
||||
================
|
||||
Boards supported by the arch/powerpc architecture require device tree be
|
||||
passed by the boot loader to the kernel at boot time. The device tree
|
||||
describes what devices are present on the board and how they are
|
||||
connected. The device tree can either be passed as a binary blob (as
|
||||
described in Documentation/powerpc/booting-without-of.txt), or passed
|
||||
by Open Firmware (IEEE 1275) compatible firmware using an OF compatible
|
||||
client interface API.
|
||||
|
||||
This document specifies the requirements on the device-tree for mpc5200
|
||||
based boards. These requirements are above and beyond the details
|
||||
specified in either the Open Firmware spec or booting-without-of.txt
|
||||
|
||||
All new mpc5200-based boards are expected to match this document. In
|
||||
cases where this document is not sufficient to support a new board port,
|
||||
this document should be updated as part of adding the new board support.
|
||||
|
||||
II - Philosophy
|
||||
===============
|
||||
The core of this document is naming convention. The whole point of
|
||||
defining this convention is to reduce or eliminate the number of
|
||||
special cases required to support a 5200 board. If all 5200 boards
|
||||
follow the same convention, then generic 5200 support code will work
|
||||
rather than coding special cases for each new board.
|
||||
|
||||
This section tries to capture the thought process behind why the naming
|
||||
convention is what it is.
|
||||
|
||||
1. names
|
||||
---------
|
||||
There is strong convention/requirements already established for children
|
||||
of the root node. 'cpus' describes the processor cores, 'memory'
|
||||
describes memory, and 'chosen' provides boot configuration. Other nodes
|
||||
are added to describe devices attached to the processor local bus.
|
||||
|
||||
Following convention already established with other system-on-chip
|
||||
processors, 5200 device trees should use the name 'soc5200' for the
|
||||
parent node of on chip devices, and the root node should be its parent.
|
||||
|
||||
Child nodes are typically named after the configured function. ie.
|
||||
the FEC node is named 'ethernet', and a PSC in uart mode is named 'serial'.
|
||||
|
||||
2. device_type property
|
||||
-----------------------
|
||||
similar to the node name convention above; the device_type reflects the
|
||||
configured function of a device. ie. 'serial' for a uart and 'spi' for
|
||||
an spi controller. However, while node names *should* reflect the
|
||||
configured function, device_type *must* match the configured function
|
||||
exactly.
|
||||
|
||||
3. compatible property
|
||||
----------------------
|
||||
Since device_type isn't enough to match devices to drivers, there also
|
||||
needs to be a naming convention for the compatible property. Compatible
|
||||
is an list of device descriptions sorted from specific to generic. For
|
||||
the mpc5200, the required format for each compatible value is
|
||||
<chip>-<device>[-<mode>]. The OS should be able to match a device driver
|
||||
to the device based solely on the compatible value. If two drivers
|
||||
match on the compatible list; the 'most compatible' driver should be
|
||||
selected.
|
||||
|
||||
The split between the MPC5200 and the MPC5200B leaves a bit of a
|
||||
conundrum. How should the compatible property be set up to provide
|
||||
maximum compatibility information; but still accurately describe the
|
||||
chip? For the MPC5200; the answer is easy. Most of the SoC devices
|
||||
originally appeared on the MPC5200. Since they didn't exist anywhere
|
||||
else; the 5200 compatible properties will contain only one item;
|
||||
"mpc5200-<device>".
|
||||
|
||||
The 5200B is almost the same as the 5200, but not quite. It fixes
|
||||
silicon bugs and it adds a small number of enhancements. Most of the
|
||||
devices either provide exactly the same interface as on the 5200. A few
|
||||
devices have extra functions but still have a backwards compatible mode.
|
||||
To express this information as completely as possible, 5200B device trees
|
||||
should have two items in the compatible list;
|
||||
"mpc5200b-<device>\0mpc5200-<device>". It is *strongly* recommended
|
||||
that 5200B device trees follow this convention (instead of only listing
|
||||
the base mpc5200 item).
|
||||
|
||||
If another chip appear on the market with one of the mpc5200 SoC
|
||||
devices, then the compatible list should include mpc5200-<device>.
|
||||
|
||||
ie. ethernet on mpc5200: compatible = "mpc5200-ethernet"
|
||||
ethernet on mpc5200b: compatible = "mpc5200b-ethernet\0mpc5200-ethernet"
|
||||
|
||||
Modal devices, like PSCs, also append the configured function to the
|
||||
end of the compatible field. ie. A PSC in i2s mode would specify
|
||||
"mpc5200-psc-i2s", not "mpc5200-i2s". This convention is chosen to
|
||||
avoid naming conflicts with non-psc devices providing the same
|
||||
function. For example, "mpc5200-spi" and "mpc5200-psc-spi" describe
|
||||
the mpc5200 simple spi device and a PSC spi mode respectively.
|
||||
|
||||
If the soc device is more generic and present on other SOCs, the
|
||||
compatible property can specify the more generic device type also.
|
||||
|
||||
ie. mscan: compatible = "mpc5200-mscan\0fsl,mscan";
|
||||
|
||||
At the time of writing, exact chip may be either 'mpc5200' or
|
||||
'mpc5200b'.
|
||||
|
||||
Device drivers should always try to match as generically as possible.
|
||||
|
||||
III - Structure
|
||||
===============
|
||||
The device tree for an mpc5200 board follows the structure defined in
|
||||
booting-without-of.txt with the following additional notes:
|
||||
|
||||
0) the root node
|
||||
----------------
|
||||
Typical root description node; see booting-without-of
|
||||
|
||||
1) The cpus node
|
||||
----------------
|
||||
The cpus node follows the basic layout described in booting-without-of.
|
||||
The bus-frequency property holds the XLB bus frequency
|
||||
The clock-frequency property holds the core frequency
|
||||
|
||||
2) The memory node
|
||||
------------------
|
||||
Typical memory description node; see booting-without-of.
|
||||
|
||||
3) The soc5200 node
|
||||
-------------------
|
||||
This node describes the on chip SOC peripherals. Every mpc5200 based
|
||||
board will have this node, and as such there is a common naming
|
||||
convention for SOC devices.
|
||||
|
||||
Required properties:
|
||||
name type description
|
||||
---- ---- -----------
|
||||
device_type string must be "soc"
|
||||
ranges int should be <0 baseaddr baseaddr+10000>
|
||||
reg int must be <baseaddr 10000>
|
||||
compatible string mpc5200: "mpc5200-soc"
|
||||
mpc5200b: "mpc5200b-soc\0mpc5200-soc"
|
||||
system-frequency int Fsystem frequency; source of all
|
||||
other clocks.
|
||||
bus-frequency int IPB bus frequency in HZ. Clock rate
|
||||
used by most of the soc devices.
|
||||
#interrupt-cells int must be <3>.
|
||||
|
||||
Recommended properties:
|
||||
name type description
|
||||
---- ---- -----------
|
||||
model string Exact model of the chip;
|
||||
ie: model="fsl,mpc5200"
|
||||
revision string Silicon revision of chip
|
||||
ie: revision="M08A"
|
||||
|
||||
The 'model' and 'revision' properties are *strongly* recommended. Having
|
||||
them presence acts as a bit of a safety net for working around as yet
|
||||
undiscovered bugs on one version of silicon. For example, device drivers
|
||||
can use the model and revision properties to decide if a bug fix should
|
||||
be turned on.
|
||||
|
||||
4) soc5200 child nodes
|
||||
----------------------
|
||||
Any on chip SOC devices available to Linux must appear as soc5200 child nodes.
|
||||
|
||||
Note: The tables below show the value for the mpc5200. A mpc5200b device
|
||||
tree should use the "mpc5200b-<device>\0mpc5200-<device> form.
|
||||
|
||||
Required soc5200 child nodes:
|
||||
name device_type compatible Description
|
||||
---- ----------- ---------- -----------
|
||||
cdm@<addr> cdm mpc5200-cmd Clock Distribution
|
||||
pic@<addr> interrupt-controller mpc5200-pic need an interrupt
|
||||
controller to boot
|
||||
bestcomm@<addr> dma-controller mpc5200-bestcomm 5200 pic also requires
|
||||
the bestcomm device
|
||||
|
||||
Recommended soc5200 child nodes; populate as needed for your board
|
||||
name device_type compatible Description
|
||||
---- ----------- ---------- -----------
|
||||
gpt@<addr> gpt fsl,mpc5200-gpt General purpose timers
|
||||
gpt@<addr> gpt fsl,mpc5200-gpt-gpio General purpose
|
||||
timers in GPIO mode
|
||||
gpio@<addr> fsl,mpc5200-gpio MPC5200 simple gpio
|
||||
controller
|
||||
gpio@<addr> fsl,mpc5200-gpio-wkup MPC5200 wakeup gpio
|
||||
controller
|
||||
rtc@<addr> rtc mpc5200-rtc Real time clock
|
||||
mscan@<addr> mscan mpc5200-mscan CAN bus controller
|
||||
pci@<addr> pci mpc5200-pci PCI bridge
|
||||
serial@<addr> serial mpc5200-psc-uart PSC in serial mode
|
||||
i2s@<addr> sound mpc5200-psc-i2s PSC in i2s mode
|
||||
ac97@<addr> sound mpc5200-psc-ac97 PSC in ac97 mode
|
||||
spi@<addr> spi mpc5200-psc-spi PSC in spi mode
|
||||
irda@<addr> irda mpc5200-psc-irda PSC in IrDA mode
|
||||
spi@<addr> spi mpc5200-spi MPC5200 spi device
|
||||
ethernet@<addr> network mpc5200-fec MPC5200 ethernet device
|
||||
ata@<addr> ata mpc5200-ata IDE ATA interface
|
||||
i2c@<addr> i2c mpc5200-i2c I2C controller
|
||||
usb@<addr> usb-ohci-be mpc5200-ohci,ohci-be USB controller
|
||||
xlb@<addr> xlb mpc5200-xlb XLB arbitrator
|
||||
|
||||
Important child node properties
|
||||
name type description
|
||||
---- ---- -----------
|
||||
cell-index int When multiple devices are present, is the
|
||||
index of the device in the hardware (ie. There
|
||||
are 6 PSC on the 5200 numbered PSC1 to PSC6)
|
||||
PSC1 has 'cell-index = <0>'
|
||||
PSC4 has 'cell-index = <3>'
|
||||
|
||||
5) General Purpose Timer nodes (child of soc5200 node)
|
||||
On the mpc5200 and 5200b, GPT0 has a watchdog timer function. If the board
|
||||
design supports the internal wdt, then the device node for GPT0 should
|
||||
include the empty property 'fsl,has-wdt'.
|
||||
|
||||
6) PSC nodes (child of soc5200 node)
|
||||
PSC nodes can define the optional 'port-number' property to force assignment
|
||||
order of serial ports. For example, PSC5 might be physically connected to
|
||||
the port labeled 'COM1' and PSC1 wired to 'COM1'. In this case, PSC5 would
|
||||
have a "port-number = <0>" property, and PSC1 would have "port-number = <1>".
|
||||
|
||||
PSC in i2s mode: The mpc5200 and mpc5200b PSCs are not compatible when in
|
||||
i2s mode. An 'mpc5200b-psc-i2s' node cannot include 'mpc5200-psc-i2s' in the
|
||||
compatible field.
|
||||
|
||||
7) GPIO controller nodes
|
||||
Each GPIO controller node should have the empty property gpio-controller and
|
||||
#gpio-cells set to 2. First cell is the GPIO number which is interpreted
|
||||
according to the bit numbers in the GPIO control registers. The second cell
|
||||
is for flags which is currently unsused.
|
||||
|
||||
8) FEC nodes
|
||||
The FEC node can specify one of the following properties to configure
|
||||
the MII link:
|
||||
"fsl,7-wire-mode" - An empty property that specifies the link uses 7-wire
|
||||
mode instead of MII
|
||||
"current-speed" - Specifies that the MII should be configured for a fixed
|
||||
speed. This property should contain two cells. The
|
||||
first cell specifies the speed in Mbps and the second
|
||||
should be '0' for half duplex and '1' for full duplex
|
||||
"phy-handle" - Contains a phandle to an Ethernet PHY.
|
||||
|
||||
IV - Extra Notes
|
||||
================
|
||||
|
||||
1. Interrupt mapping
|
||||
--------------------
|
||||
The mpc5200 pic driver splits hardware IRQ numbers into two levels. The
|
||||
split reflects the layout of the PIC hardware itself, which groups
|
||||
interrupts into one of three groups; CRIT, MAIN or PERP. Also, the
|
||||
Bestcomm dma engine has it's own set of interrupt sources which are
|
||||
cascaded off of peripheral interrupt 0, which the driver interprets as a
|
||||
fourth group, SDMA.
|
||||
|
||||
The interrupts property for device nodes using the mpc5200 pic consists
|
||||
of three cells; <L1 L2 level>
|
||||
|
||||
L1 := [CRIT=0, MAIN=1, PERP=2, SDMA=3]
|
||||
L2 := interrupt number; directly mapped from the value in the
|
||||
"ICTL PerStat, MainStat, CritStat Encoded Register"
|
||||
level := [LEVEL_HIGH=0, EDGE_RISING=1, EDGE_FALLING=2, LEVEL_LOW=3]
|
||||
|
||||
2. Shared registers
|
||||
-------------------
|
||||
Some SoC devices share registers between them. ie. the i2c devices use
|
||||
a single clock control register, and almost all device are affected by
|
||||
the port_config register. Devices which need to manipulate shared regs
|
||||
should look to the parent SoC node. The soc node is responsible
|
||||
for arbitrating all shared register access.
|
@ -1402,7 +1402,7 @@ Syscalls are implemented on Linux for S390 by the Supervisor call instruction (S
|
||||
possibilities of these as the instruction is made up of a 0xA opcode & the second byte being
|
||||
the syscall number. They are traced using the simple command.
|
||||
TR SVC <Optional value or range>
|
||||
the syscalls are defined in linux/include/asm-s390/unistd.h
|
||||
the syscalls are defined in linux/arch/s390/include/asm/unistd.h
|
||||
e.g. to trace all file opens just do
|
||||
TR SVC 5 ( as this is the syscall number of open )
|
||||
|
||||
|
@ -98,7 +98,7 @@ platform. Some of the interface routines are specific to Linux/390 and some
|
||||
of them can be found on other Linux platforms implementations too.
|
||||
Miscellaneous function prototypes, data declarations, and macro definitions
|
||||
can be found in the architecture specific C header file
|
||||
linux/include/asm-s390/irq.h.
|
||||
linux/arch/s390/include/asm/irq.h.
|
||||
|
||||
Overview of CDS interface concepts
|
||||
|
||||
|
@ -2,7 +2,7 @@ S390 Debug Feature
|
||||
==================
|
||||
|
||||
files: arch/s390/kernel/debug.c
|
||||
include/asm-s390/debug.h
|
||||
arch/s390/include/asm/debug.h
|
||||
|
||||
Description:
|
||||
------------
|
||||
|
@ -231,7 +231,7 @@ CPU bandwidth control purposes:
|
||||
|
||||
This option needs CONFIG_CGROUPS to be defined, and lets the administrator
|
||||
create arbitrary groups of tasks, using the "cgroup" pseudo filesystem. See
|
||||
Documentation/cgroups.txt for more information about this filesystem.
|
||||
Documentation/cgroups/cgroups.txt for more information about this filesystem.
|
||||
|
||||
Only one of these options to group tasks can be chosen and not both.
|
||||
|
||||
|
@ -733,7 +733,7 @@ Changes from 20040920 to 20041018
|
||||
I/O completion path a little more, especially taking care of
|
||||
fast-pathing the non-error case. Also removes tons of dead
|
||||
members and defines from lpfc_scsi.h - e.g. lpfc_target is down
|
||||
to nothing more then the lpfc_nodelist pointer.
|
||||
to nothing more than the lpfc_nodelist pointer.
|
||||
* Added binary sysfs file to issue mbox commands
|
||||
* Replaced #if __BIG_ENDIAN with #if __BIG_ENDIAN_BITFIELD for
|
||||
compatibility with the user space applications.
|
||||
|
@ -19,7 +19,7 @@ Sun Sep 24 21:30 2000 Gerard Roudier (groudier@club-internet.fr)
|
||||
|
||||
Wed Jul 26 23:30 2000 Gerard Roudier (groudier@club-internet.fr)
|
||||
* version ncr53c8xx-3.4.1
|
||||
- Provide OpenFirmare path through the proc FS on PPC.
|
||||
- Provide OpenFirmware path through the proc FS on PPC.
|
||||
- Remove trailing argument #2 from a couple of #undefs.
|
||||
|
||||
Sun Jul 09 16:30 2000 Gerard Roudier (groudier@club-internet.fr)
|
||||
|
@ -81,7 +81,7 @@ Sun Sep 24 21:30 2000 Gerard Roudier (groudier@club-internet.fr)
|
||||
|
||||
Wed Jul 26 23:30 2000 Gerard Roudier (groudier@club-internet.fr)
|
||||
* version sym53c8xx-1.7.1
|
||||
- Provide OpenFirmare path through the proc FS on PPC.
|
||||
- Provide OpenFirmware path through the proc FS on PPC.
|
||||
- Download of on-chip SRAM using memcpy_toio() doesn't work
|
||||
on PPC. Restore previous method (MEMORY MOVE from SCRIPTS).
|
||||
- Remove trailing argument #2 from a couple of #undefs.
|
||||
|
@ -191,7 +191,7 @@ Vport States:
|
||||
This is equivalent to a driver "attach" on an adapter, which is
|
||||
independent of the adapter's link state.
|
||||
- Instantiation of the vport on the FC link via ELS traffic, etc.
|
||||
This is equivalent to a "link up" and successfull link initialization.
|
||||
This is equivalent to a "link up" and successful link initialization.
|
||||
Further information can be found in the interfaces section below for
|
||||
Vport Creation.
|
||||
|
||||
@ -320,7 +320,7 @@ Vport Creation:
|
||||
This is equivalent to a driver "attach" on an adapter, which is
|
||||
independent of the adapter's link state.
|
||||
- Instantiation of the vport on the FC link via ELS traffic, etc.
|
||||
This is equivalent to a "link up" and successfull link initialization.
|
||||
This is equivalent to a "link up" and successful link initialization.
|
||||
|
||||
The LLDD's vport_create() function will not synchronously wait for both
|
||||
parts to be fully completed before returning. It must validate that the
|
||||
|
@ -275,7 +275,8 @@ STAC9200
|
||||
dell-m25 Dell Inspiron E1505n
|
||||
dell-m26 Dell Inspiron 1501
|
||||
dell-m27 Dell Inspiron E1705/9400
|
||||
gateway Gateway laptops with EAPD control
|
||||
gateway-m4 Gateway laptops with EAPD control
|
||||
gateway-m4-2 Gateway laptops with EAPD control
|
||||
panasonic Panasonic CF-74
|
||||
|
||||
STAC9205/9254
|
||||
@ -302,6 +303,7 @@ STAC9220/9221
|
||||
macbook-pro Intel Mac Book Pro 2nd generation (eq. type 3)
|
||||
imac-intel Intel iMac (eq. type 2)
|
||||
imac-intel-20 Intel iMac (newer version) (eq. type 3)
|
||||
ecs202 ECS/PC chips
|
||||
dell-d81 Dell (unknown)
|
||||
dell-d82 Dell (unknown)
|
||||
dell-m81 Dell (unknown)
|
||||
@ -310,9 +312,13 @@ STAC9220/9221
|
||||
STAC9202/9250/9251
|
||||
==================
|
||||
ref Reference board, base config
|
||||
m1 Some Gateway MX series laptops (NX560XL)
|
||||
m1-2 Some Gateway MX series laptops (MX6453)
|
||||
m2 Some Gateway MX series laptops (M255)
|
||||
m2-2 Some Gateway MX series laptops
|
||||
m3 Some Gateway MX series laptops
|
||||
m5 Some Gateway MX series laptops (MP6954)
|
||||
m6 Some Gateway NX series laptops
|
||||
pa6 Gateway NX860 series
|
||||
|
||||
STAC9227/9228/9229/927x
|
||||
=======================
|
||||
@ -329,6 +335,7 @@ STAC92HD71B*
|
||||
dell-m4-1 Dell desktops
|
||||
dell-m4-2 Dell desktops
|
||||
dell-m4-3 Dell desktops
|
||||
hp-m4 HP dv laptops
|
||||
|
||||
STAC92HD73*
|
||||
===========
|
||||
@ -337,10 +344,12 @@ STAC92HD73*
|
||||
dell-m6-amic Dell desktops/laptops with analog mics
|
||||
dell-m6-dmic Dell desktops/laptops with digital mics
|
||||
dell-m6 Dell desktops/laptops with both types of mics
|
||||
dell-eq Dell desktops/laptops
|
||||
|
||||
STAC92HD83*
|
||||
===========
|
||||
ref Reference board
|
||||
mic-ref Reference board with power management for ports
|
||||
|
||||
STAC9872
|
||||
========
|
||||
|
@ -13,10 +13,20 @@ Description
|
||||
This driver provides glue code connecting a National Semiconductor LM70 LLP
|
||||
temperature sensor evaluation board to the kernel's SPI core subsystem.
|
||||
|
||||
This is a SPI master controller driver. It can be used in conjunction with
|
||||
(layered under) the LM70 logical driver (a "SPI protocol driver").
|
||||
In effect, this driver turns the parallel port interface on the eval board
|
||||
into a SPI bus with a single device, which will be driven by the generic
|
||||
LM70 driver (drivers/hwmon/lm70.c).
|
||||
|
||||
|
||||
Hardware Interfacing
|
||||
--------------------
|
||||
The schematic for this particular board (the LM70EVAL-LLP) is
|
||||
available (on page 4) here:
|
||||
|
||||
http://www.national.com/appinfo/tempsensors/files/LM70LLPEVALmanual.pdf
|
||||
|
||||
The hardware interfacing on the LM70 LLP eval board is as follows:
|
||||
|
||||
Parallel LM70 LLP
|
||||
|
@ -1,12 +1,13 @@
|
||||
Documentation for /proc/sys/vm/* kernel version 2.2.10
|
||||
Documentation for /proc/sys/vm/* kernel version 2.6.29
|
||||
(c) 1998, 1999, Rik van Riel <riel@nl.linux.org>
|
||||
(c) 2008 Peter W. Morreale <pmorreale@novell.com>
|
||||
|
||||
For general info and legal blurb, please look in README.
|
||||
|
||||
==============================================================
|
||||
|
||||
This file contains the documentation for the sysctl files in
|
||||
/proc/sys/vm and is valid for Linux kernel version 2.2.
|
||||
/proc/sys/vm and is valid for Linux kernel version 2.6.29.
|
||||
|
||||
The files in this directory can be used to tune the operation
|
||||
of the virtual memory (VM) subsystem of the Linux kernel and
|
||||
@ -16,81 +17,244 @@ Default values and initialization routines for most of these
|
||||
files can be found in mm/swap.c.
|
||||
|
||||
Currently, these files are in /proc/sys/vm:
|
||||
- overcommit_memory
|
||||
- page-cluster
|
||||
- dirty_ratio
|
||||
|
||||
- block_dump
|
||||
- dirty_background_bytes
|
||||
- dirty_background_ratio
|
||||
- dirty_bytes
|
||||
- dirty_expire_centisecs
|
||||
- dirty_ratio
|
||||
- dirty_writeback_centisecs
|
||||
- highmem_is_dirtyable (only if CONFIG_HIGHMEM set)
|
||||
- drop_caches
|
||||
- hugepages_treat_as_movable
|
||||
- hugetlb_shm_group
|
||||
- laptop_mode
|
||||
- legacy_va_layout
|
||||
- lowmem_reserve_ratio
|
||||
- max_map_count
|
||||
- min_free_kbytes
|
||||
- laptop_mode
|
||||
- block_dump
|
||||
- drop-caches
|
||||
- zone_reclaim_mode
|
||||
- min_unmapped_ratio
|
||||
- min_slab_ratio
|
||||
- panic_on_oom
|
||||
- oom_dump_tasks
|
||||
- oom_kill_allocating_task
|
||||
- mmap_min_address
|
||||
- numa_zonelist_order
|
||||
- min_unmapped_ratio
|
||||
- mmap_min_addr
|
||||
- nr_hugepages
|
||||
- nr_overcommit_hugepages
|
||||
- nr_pdflush_threads
|
||||
- nr_trim_pages (only if CONFIG_MMU=n)
|
||||
- numa_zonelist_order
|
||||
- oom_dump_tasks
|
||||
- oom_kill_allocating_task
|
||||
- overcommit_memory
|
||||
- overcommit_ratio
|
||||
- page-cluster
|
||||
- panic_on_oom
|
||||
- percpu_pagelist_fraction
|
||||
- stat_interval
|
||||
- swappiness
|
||||
- vfs_cache_pressure
|
||||
- zone_reclaim_mode
|
||||
|
||||
|
||||
==============================================================
|
||||
|
||||
dirty_ratio, dirty_background_ratio, dirty_expire_centisecs,
|
||||
dirty_writeback_centisecs, highmem_is_dirtyable,
|
||||
vfs_cache_pressure, laptop_mode, block_dump, swap_token_timeout,
|
||||
drop-caches, hugepages_treat_as_movable:
|
||||
block_dump
|
||||
|
||||
See Documentation/filesystems/proc.txt
|
||||
block_dump enables block I/O debugging when set to a nonzero value. More
|
||||
information on block I/O debugging is in Documentation/laptops/laptop-mode.txt.
|
||||
|
||||
==============================================================
|
||||
|
||||
overcommit_memory:
|
||||
dirty_background_bytes
|
||||
|
||||
This value contains a flag that enables memory overcommitment.
|
||||
Contains the amount of dirty memory at which the pdflush background writeback
|
||||
daemon will start writeback.
|
||||
|
||||
When this flag is 0, the kernel attempts to estimate the amount
|
||||
of free memory left when userspace requests more memory.
|
||||
|
||||
When this flag is 1, the kernel pretends there is always enough
|
||||
memory until it actually runs out.
|
||||
|
||||
When this flag is 2, the kernel uses a "never overcommit"
|
||||
policy that attempts to prevent any overcommit of memory.
|
||||
|
||||
This feature can be very useful because there are a lot of
|
||||
programs that malloc() huge amounts of memory "just-in-case"
|
||||
and don't use much of it.
|
||||
|
||||
The default value is 0.
|
||||
|
||||
See Documentation/vm/overcommit-accounting and
|
||||
security/commoncap.c::cap_vm_enough_memory() for more information.
|
||||
If dirty_background_bytes is written, dirty_background_ratio becomes a function
|
||||
of its value (dirty_background_bytes / the amount of dirtyable system memory).
|
||||
|
||||
==============================================================
|
||||
|
||||
overcommit_ratio:
|
||||
dirty_background_ratio
|
||||
|
||||
When overcommit_memory is set to 2, the committed address
|
||||
space is not permitted to exceed swap plus this percentage
|
||||
of physical RAM. See above.
|
||||
Contains, as a percentage of total system memory, the number of pages at which
|
||||
the pdflush background writeback daemon will start writing out dirty data.
|
||||
|
||||
==============================================================
|
||||
|
||||
page-cluster:
|
||||
dirty_bytes
|
||||
|
||||
The Linux VM subsystem avoids excessive disk seeks by reading
|
||||
multiple pages on a page fault. The number of pages it reads
|
||||
is dependent on the amount of memory in your machine.
|
||||
Contains the amount of dirty memory at which a process generating disk writes
|
||||
will itself start writeback.
|
||||
|
||||
The number of pages the kernel reads in at once is equal to
|
||||
2 ^ page-cluster. Values above 2 ^ 5 don't make much sense
|
||||
for swap because we only cluster swap data in 32-page groups.
|
||||
If dirty_bytes is written, dirty_ratio becomes a function of its value
|
||||
(dirty_bytes / the amount of dirtyable system memory).
|
||||
|
||||
==============================================================
|
||||
|
||||
dirty_expire_centisecs
|
||||
|
||||
This tunable is used to define when dirty data is old enough to be eligible
|
||||
for writeout by the pdflush daemons. It is expressed in 100'ths of a second.
|
||||
Data which has been dirty in-memory for longer than this interval will be
|
||||
written out next time a pdflush daemon wakes up.
|
||||
|
||||
==============================================================
|
||||
|
||||
dirty_ratio
|
||||
|
||||
Contains, as a percentage of total system memory, the number of pages at which
|
||||
a process which is generating disk writes will itself start writing out dirty
|
||||
data.
|
||||
|
||||
==============================================================
|
||||
|
||||
dirty_writeback_centisecs
|
||||
|
||||
The pdflush writeback daemons will periodically wake up and write `old' data
|
||||
out to disk. This tunable expresses the interval between those wakeups, in
|
||||
100'ths of a second.
|
||||
|
||||
Setting this to zero disables periodic writeback altogether.
|
||||
|
||||
==============================================================
|
||||
|
||||
drop_caches
|
||||
|
||||
Writing to this will cause the kernel to drop clean caches, dentries and
|
||||
inodes from memory, causing that memory to become free.
|
||||
|
||||
To free pagecache:
|
||||
echo 1 > /proc/sys/vm/drop_caches
|
||||
To free dentries and inodes:
|
||||
echo 2 > /proc/sys/vm/drop_caches
|
||||
To free pagecache, dentries and inodes:
|
||||
echo 3 > /proc/sys/vm/drop_caches
|
||||
|
||||
As this is a non-destructive operation and dirty objects are not freeable, the
|
||||
user should run `sync' first.
|
||||
|
||||
==============================================================
|
||||
|
||||
hugepages_treat_as_movable
|
||||
|
||||
This parameter is only useful when kernelcore= is specified at boot time to
|
||||
create ZONE_MOVABLE for pages that may be reclaimed or migrated. Huge pages
|
||||
are not movable so are not normally allocated from ZONE_MOVABLE. A non-zero
|
||||
value written to hugepages_treat_as_movable allows huge pages to be allocated
|
||||
from ZONE_MOVABLE.
|
||||
|
||||
Once enabled, the ZONE_MOVABLE is treated as an area of memory the huge
|
||||
pages pool can easily grow or shrink within. Assuming that applications are
|
||||
not running that mlock() a lot of memory, it is likely the huge pages pool
|
||||
can grow to the size of ZONE_MOVABLE by repeatedly entering the desired value
|
||||
into nr_hugepages and triggering page reclaim.
|
||||
|
||||
==============================================================
|
||||
|
||||
hugetlb_shm_group
|
||||
|
||||
hugetlb_shm_group contains group id that is allowed to create SysV
|
||||
shared memory segment using hugetlb page.
|
||||
|
||||
==============================================================
|
||||
|
||||
laptop_mode
|
||||
|
||||
laptop_mode is a knob that controls "laptop mode". All the things that are
|
||||
controlled by this knob are discussed in Documentation/laptops/laptop-mode.txt.
|
||||
|
||||
==============================================================
|
||||
|
||||
legacy_va_layout
|
||||
|
||||
If non-zero, this sysctl disables the new 32-bit mmap layout - the kernel
|
||||
will use the legacy (2.4) layout for all processes.
|
||||
|
||||
==============================================================
|
||||
|
||||
lowmem_reserve_ratio
|
||||
|
||||
For some specialised workloads on highmem machines it is dangerous for
|
||||
the kernel to allow process memory to be allocated from the "lowmem"
|
||||
zone. This is because that memory could then be pinned via the mlock()
|
||||
system call, or by unavailability of swapspace.
|
||||
|
||||
And on large highmem machines this lack of reclaimable lowmem memory
|
||||
can be fatal.
|
||||
|
||||
So the Linux page allocator has a mechanism which prevents allocations
|
||||
which _could_ use highmem from using too much lowmem. This means that
|
||||
a certain amount of lowmem is defended from the possibility of being
|
||||
captured into pinned user memory.
|
||||
|
||||
(The same argument applies to the old 16 megabyte ISA DMA region. This
|
||||
mechanism will also defend that region from allocations which could use
|
||||
highmem or lowmem).
|
||||
|
||||
The `lowmem_reserve_ratio' tunable determines how aggressive the kernel is
|
||||
in defending these lower zones.
|
||||
|
||||
If you have a machine which uses highmem or ISA DMA and your
|
||||
applications are using mlock(), or if you are running with no swap then
|
||||
you probably should change the lowmem_reserve_ratio setting.
|
||||
|
||||
The lowmem_reserve_ratio is an array. You can see them by reading this file.
|
||||
-
|
||||
% cat /proc/sys/vm/lowmem_reserve_ratio
|
||||
256 256 32
|
||||
-
|
||||
Note: the number of elements is one fewer than the number of zones, because the highest
|
||||
zone's value is not necessary for following calculation.
|
||||
|
||||
But, these values are not used directly. The kernel calculates # of protection
|
||||
pages for each zone from them. These are shown as an array of protection pages
|
||||
in /proc/zoneinfo as follows. (This is an example of an x86-64 box.)
|
||||
Each zone has an array of protection pages like this.
|
||||
|
||||
-
|
||||
Node 0, zone DMA
|
||||
pages free 1355
|
||||
min 3
|
||||
low 3
|
||||
high 4
|
||||
:
|
||||
:
|
||||
numa_other 0
|
||||
protection: (0, 2004, 2004, 2004)
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
pagesets
|
||||
cpu: 0 pcp: 0
|
||||
:
|
||||
-
|
||||
These protections are added to score to judge whether this zone should be used
|
||||
for page allocation or should be reclaimed.
|
||||
|
||||
In this example, if normal pages (index=2) are required to this DMA zone and
|
||||
pages_high is used for watermark, the kernel judges this zone should not be
|
||||
used because pages_free(1355) is smaller than watermark + protection[2]
|
||||
(4 + 2004 = 2008). If this protection value is 0, this zone would be used for
|
||||
normal page requirement. If requirement is DMA zone(index=0), protection[0]
|
||||
(=0) is used.
|
||||
|
||||
zone[i]'s protection[j] is calculated by following expression.
|
||||
|
||||
(i < j):
|
||||
zone[i]->protection[j]
|
||||
= (total sums of present_pages from zone[i+1] to zone[j] on the node)
|
||||
/ lowmem_reserve_ratio[i];
|
||||
(i = j):
|
||||
(should not be protected) = 0;
|
||||
(i > j):
|
||||
(not necessary, but looks 0)
|
||||
|
||||
The default values of lowmem_reserve_ratio[i] are
|
||||
256 (if zone[i] means DMA or DMA32 zone)
|
||||
32 (others).
|
||||
As the above expression shows, they are the reciprocals of the ratios.
|
||||
256 means 1/256. # of protection pages becomes about "0.39%" of total present
|
||||
pages of higher zones on the node.
|
||||
|
||||
If you would like to protect more pages, smaller values are effective.
|
||||
The minimum value is 1 (1/1 -> 100%).
|
||||
|
||||
==============================================================
|
||||
|
||||
@ -111,9 +275,9 @@ The default value is 65536.
|
||||
|
||||
min_free_kbytes:
|
||||
|
||||
This is used to force the Linux VM to keep a minimum number
|
||||
This is used to force the Linux VM to keep a minimum number
|
||||
of kilobytes free. The VM uses this number to compute a pages_min
|
||||
value for each lowmem zone in the system. Each lowmem zone gets
|
||||
value for each lowmem zone in the system. Each lowmem zone gets
|
||||
a number of reserved free pages based proportionally on its size.
|
||||
|
||||
Some minimal amount of memory is needed to satisfy PF_MEMALLOC
|
||||
@ -122,73 +286,6 @@ become subtly broken, and prone to deadlock under high loads.
|
||||
|
||||
Setting this too high will OOM your machine instantly.
|
||||
|
||||
==============================================================
|
||||
|
||||
percpu_pagelist_fraction
|
||||
|
||||
This is the fraction of pages at most (high mark pcp->high) in each zone that
|
||||
are allocated for each per cpu page list. The min value for this is 8. It
|
||||
means that we don't allow more than 1/8th of pages in each zone to be
|
||||
allocated in any single per_cpu_pagelist. This entry only changes the value
|
||||
of hot per cpu pagelists. User can specify a number like 100 to allocate
|
||||
1/100th of each zone to each per cpu page list.
|
||||
|
||||
The batch value of each per cpu pagelist is also updated as a result. It is
|
||||
set to pcp->high/4. The upper limit of batch is (PAGE_SHIFT * 8)
|
||||
|
||||
The initial value is zero. Kernel does not use this value at boot time to set
|
||||
the high water marks for each per cpu page list.
|
||||
|
||||
===============================================================
|
||||
|
||||
zone_reclaim_mode:
|
||||
|
||||
Zone_reclaim_mode allows someone to set more or less aggressive approaches to
|
||||
reclaim memory when a zone runs out of memory. If it is set to zero then no
|
||||
zone reclaim occurs. Allocations will be satisfied from other zones / nodes
|
||||
in the system.
|
||||
|
||||
This is value ORed together of
|
||||
|
||||
1 = Zone reclaim on
|
||||
2 = Zone reclaim writes dirty pages out
|
||||
4 = Zone reclaim swaps pages
|
||||
|
||||
zone_reclaim_mode is set during bootup to 1 if it is determined that pages
|
||||
from remote zones will cause a measurable performance reduction. The
|
||||
page allocator will then reclaim easily reusable pages (those page
|
||||
cache pages that are currently not used) before allocating off node pages.
|
||||
|
||||
It may be beneficial to switch off zone reclaim if the system is
|
||||
used for a file server and all of memory should be used for caching files
|
||||
from disk. In that case the caching effect is more important than
|
||||
data locality.
|
||||
|
||||
Allowing zone reclaim to write out pages stops processes that are
|
||||
writing large amounts of data from dirtying pages on other nodes. Zone
|
||||
reclaim will write out dirty pages if a zone fills up and so effectively
|
||||
throttle the process. This may decrease the performance of a single process
|
||||
since it cannot use all of system memory to buffer the outgoing writes
|
||||
anymore but it preserve the memory on other nodes so that the performance
|
||||
of other processes running on other nodes will not be affected.
|
||||
|
||||
Allowing regular swap effectively restricts allocations to the local
|
||||
node unless explicitly overridden by memory policies or cpuset
|
||||
configurations.
|
||||
|
||||
=============================================================
|
||||
|
||||
min_unmapped_ratio:
|
||||
|
||||
This is available only on NUMA kernels.
|
||||
|
||||
A percentage of the total pages in each zone. Zone reclaim will only
|
||||
occur if more than this percentage of pages are file backed and unmapped.
|
||||
This is to insure that a minimal amount of local pages is still available for
|
||||
file I/O even if the node is overallocated.
|
||||
|
||||
The default is 1 percent.
|
||||
|
||||
=============================================================
|
||||
|
||||
min_slab_ratio:
|
||||
@ -209,69 +306,16 @@ and may not be fast.
|
||||
|
||||
=============================================================
|
||||
|
||||
panic_on_oom
|
||||
min_unmapped_ratio:
|
||||
|
||||
This enables or disables panic on out-of-memory feature.
|
||||
This is available only on NUMA kernels.
|
||||
|
||||
If this is set to 0, the kernel will kill some rogue process,
|
||||
called oom_killer. Usually, oom_killer can kill rogue processes and
|
||||
system will survive.
|
||||
A percentage of the total pages in each zone. Zone reclaim will only
|
||||
occur if more than this percentage of pages are file backed and unmapped.
|
||||
This is to ensure that a minimal amount of local pages is still available for
|
||||
file I/O even if the node is overallocated.
|
||||
|
||||
If this is set to 1, the kernel panics when out-of-memory happens.
|
||||
However, if a process limits using nodes by mempolicy/cpusets,
|
||||
and those nodes become memory exhaustion status, one process
|
||||
may be killed by oom-killer. No panic occurs in this case.
|
||||
Because other nodes' memory may be free. This means system total status
|
||||
may be not fatal yet.
|
||||
|
||||
If this is set to 2, the kernel panics compulsorily even on the
|
||||
above-mentioned.
|
||||
|
||||
The default value is 0.
|
||||
1 and 2 are for failover of clustering. Please select either
|
||||
according to your policy of failover.
|
||||
|
||||
=============================================================
|
||||
|
||||
oom_dump_tasks
|
||||
|
||||
Enables a system-wide task dump (excluding kernel threads) to be
|
||||
produced when the kernel performs an OOM-killing and includes such
|
||||
information as pid, uid, tgid, vm size, rss, cpu, oom_adj score, and
|
||||
name. This is helpful to determine why the OOM killer was invoked
|
||||
and to identify the rogue task that caused it.
|
||||
|
||||
If this is set to zero, this information is suppressed. On very
|
||||
large systems with thousands of tasks it may not be feasible to dump
|
||||
the memory state information for each one. Such systems should not
|
||||
be forced to incur a performance penalty in OOM conditions when the
|
||||
information may not be desired.
|
||||
|
||||
If this is set to non-zero, this information is shown whenever the
|
||||
OOM killer actually kills a memory-hogging task.
|
||||
|
||||
The default value is 0.
|
||||
|
||||
=============================================================
|
||||
|
||||
oom_kill_allocating_task
|
||||
|
||||
This enables or disables killing the OOM-triggering task in
|
||||
out-of-memory situations.
|
||||
|
||||
If this is set to zero, the OOM killer will scan through the entire
|
||||
tasklist and select a task based on heuristics to kill. This normally
|
||||
selects a rogue memory-hogging task that frees up a large amount of
|
||||
memory when killed.
|
||||
|
||||
If this is set to non-zero, the OOM killer simply kills the task that
|
||||
triggered the out-of-memory condition. This avoids the expensive
|
||||
tasklist scan.
|
||||
|
||||
If panic_on_oom is selected, it takes precedence over whatever value
|
||||
is used in oom_kill_allocating_task.
|
||||
|
||||
The default value is 0.
|
||||
The default is 1 percent.
|
||||
|
||||
==============================================================
|
||||
|
||||
@ -288,6 +332,50 @@ against future potential kernel bugs.
|
||||
|
||||
==============================================================
|
||||
|
||||
nr_hugepages
|
||||
|
||||
Change the minimum size of the hugepage pool.
|
||||
|
||||
See Documentation/vm/hugetlbpage.txt
|
||||
|
||||
==============================================================
|
||||
|
||||
nr_overcommit_hugepages
|
||||
|
||||
Change the maximum size of the hugepage pool. The maximum is
|
||||
nr_hugepages + nr_overcommit_hugepages.
|
||||
|
||||
See Documentation/vm/hugetlbpage.txt
|
||||
|
||||
==============================================================
|
||||
|
||||
nr_pdflush_threads
|
||||
|
||||
The current number of pdflush threads. This value is read-only.
|
||||
The value changes according to the number of dirty pages in the system.
|
||||
|
||||
When necessary, additional pdflush threads are created, one per second, up to
|
||||
nr_pdflush_threads_max.
|
||||
|
||||
==============================================================
|
||||
|
||||
nr_trim_pages
|
||||
|
||||
This is available only on NOMMU kernels.
|
||||
|
||||
This value adjusts the excess page trimming behaviour of power-of-2 aligned
|
||||
NOMMU mmap allocations.
|
||||
|
||||
A value of 0 disables trimming of allocations entirely, while a value of 1
|
||||
trims excess pages aggressively. Any value >= 1 acts as the watermark where
|
||||
trimming of allocations is initiated.
|
||||
|
||||
The default value is 1.
|
||||
|
||||
See Documentation/nommu-mmap.txt for more information.
|
||||
|
||||
==============================================================
|
||||
|
||||
numa_zonelist_order
|
||||
|
||||
This sysctl is only for NUMA.
|
||||
@ -333,17 +421,199 @@ this is causing problems for your system/application.
|
||||
|
||||
==============================================================
|
||||
|
||||
nr_hugepages
|
||||
oom_dump_tasks
|
||||
|
||||
Change the minimum size of the hugepage pool.
|
||||
Enables a system-wide task dump (excluding kernel threads) to be
|
||||
produced when the kernel performs an OOM-killing and includes such
|
||||
information as pid, uid, tgid, vm size, rss, cpu, oom_adj score, and
|
||||
name. This is helpful to determine why the OOM killer was invoked
|
||||
and to identify the rogue task that caused it.
|
||||
|
||||
See Documentation/vm/hugetlbpage.txt
|
||||
If this is set to zero, this information is suppressed. On very
|
||||
large systems with thousands of tasks it may not be feasible to dump
|
||||
the memory state information for each one. Such systems should not
|
||||
be forced to incur a performance penalty in OOM conditions when the
|
||||
information may not be desired.
|
||||
|
||||
If this is set to non-zero, this information is shown whenever the
|
||||
OOM killer actually kills a memory-hogging task.
|
||||
|
||||
The default value is 0.
|
||||
|
||||
==============================================================
|
||||
|
||||
nr_overcommit_hugepages
|
||||
oom_kill_allocating_task
|
||||
|
||||
Change the maximum size of the hugepage pool. The maximum is
|
||||
nr_hugepages + nr_overcommit_hugepages.
|
||||
This enables or disables killing the OOM-triggering task in
|
||||
out-of-memory situations.
|
||||
|
||||
See Documentation/vm/hugetlbpage.txt
|
||||
If this is set to zero, the OOM killer will scan through the entire
|
||||
tasklist and select a task based on heuristics to kill. This normally
|
||||
selects a rogue memory-hogging task that frees up a large amount of
|
||||
memory when killed.
|
||||
|
||||
If this is set to non-zero, the OOM killer simply kills the task that
|
||||
triggered the out-of-memory condition. This avoids the expensive
|
||||
tasklist scan.
|
||||
|
||||
If panic_on_oom is selected, it takes precedence over whatever value
|
||||
is used in oom_kill_allocating_task.
|
||||
|
||||
The default value is 0.
|
||||
|
||||
==============================================================
|
||||
|
||||
overcommit_memory:
|
||||
|
||||
This value contains a flag that enables memory overcommitment.
|
||||
|
||||
When this flag is 0, the kernel attempts to estimate the amount
|
||||
of free memory left when userspace requests more memory.
|
||||
|
||||
When this flag is 1, the kernel pretends there is always enough
|
||||
memory until it actually runs out.
|
||||
|
||||
When this flag is 2, the kernel uses a "never overcommit"
|
||||
policy that attempts to prevent any overcommit of memory.
|
||||
|
||||
This feature can be very useful because there are a lot of
|
||||
programs that malloc() huge amounts of memory "just-in-case"
|
||||
and don't use much of it.
|
||||
|
||||
The default value is 0.
|
||||
|
||||
See Documentation/vm/overcommit-accounting and
|
||||
security/commoncap.c::cap_vm_enough_memory() for more information.
|
||||
|
||||
==============================================================
|
||||
|
||||
overcommit_ratio:
|
||||
|
||||
When overcommit_memory is set to 2, the committed address
|
||||
space is not permitted to exceed swap plus this percentage
|
||||
of physical RAM. See above.
|
||||
|
||||
==============================================================
|
||||
|
||||
page-cluster
|
||||
|
||||
page-cluster controls the number of pages which are written to swap in
|
||||
a single attempt. The swap I/O size.
|
||||
|
||||
It is a logarithmic value - setting it to zero means "1 page", setting
|
||||
it to 1 means "2 pages", setting it to 2 means "4 pages", etc.
|
||||
|
||||
The default value is three (eight pages at a time). There may be some
|
||||
small benefits in tuning this to a different value if your workload is
|
||||
swap-intensive.
|
||||
|
||||
=============================================================
|
||||
|
||||
panic_on_oom
|
||||
|
||||
This enables or disables panic on out-of-memory feature.
|
||||
|
||||
If this is set to 0, the kernel will kill some rogue process,
|
||||
called oom_killer. Usually, oom_killer can kill rogue processes and
|
||||
system will survive.
|
||||
|
||||
If this is set to 1, the kernel panics when out-of-memory happens.
|
||||
However, if a process limits using nodes by mempolicy/cpusets,
|
||||
and those nodes reach memory exhaustion, one process
|
||||
may be killed by oom-killer. No panic occurs in this case.
|
||||
This is because other nodes' memory may be free. This means system total status
|
||||
may be not fatal yet.
|
||||
|
||||
If this is set to 2, the kernel panics compulsorily even on the
|
||||
above-mentioned.
|
||||
|
||||
The default value is 0.
|
||||
1 and 2 are for failover of clustering. Please select either
|
||||
according to your policy of failover.
|
||||
|
||||
=============================================================
|
||||
|
||||
percpu_pagelist_fraction
|
||||
|
||||
This is the fraction of pages at most (high mark pcp->high) in each zone that
|
||||
are allocated for each per cpu page list. The min value for this is 8. It
|
||||
means that we don't allow more than 1/8th of pages in each zone to be
|
||||
allocated in any single per_cpu_pagelist. This entry only changes the value
|
||||
of hot per cpu pagelists. User can specify a number like 100 to allocate
|
||||
1/100th of each zone to each per cpu page list.
|
||||
|
||||
The batch value of each per cpu pagelist is also updated as a result. It is
|
||||
set to pcp->high/4. The upper limit of batch is (PAGE_SHIFT * 8).
|
||||
|
||||
The initial value is zero. Kernel does not use this value at boot time to set
|
||||
the high water marks for each per cpu page list.
|
||||
|
||||
==============================================================
|
||||
|
||||
stat_interval
|
||||
|
||||
The time interval between which vm statistics are updated. The default
|
||||
is 1 second.
|
||||
|
||||
==============================================================
|
||||
|
||||
swappiness
|
||||
|
||||
This control is used to define how aggressively the kernel will swap
|
||||
memory pages. Higher values will increase aggressiveness, lower values
|
||||
decrease the amount of swap.
|
||||
|
||||
The default value is 60.
|
||||
|
||||
==============================================================
|
||||
|
||||
vfs_cache_pressure
|
||||
------------------
|
||||
|
||||
Controls the tendency of the kernel to reclaim the memory which is used for
|
||||
caching of directory and inode objects.
|
||||
|
||||
At the default value of vfs_cache_pressure=100 the kernel will attempt to
|
||||
reclaim dentries and inodes at a "fair" rate with respect to pagecache and
|
||||
swapcache reclaim. Decreasing vfs_cache_pressure causes the kernel to prefer
|
||||
to retain dentry and inode caches. Increasing vfs_cache_pressure beyond 100
|
||||
causes the kernel to prefer to reclaim dentries and inodes.
|
||||
|
||||
==============================================================
|
||||
|
||||
zone_reclaim_mode:
|
||||
|
||||
Zone_reclaim_mode allows someone to set more or less aggressive approaches to
|
||||
reclaim memory when a zone runs out of memory. If it is set to zero then no
|
||||
zone reclaim occurs. Allocations will be satisfied from other zones / nodes
|
||||
in the system.
|
||||
|
||||
This value is formed by ORing together the following:
|
||||
|
||||
1 = Zone reclaim on
|
||||
2 = Zone reclaim writes dirty pages out
|
||||
4 = Zone reclaim swaps pages
|
||||
|
||||
zone_reclaim_mode is set during bootup to 1 if it is determined that pages
|
||||
from remote zones will cause a measurable performance reduction. The
|
||||
page allocator will then reclaim easily reusable pages (those page
|
||||
cache pages that are currently not used) before allocating off node pages.
|
||||
|
||||
It may be beneficial to switch off zone reclaim if the system is
|
||||
used for a file server and all of memory should be used for caching files
|
||||
from disk. In that case the caching effect is more important than
|
||||
data locality.
|
||||
|
||||
Allowing zone reclaim to write out pages stops processes that are
|
||||
writing large amounts of data from dirtying pages on other nodes. Zone
|
||||
reclaim will write out dirty pages if a zone fills up and so effectively
|
||||
throttle the process. This may decrease the performance of a single process
|
||||
since it cannot use all of system memory to buffer the outgoing writes
|
||||
anymore but it preserves the memory on other nodes so that the performance
|
||||
of other processes running on other nodes will not be affected.
|
||||
|
||||
Allowing regular swap effectively restricts allocations to the local
|
||||
node unless explicitly overridden by memory policies or cpuset
|
||||
configurations.
|
||||
|
||||
============ End of Document =================================
|
||||
|
@ -1,6 +1,5 @@
|
||||
Linux Magic System Request Key Hacks
|
||||
Documentation for sysrq.c
|
||||
Last update: 2007-AUG-04
|
||||
|
||||
* What is the magic SysRq key?
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
@ -211,6 +210,24 @@ within a function called by handle_sysrq, you must be aware that you are in
|
||||
a lock (you are also in an interrupt handler, which means don't sleep!), so
|
||||
you must call __handle_sysrq_nolock instead.
|
||||
|
||||
* When I hit a SysRq key combination only the header appears on the console?
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
Sysrq output is subject to the same console loglevel control as all
|
||||
other console output. This means that if the kernel was booted 'quiet'
|
||||
as is common on distro kernels the output may not appear on the actual
|
||||
console, even though it will appear in the dmesg buffer, and be accessible
|
||||
via the dmesg command and to the consumers of /proc/kmsg. As a specific
|
||||
exception the header line from the sysrq command is passed to all console
|
||||
consumers as if the current loglevel was maximum. If only the header
|
||||
is emitted it is almost certain that the kernel loglevel is too low.
|
||||
Should you require the output on the console channel then you will need
|
||||
to temporarily up the console loglevel using alt-sysrq-8 or:
|
||||
|
||||
echo 8 > /proc/sysrq-trigger
|
||||
|
||||
Remember to return the loglevel to normal after triggering the sysrq
|
||||
command you are interested in.
|
||||
|
||||
* I have more questions, who can I ask?
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
And I'll answer any questions about the registration system you got, also
|
||||
|
@ -6,8 +6,9 @@ in the kernel usb programming guide (kerneldoc, from the source code).
|
||||
API OVERVIEW
|
||||
|
||||
The big picture is that USB drivers can continue to ignore most DMA issues,
|
||||
though they still must provide DMA-ready buffers (see DMA-mapping.txt).
|
||||
That's how they've worked through the 2.4 (and earlier) kernels.
|
||||
though they still must provide DMA-ready buffers (see
|
||||
Documentation/PCI/PCI-DMA-mapping.txt). That's how they've worked through
|
||||
the 2.4 (and earlier) kernels.
|
||||
|
||||
OR: they can now be DMA-aware.
|
||||
|
||||
@ -62,8 +63,8 @@ and effects like cache-trashing can impose subtle penalties.
|
||||
force a consistent memory access ordering by using memory barriers. It's
|
||||
not using a streaming DMA mapping, so it's good for small transfers on
|
||||
systems where the I/O would otherwise thrash an IOMMU mapping. (See
|
||||
Documentation/DMA-mapping.txt for definitions of "coherent" and "streaming"
|
||||
DMA mappings.)
|
||||
Documentation/PCI/PCI-DMA-mapping.txt for definitions of "coherent" and
|
||||
"streaming" DMA mappings.)
|
||||
|
||||
Asking for 1/Nth of a page (as well as asking for N pages) is reasonably
|
||||
space-efficient.
|
||||
@ -93,7 +94,7 @@ WORKING WITH EXISTING BUFFERS
|
||||
Existing buffers aren't usable for DMA without first being mapped into the
|
||||
DMA address space of the device. However, most buffers passed to your
|
||||
driver can safely be used with such DMA mapping. (See the first section
|
||||
of DMA-mapping.txt, titled "What memory is DMA-able?")
|
||||
of Documentation/PCI/PCI-DMA-mapping.txt, titled "What memory is DMA-able?")
|
||||
|
||||
- When you're using scatterlists, you can map everything at once. On some
|
||||
systems, this kicks in an IOMMU and turns the scatterlists into single
|
||||
|
@ -313,11 +313,13 @@ three of the methods listed above. In addition, a driver indicates
|
||||
that it supports autosuspend by setting the .supports_autosuspend flag
|
||||
in its usb_driver structure. It is then responsible for informing the
|
||||
USB core whenever one of its interfaces becomes busy or idle. The
|
||||
driver does so by calling these three functions:
|
||||
driver does so by calling these five functions:
|
||||
|
||||
int usb_autopm_get_interface(struct usb_interface *intf);
|
||||
void usb_autopm_put_interface(struct usb_interface *intf);
|
||||
int usb_autopm_set_interface(struct usb_interface *intf);
|
||||
int usb_autopm_get_interface_async(struct usb_interface *intf);
|
||||
void usb_autopm_put_interface_async(struct usb_interface *intf);
|
||||
|
||||
The functions work by maintaining a counter in the usb_interface
|
||||
structure. When intf->pm_usage_count is > 0 then the interface is
|
||||
@ -330,10 +332,12 @@ associated with the device itself rather than any of its interfaces.
|
||||
This field is used only by the USB core.)
|
||||
|
||||
The driver owns intf->pm_usage_count; it can modify the value however
|
||||
and whenever it likes. A nice aspect of the usb_autopm_* routines is
|
||||
that the changes they make are protected by the usb_device structure's
|
||||
PM mutex (udev->pm_mutex); however drivers may change pm_usage_count
|
||||
without holding the mutex.
|
||||
and whenever it likes. A nice aspect of the non-async usb_autopm_*
|
||||
routines is that the changes they make are protected by the usb_device
|
||||
structure's PM mutex (udev->pm_mutex); however drivers may change
|
||||
pm_usage_count without holding the mutex. Drivers using the async
|
||||
routines are responsible for their own synchronization and mutual
|
||||
exclusion.
|
||||
|
||||
usb_autopm_get_interface() increments pm_usage_count and
|
||||
attempts an autoresume if the new value is > 0 and the
|
||||
@ -348,6 +352,14 @@ without holding the mutex.
|
||||
is suspended, and it attempts an autosuspend if the value is
|
||||
<= 0 and the device isn't suspended.
|
||||
|
||||
usb_autopm_get_interface_async() and
|
||||
usb_autopm_put_interface_async() do almost the same things as
|
||||
their non-async counterparts. The differences are: they do
|
||||
not acquire the PM mutex, and they use a workqueue to do their
|
||||
jobs. As a result they can be called in an atomic context,
|
||||
such as an URB's completion handler, but when they return the
|
||||
device will generally not yet be in the desired state.
|
||||
|
||||
There also are a couple of utility routines drivers can use:
|
||||
|
||||
usb_autopm_enable() sets pm_usage_cnt to 0 and then calls
|
||||
|
@ -80,12 +80,6 @@ case $1 in
|
||||
start)
|
||||
for dev in ${2:-$hdevs}
|
||||
do
|
||||
uwb_rc=$(readlink -f $dev/uwb_rc)
|
||||
if cat $uwb_rc/beacon | grep -q -- "-1"
|
||||
then
|
||||
echo 13 0 > $uwb_rc/beacon
|
||||
echo I: started beaconing on ch 13 on $(basename $uwb_rc) >&2
|
||||
fi
|
||||
echo $host_CHID > $dev/wusb_chid
|
||||
echo I: started host $(basename $dev) >&2
|
||||
done
|
||||
@ -95,9 +89,6 @@ case $1 in
|
||||
do
|
||||
echo 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 > $dev/wusb_chid
|
||||
echo I: stopped host $(basename $dev) >&2
|
||||
uwb_rc=$(readlink -f $dev/uwb_rc)
|
||||
echo -1 | cat > $uwb_rc/beacon
|
||||
echo I: stopped beaconing on $(basename $uwb_rc) >&2
|
||||
done
|
||||
;;
|
||||
set-chid)
|
||||
|
@ -152,3 +152,4 @@
|
||||
151 -> ADS Tech Instant HDTV [1421:0380]
|
||||
152 -> Asus Tiger Rev:1.00 [1043:4857]
|
||||
153 -> Kworld Plus TV Analog Lite PCI [17de:7128]
|
||||
154 -> Avermedia AVerTV GO 007 FM Plus [1461:f31d]
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
x
Reference in New Issue
Block a user