Merge master.kernel.org:/pub/scm/linux/kernel/git/torvalds/linux-2.6

Conflicts:

	drivers/macintosh/adbhid.c
This commit is contained in:
Dmitry Torokhov 2007-10-12 21:27:47 -04:00
commit b981d8b3f5
7628 changed files with 537075 additions and 239356 deletions

2
.gitignore vendored
View File

@ -7,6 +7,7 @@
#
.*
*.o
*.o.*
*.a
*.s
*.ko
@ -22,6 +23,7 @@
tags
TAGS
vmlinux*
!vmlinux.lds.S
System.map
Module.symvers

46
CREDITS
View File

@ -665,6 +665,11 @@ D: Minor updates to SCSI types, added /proc/pid/maps protection
S: (ask for current address)
S: USA
N: Robin Cornelius
E: robincornelius@users.sourceforge.net
D: Ralink rt2x00 WLAN driver
S: Cornwall, U.K.
N: Mark Corner
E: mcorner@umich.edu
W: http://www.eecs.umich.edu/~mcorner/
@ -679,6 +684,11 @@ D: Kernel module SMART utilities
S: Santa Cruz, California
S: USA
N: Luis Correia
E: lfcorreia@users.sf.net
D: Ralink rt2x00 WLAN driver
S: Belas, Portugal
N: Alan Cox
W: http://www.linux.org.uk/diary/
D: Linux Networking (0.99.10->2.0.29)
@ -833,6 +843,12 @@ S: Lancs
S: PR4 6AX
S: United Kingdom
N: Ivo van Doorn
E: IvDoorn@gmail.com
W: http://www.mendiosus.nl
D: Ralink rt2x00 WLAN driver
S: Haarlem, The Netherlands
N: John G Dorsey
E: john+@cs.cmu.edu
D: ARM Linux ports to Assabet/Neponset, Spot
@ -966,6 +982,7 @@ N: Pekka Enberg
E: penberg@cs.helsinki.fi
W: http://www.cs.helsinki.fi/u/penberg/
D: Various kernel hacks, fixes, and cleanups.
D: Slab allocators
S: Finland
N: David Engebretsen
@ -1939,8 +1956,8 @@ D: for Menuconfig's lxdialog.
N: Christoph Lameter
E: christoph@lameter.com
D: Digiboard PC/Xe and PC/Xi, Digiboard EPCA
D: Early protocol filter for bridging code
D: Bug fixes
D: NUMA support, Slab allocators, Page migration
D: Scalability, Time subsystem
N: Paul Laufer
E: paul@laufernet.com
@ -2212,13 +2229,13 @@ S: 2300 Copenhagen S
S: Denmark
N: Claudio S. Matsuoka
E: claudio@conectiva.com
E: claudio@helllabs.org
E: cmatsuoka@gmail.com
E: claudio@mandriva.com
W: http://helllabs.org/~claudio
D: V4L, OV511 driver hacks
D: V4L, OV511 and HDA-codec hacks
S: Conectiva S.A.
S: R. Tocantins 89
S: 80050-430 Curitiba PR
S: Souza Naves 1250
S: 80050-040 Curitiba PR
S: Brazil
N: Heinz Mauelshagen
@ -3516,6 +3533,12 @@ S: Maastrichterweg 63
S: 5554 GG Valkenswaard
S: The Netherlands
N: Mark Wallis
E: mwallis@serialmonkey.com
W: http://mark.serialmonkey.com
D: Ralink rt2x00 WLAN driver
S: Newcastle, Australia
N: Peter Shaobo Wang
E: pwang@mmdcorp.com
W: http://www.mmdcorp.com/pw/linux
@ -3650,6 +3673,15 @@ S: Alte Regensburger Str. 11a
S: 93149 Nittenau
S: Germany
N: Gertjan van Wingerde
E: gwingerde@home.nl
D: Ralink rt2x00 WLAN driver
D: Minix V2 file-system
D: Misc fixes
S: Geessinkweg 177
S: 7544 TX Enschede
S: The Netherlands
N: Lars Wirzenius
E: liw@iki.fi
D: Linux System Administrator's Guide, author, former maintainer

View File

@ -134,8 +134,6 @@ dvb/
- info on Linux Digital Video Broadcast (DVB) subsystem.
early-userspace/
- info about initramfs, klibc, and userspace early during boot.
ecryptfs.txt
- docs on eCryptfs: stacked cryptographic filesystem for Linux.
eisa.txt
- info on EISA bus support.
exception.txt

View File

@ -45,6 +45,7 @@ o nfs-utils 1.0.5 # showmount --version
o procps 3.2.0 # ps --version
o oprofile 0.9 # oprofiled --version
o udev 081 # udevinfo -V
o grub 0.93 # grub --version
Kernel compilation
==================

View File

@ -633,12 +633,27 @@ covers RTL which is used frequently with assembly language in the kernel.
Kernel developers like to be seen as literate. Do mind the spelling
of kernel messages to make a good impression. Do not use crippled
words like "dont" and use "do not" or "don't" instead.
words like "dont"; use "do not" or "don't" instead. Make the messages
concise, clear, and unambiguous.
Kernel messages do not have to be terminated with a period.
Printing numbers in parentheses (%d) adds no value and should be avoided.
There are a number of driver model diagnostic macros in <linux/device.h>
which you should use to make sure messages are matched to the right device
and driver, and are tagged with the right level: dev_err(), dev_warn(),
dev_info(), and so forth. For messages that aren't associated with a
particular device, <linux/kernel.h> defines pr_debug() and pr_info().
Coming up with good debugging messages can be quite a challenge; and once
you have them, they can be a huge help for remote troubleshooting. Such
messages should be compiled out when the DEBUG symbol is not defined (that
is, by default they are not included). When you use dev_dbg() or pr_debug(),
that's automatic. Many subsystems have Kconfig options to turn on -DDEBUG.
A related convention uses VERBOSE_DEBUG to add dev_vdbg() messages to the
ones already enabled by DEBUG.
Chapter 14: Allocating memory
@ -790,4 +805,5 @@ Kernel CodingStyle, by greg@kroah.com at OLS 2002:
http://www.kroah.com/linux/talks/ols_2002_kernel_codingstyle_talk/html/
--
Last updated on 2006-December-06.
Last updated on 2007-July-13.

View File

@ -26,7 +26,7 @@ Part Ia - Using large dma-coherent buffers
void *
dma_alloc_coherent(struct device *dev, size_t size,
dma_addr_t *dma_handle, int flag)
dma_addr_t *dma_handle, gfp_t flag)
void *
pci_alloc_consistent(struct pci_dev *dev, size_t size,
dma_addr_t *dma_handle)
@ -38,7 +38,7 @@ to make sure to flush the processor's write buffers before telling
devices to read that memory.)
This routine allocates a region of <size> bytes of consistent memory.
it also returns a <dma_handle> which may be cast to an unsigned
It also returns a <dma_handle> which may be cast to an unsigned
integer the same width as the bus and used as the physical address
base of the region.
@ -52,21 +52,24 @@ The simplest way to do that is to use the dma_pool calls (see below).
The flag parameter (dma_alloc_coherent only) allows the caller to
specify the GFP_ flags (see kmalloc) for the allocation (the
implementation may chose to ignore flags that affect the location of
implementation may choose to ignore flags that affect the location of
the returned memory, like GFP_DMA). For pci_alloc_consistent, you
must assume GFP_ATOMIC behaviour.
void
dma_free_coherent(struct device *dev, size_t size, void *cpu_addr
dma_free_coherent(struct device *dev, size_t size, void *cpu_addr,
dma_addr_t dma_handle)
void
pci_free_consistent(struct pci_dev *dev, size_t size, void *cpu_addr
pci_free_consistent(struct pci_dev *dev, size_t size, void *cpu_addr,
dma_addr_t dma_handle)
Free the region of consistent memory you previously allocated. dev,
size and dma_handle must all be the same as those passed into the
consistent allocate. cpu_addr must be the virtual address returned by
the consistent allocate
the consistent allocate.
Note that unlike their sibling allocation calls, these routines
may only be called with IRQs enabled.
Part Ib - Using small dma-coherent buffers
@ -77,9 +80,9 @@ To get this part of the dma_ API, you must #include <linux/dmapool.h>
Many drivers need lots of small dma-coherent memory regions for DMA
descriptors or I/O buffers. Rather than allocating in units of a page
or more using dma_alloc_coherent(), you can use DMA pools. These work
much like a struct kmem_cache, except that they use the dma-coherent allocator
much like a struct kmem_cache, except that they use the dma-coherent allocator,
not __get_free_pages(). Also, they understand common hardware constraints
for alignment, like queue heads needing to be aligned on N byte boundaries.
for alignment, like queue heads needing to be aligned on N-byte boundaries.
struct dma_pool *
@ -102,15 +105,15 @@ crossing restrictions, pass 0 for alloc; passing 4096 says memory allocated
from this pool must not cross 4KByte boundaries.
void *dma_pool_alloc(struct dma_pool *pool, int gfp_flags,
void *dma_pool_alloc(struct dma_pool *pool, gfp_t gfp_flags,
dma_addr_t *dma_handle);
void *pci_pool_alloc(struct pci_pool *pool, int gfp_flags,
void *pci_pool_alloc(struct pci_pool *pool, gfp_t gfp_flags,
dma_addr_t *dma_handle);
This allocates memory from the pool; the returned memory will meet the size
and alignment requirements specified at creation time. Pass GFP_ATOMIC to
prevent blocking, or if it's permitted (not in_interrupt, not holding SMP locks)
prevent blocking, or if it's permitted (not in_interrupt, not holding SMP locks),
pass GFP_KERNEL to allow blocking. Like dma_alloc_coherent(), this returns
two values: an address usable by the cpu, and the dma address usable by the
pool's device.
@ -123,7 +126,7 @@ pool's device.
dma_addr_t addr);
This puts memory back into the pool. The pool is what was passed to
the pool allocation routine; the cpu and dma addresses are what
the pool allocation routine; the cpu (vaddr) and dma addresses are what
were returned when that routine allocated the memory being freed.
@ -209,18 +212,18 @@ Notes: Not all memory regions in a machine can be mapped by this
API. Further, regions that appear to be physically contiguous in
kernel virtual space may not be contiguous as physical memory. Since
this API does not provide any scatter/gather capability, it will fail
if the user tries to map a non physically contiguous piece of memory.
if the user tries to map a non-physically contiguous piece of memory.
For this reason, it is recommended that memory mapped by this API be
obtained only from sources which guarantee to be physically contiguous
obtained only from sources which guarantee it to be physically contiguous
(like kmalloc).
Further, the physical address of the memory must be within the
dma_mask of the device (the dma_mask represents a bit mask of the
addressable region for the device. i.e. if the physical address of
addressable region for the device. I.e., if the physical address of
the memory anded with the dma_mask is still equal to the physical
address, then the device can perform DMA to the memory). In order to
ensure that the memory allocated by kmalloc is within the dma_mask,
the driver may specify various platform dependent flags to restrict
the driver may specify various platform-dependent flags to restrict
the physical memory range of the allocation (e.g. on x86, GFP_DMA
guarantees to be within the first 16Mb of available physical memory,
as required by ISA devices).
@ -244,14 +247,14 @@ are guaranteed also to be cache line boundaries).
DMA_TO_DEVICE synchronisation must be done after the last modification
of the memory region by the software and before it is handed off to
the driver. Once this primitive is used. Memory covered by this
primitive should be treated as read only by the device. If the device
the driver. Once this primitive is used, memory covered by this
primitive should be treated as read-only by the device. If the device
may write to it at any point, it should be DMA_BIDIRECTIONAL (see
below).
DMA_FROM_DEVICE synchronisation must be done before the driver
accesses data that may be changed by the device. This memory should
be treated as read only by the driver. If the driver needs to write
be treated as read-only by the driver. If the driver needs to write
to it at any point, it should be DMA_BIDIRECTIONAL (see below).
DMA_BIDIRECTIONAL requires special handling: it means that the driver
@ -261,7 +264,7 @@ you must always sync bidirectional memory twice: once before the
memory is handed off to the device (to make sure all memory changes
are flushed from the processor) and once before the data may be
accessed after being used by the device (to make sure any processor
cache lines are updated with data that the device may have changed.
cache lines are updated with data that the device may have changed).
void
dma_unmap_single(struct device *dev, dma_addr_t dma_addr, size_t size,
@ -302,8 +305,8 @@ pci_dma_mapping_error(dma_addr_t dma_addr)
In some circumstances dma_map_single and dma_map_page will fail to create
a mapping. A driver can check for these errors by testing the returned
dma address with dma_mapping_error(). A non zero return value means the mapping
could not be created and the driver should take appropriate action (eg
dma address with dma_mapping_error(). A non-zero return value means the mapping
could not be created and the driver should take appropriate action (e.g.
reduce current DMA mapping usage or delay and try again later).
int
@ -315,7 +318,7 @@ reduce current DMA mapping usage or delay and try again later).
Maps a scatter gather list from the block layer.
Returns: the number of physical segments mapped (this may be shorted
Returns: the number of physical segments mapped (this may be shorter
than <nents> passed in if the block layer determines that some
elements of the scatter/gather list are physically adjacent and thus
may be mapped with a single entry).
@ -357,7 +360,7 @@ accessed sg->address and sg->length as shown above.
pci_unmap_sg(struct pci_dev *hwdev, struct scatterlist *sg,
int nents, int direction)
unmap the previously mapped scatter/gather list. All the parameters
Unmap the previously mapped scatter/gather list. All the parameters
must be the same as those passed in to the scatter/gather mapping
API.
@ -377,7 +380,7 @@ void
pci_dma_sync_sg(struct pci_dev *hwdev, struct scatterlist *sg,
int nelems, int direction)
synchronise a single contiguous or scatter/gather mapping. All the
Synchronise a single contiguous or scatter/gather mapping. All the
parameters must be the same as those passed into the single mapping
API.
@ -406,7 +409,7 @@ API at all.
void *
dma_alloc_noncoherent(struct device *dev, size_t size,
dma_addr_t *dma_handle, int flag)
dma_addr_t *dma_handle, gfp_t flag)
Identical to dma_alloc_coherent() except that the platform will
choose to return either consistent or non-consistent memory as it sees
@ -426,34 +429,34 @@ void
dma_free_noncoherent(struct device *dev, size_t size, void *cpu_addr,
dma_addr_t dma_handle)
free memory allocated by the nonconsistent API. All parameters must
Free memory allocated by the nonconsistent API. All parameters must
be identical to those passed in (and returned by
dma_alloc_noncoherent()).
int
dma_is_consistent(struct device *dev, dma_addr_t dma_handle)
returns true if the device dev is performing consistent DMA on the memory
Returns true if the device dev is performing consistent DMA on the memory
area pointed to by the dma_handle.
int
dma_get_cache_alignment(void)
returns the processor cache alignment. This is the absolute minimum
Returns the processor cache alignment. This is the absolute minimum
alignment *and* width that you must observe when either mapping
memory or doing partial flushes.
Notes: This API may return a number *larger* than the actual cache
line, but it will guarantee that one or more cache lines fit exactly
into the width returned by this call. It will also always be a power
of two for easy alignment
of two for easy alignment.
void
dma_sync_single_range(struct device *dev, dma_addr_t dma_handle,
unsigned long offset, size_t size,
enum dma_data_direction direction)
does a partial sync. starting at offset and continuing for size. You
Does a partial sync, starting at offset and continuing for size. You
must be careful to observe the cache alignment and width when doing
anything like this. You must also be extra careful about accessing
memory you intend to sync partially.
@ -472,21 +475,20 @@ dma_declare_coherent_memory(struct device *dev, dma_addr_t bus_addr,
dma_addr_t device_addr, size_t size, int
flags)
Declare region of memory to be handed out by dma_alloc_coherent when
it's asked for coherent memory for this device.
bus_addr is the physical address to which the memory is currently
assigned in the bus responding region (this will be used by the
platform to perform the mapping)
platform to perform the mapping).
device_addr is the physical address the device needs to be programmed
with actually to address this memory (this will be handed out as the
dma_addr_t in dma_alloc_coherent())
dma_addr_t in dma_alloc_coherent()).
size is the size of the area (must be multiples of PAGE_SIZE).
flags can be or'd together and are
flags can be or'd together and are:
DMA_MEMORY_MAP - request that the memory returned from
dma_alloc_coherent() be directly writable.
@ -494,7 +496,7 @@ dma_alloc_coherent() be directly writable.
DMA_MEMORY_IO - request that the memory returned from
dma_alloc_coherent() be addressable using read/write/memcpy_toio etc.
One or both of these flags must be present
One or both of these flags must be present.
DMA_MEMORY_INCLUDES_CHILDREN - make the declared memory be allocated by
dma_alloc_coherent of any child devices of this one (for memory residing
@ -528,7 +530,7 @@ dma_release_declared_memory(struct device *dev)
Remove the memory region previously declared from the system. This
API performs *no* in-use checking for this region and will return
unconditionally having removed all the required structures. It is the
drivers job to ensure that no parts of this memory region are
driver's job to ensure that no parts of this memory region are
currently in use.
void *
@ -538,12 +540,10 @@ dma_mark_declared_memory_occupied(struct device *dev,
This is used to occupy specific regions of the declared space
(dma_alloc_coherent() will hand out the first free region it finds).
device_addr is the *device* address of the region requested
device_addr is the *device* address of the region requested.
size is the size (and should be a page sized multiple).
size is the size (and should be a page-sized multiple).
The return value will be either a pointer to the processor virtual
address of the memory, or an error (via PTR_ERR()) if any part of the
region is occupied.

View File

@ -11,15 +11,15 @@ DOCBOOKS := wanbook.xml z8530book.xml mcabook.xml videobook.xml \
procfs-guide.xml writing_usb_driver.xml \
kernel-api.xml filesystems.xml lsm.xml usb.xml \
gadget.xml libata.xml mtdnand.xml librs.xml rapidio.xml \
genericirq.xml
genericirq.xml s390-drivers.xml
###
# The build process is as follows (targets):
# (xmldocs)
# file.tmpl --> file.xml +--> file.ps (psdocs)
# +--> file.pdf (pdfdocs)
# +--> DIR=file (htmldocs)
# +--> man/ (mandocs)
# (xmldocs) [by docproc]
# file.tmpl --> file.xml +--> file.ps (psdocs) [by db2ps or xmlto]
# +--> file.pdf (pdfdocs) [by db2pdf or xmlto]
# +--> DIR=file (htmldocs) [by xmlto]
# +--> man/ (mandocs) [by xmlto]
# for PDF and PS output you can choose between xmlto and docbook-utils tools

View File

@ -316,7 +316,8 @@ CPU B: spin_unlock_irqrestore(&amp;dev_lock, flags)
<chapter id="pubfunctions">
<title>Public Functions Provided</title>
!Einclude/asm-i386/io.h
!Iinclude/asm-i386/io.h
!Elib/iomap.c
</chapter>
</book>

View File

@ -159,7 +159,6 @@ X!Ilib/string.c
!Earch/i386/lib/usercopy.c
</sect1>
<sect1><title>More Memory Management Functions</title>
!Iinclude/linux/rmap.h
!Emm/readahead.c
!Emm/filemap.c
!Emm/memory.c
@ -241,17 +240,23 @@ X!Ilib/string.c
<sect1><title>Driver Support</title>
!Enet/core/dev.c
!Enet/ethernet/eth.c
!Enet/sched/sch_generic.c
!Iinclude/linux/etherdevice.h
!Iinclude/linux/netdevice.h
</sect1>
<sect1><title>PHY Support</title>
!Edrivers/net/phy/phy.c
!Idrivers/net/phy/phy.c
!Edrivers/net/phy/phy_device.c
!Idrivers/net/phy/phy_device.c
!Edrivers/net/phy/mdio_bus.c
!Idrivers/net/phy/mdio_bus.c
<!-- FIXME: Removed for now since no structured comments in source
X!Enet/core/wireless.c
-->
</sect1>
<!-- FIXME: Removed for now since no structured comments in source
<sect1><title>Wireless</title>
X!Enet/core/wireless.c
</sect1>
-->
<sect1><title>Synchronous PPP</title>
!Edrivers/net/wan/syncppp.c
</sect1>
@ -381,7 +386,6 @@ X!Edrivers/base/interface.c
!Edrivers/base/bus.c
</sect1>
<sect1><title>Device Drivers Power Management</title>
!Edrivers/base/power/main.c
!Edrivers/base/power/resume.c
!Edrivers/base/power/suspend.c
</sect1>
@ -399,15 +403,19 @@ X!Edrivers/acpi/pci_bind.c
-->
</sect1>
<sect1><title>Device drivers PnP support</title>
!Edrivers/pnp/core.c
!Idrivers/pnp/core.c
<!-- No correct structured comments
X!Edrivers/pnp/system.c
-->
!Edrivers/pnp/card.c
!Edrivers/pnp/driver.c
!Idrivers/pnp/driver.c
!Edrivers/pnp/manager.c
!Edrivers/pnp/support.c
</sect1>
<sect1><title>Userspace IO devices</title>
!Edrivers/uio/uio.c
!Iinclude/linux/uio_driver.h
</sect1>
</chapter>
<chapter id="blkdev">
@ -701,14 +709,22 @@ X!Idrivers/video/console/fonts.c
<chapter id="splice">
<title>splice API</title>
<para>)
<para>
splice is a method for moving blocks of data around inside the
kernel, without continually transferring it between the kernel
kernel, without continually transferring them between the kernel
and user space.
</para>
!Iinclude/linux/splice.h
!Ffs/splice.c
</chapter>
<chapter id="pipes">
<title>pipes API</title>
<para>
Pipe interfaces are all for in-kernel (builtin image) use.
They are not exported for use by modules.
</para>
!Iinclude/linux/pipe_fs_i.h
!Ffs/pipe.c
</chapter>
</book>

View File

@ -219,7 +219,7 @@
</para>
<sect1 id="lock-intro">
<title>Two Main Types of Kernel Locks: Spinlocks and Semaphores</title>
<title>Three Main Types of Kernel Locks: Spinlocks, Mutexes and Semaphores</title>
<para>
There are three main types of kernel locks. The fundamental type

View File

@ -456,8 +456,9 @@ void (*irq_clear) (struct ata_port *);
<sect2><title>SATA phy read/write</title>
<programlisting>
u32 (*scr_read) (struct ata_port *ap, unsigned int sc_reg);
void (*scr_write) (struct ata_port *ap, unsigned int sc_reg,
int (*scr_read) (struct ata_port *ap, unsigned int sc_reg,
u32 *val);
int (*scr_write) (struct ata_port *ap, unsigned int sc_reg,
u32 val);
</programlisting>

View File

@ -0,0 +1,149 @@
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.1.2//EN"
"http://www.oasis-open.org/docbook/xml/4.1.2/docbookx.dtd" []>
<book id="s390drivers">
<bookinfo>
<title>Writing s390 channel device drivers</title>
<authorgroup>
<author>
<firstname>Cornelia</firstname>
<surname>Huck</surname>
<affiliation>
<address>
<email>cornelia.huck@de.ibm.com</email>
</address>
</affiliation>
</author>
</authorgroup>
<copyright>
<year>2007</year>
<holder>IBM Corp.</holder>
</copyright>
<legalnotice>
<para>
This documentation is free software; you can redistribute
it and/or modify it under the terms of the GNU General Public
License as published by the Free Software Foundation; either
version 2 of the License, or (at your option) any later
version.
</para>
<para>
This program is distributed in the hope that it will be
useful, but WITHOUT ANY WARRANTY; without even the implied
warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details.
</para>
<para>
You should have received a copy of the GNU General Public
License along with this program; if not, write to the Free
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
MA 02111-1307 USA
</para>
<para>
For more details see the file COPYING in the source
distribution of Linux.
</para>
</legalnotice>
</bookinfo>
<toc></toc>
<chapter id="intro">
<title>Introduction</title>
<para>
This document describes the interfaces available for device drivers that
drive s390 based channel attached devices. This includes interfaces for
interaction with the hardware and interfaces for interacting with the
common driver core. Those interfaces are provided by the s390 common I/O
layer.
</para>
<para>
The document assumes a familiarity with the technical terms associated
with the s390 channel I/O architecture. For a description of this
architecture, please refer to the "z/Architecture: Principles of
Operation", IBM publication no. SA22-7832.
</para>
<para>
While most I/O devices on a s390 system are typically driven through the
channel I/O mechanism described here, there are various other methods
(like the diag interface). These are out of the scope of this document.
</para>
<para>
Some additional information can also be found in the kernel source
under Documentation/s390/driver-model.txt.
</para>
</chapter>
<chapter id="ccw">
<title>The ccw bus</title>
<para>
The ccw bus typically contains the majority of devices available to
a s390 system. Named after the channel command word (ccw), the basic
command structure used to address its devices, the ccw bus contains
so-called channel attached devices. They are addressed via subchannels,
visible on the css bus. A device driver, however, will never interact
with the subchannel directly, but only via the device on the ccw bus,
the ccw device.
</para>
<sect1 id="channelIO">
<title>I/O functions for channel-attached devices</title>
<para>
Some hardware structures have been translated into C structures for use
by the common I/O layer and device drivers. For more information on
the hardware structures represented here, please consult the Principles
of Operation.
</para>
!Iinclude/asm-s390/cio.h
</sect1>
<sect1 id="ccwdev">
<title>ccw devices</title>
<para>
Devices that want to initiate channel I/O need to attach to the ccw bus.
Interaction with the driver core is done via the common I/O layer, which
provides the abstractions of ccw devices and ccw device drivers.
</para>
<para>
The functions that initiate or terminate channel I/O all act upon a
ccw device structure. Device drivers must not bypass those functions
or strange side effects may happen.
</para>
!Iinclude/asm-s390/ccwdev.h
!Edrivers/s390/cio/device.c
!Edrivers/s390/cio/device_ops.c
</sect1>
<sect1 id="cmf">
<title>The channel-measurement facility</title>
<para>
The channel-measurement facility provides a means to collect
measurement data which is made available by the channel subsystem
for each channel attached device.
</para>
!Iinclude/asm-s390/cmb.h
!Edrivers/s390/cio/cmf.c
</sect1>
</chapter>
<chapter id="ccwgroup">
<title>The ccwgroup bus</title>
<para>
The ccwgroup bus only contains artificial devices, created by the user.
Many networking devices (e.g. qeth) are in fact composed of several
ccw devices (like read, write and data channel for qeth). The
ccwgroup bus provides a mechanism to create a meta-device which
contains those ccw devices as slave devices and can be associated
with the netdevice.
</para>
<sect1 id="ccwgroupdevices">
<title>ccw group devices</title>
!Iinclude/asm-s390/ccwgroup.h
!Edrivers/s390/cio/ccwgroup.c
</sect1>
</chapter>
</book>

View File

@ -0,0 +1,607 @@
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.2//EN"
"http://www.oasis-open.org/docbook/xml/4.2/docbookx.dtd" []>
<book id="index">
<bookinfo>
<title>The Userspace I/O HOWTO</title>
<author>
<firstname>Hans-Jürgen</firstname>
<surname>Koch</surname>
<authorblurb><para>Linux developer, Linutronix</para></authorblurb>
<affiliation>
<orgname>
<ulink url="http://www.linutronix.de">Linutronix</ulink>
</orgname>
<address>
<email>hjk@linutronix.de</email>
</address>
</affiliation>
</author>
<pubdate>2006-12-11</pubdate>
<abstract>
<para>This HOWTO describes the concept and usage of the Linux kernel's
Userspace I/O system.</para>
</abstract>
<revhistory>
<revision>
<revnumber>0.3</revnumber>
<date>2007-04-29</date>
<authorinitials>hjk</authorinitials>
<revremark>Added section about userspace drivers.</revremark>
</revision>
<revision>
<revnumber>0.2</revnumber>
<date>2007-02-13</date>
<authorinitials>hjk</authorinitials>
<revremark>Update after multiple mappings were added.</revremark>
</revision>
<revision>
<revnumber>0.1</revnumber>
<date>2006-12-11</date>
<authorinitials>hjk</authorinitials>
<revremark>First draft.</revremark>
</revision>
</revhistory>
</bookinfo>
<chapter id="aboutthisdoc">
<?dbhtml filename="about.html"?>
<title>About this document</title>
<sect1 id="copyright">
<?dbhtml filename="copyright.html"?>
<title>Copyright and License</title>
<para>
Copyright (c) 2006 by Hans-Jürgen Koch.</para>
<para>
This documentation is Free Software licensed under the terms of the
GPL version 2.
</para>
</sect1>
<sect1 id="translations">
<?dbhtml filename="translations.html"?>
<title>Translations</title>
<para>If you know of any translations for this document, or you are
interested in translating it, please email me
<email>hjk@linutronix.de</email>.
</para>
</sect1>
<sect1 id="preface">
<title>Preface</title>
<para>
For many types of devices, creating a Linux kernel driver is
overkill. All that is really needed is some way to handle an
interrupt and provide access to the memory space of the
device. The logic of controlling the device does not
necessarily have to be within the kernel, as the device does
not need to take advantage of any of the other resources that the
kernel provides. One common class of such devices is
industrial I/O cards.
</para>
<para>
To address this situation, the userspace I/O system (UIO) was
designed. For typical industrial I/O cards, only a very small
kernel module is needed. The main part of the driver will run in
user space. This simplifies development and reduces the risk of
serious bugs within a kernel module.
</para>
</sect1>
<sect1 id="thanks">
<title>Acknowledgments</title>
<para>I'd like to thank Thomas Gleixner and Benedikt Spranger of
Linutronix, who have not only written most of the UIO code, but also
helped greatly writing this HOWTO by giving me all kinds of background
information.</para>
</sect1>
<sect1 id="feedback">
<title>Feedback</title>
<para>Find something wrong with this document? (Or perhaps something
right?) I would love to hear from you. Please email me at
<email>hjk@linutronix.de</email>.</para>
</sect1>
</chapter>
<chapter id="about">
<?dbhtml filename="about.html"?>
<title>About UIO</title>
<para>If you use UIO for your card's driver, here's what you get:</para>
<itemizedlist>
<listitem>
<para>only one small kernel module to write and maintain.</para>
</listitem>
<listitem>
<para>develop the main part of your driver in user space,
with all the tools and libraries you're used to.</para>
</listitem>
<listitem>
<para>bugs in your driver won't crash the kernel.</para>
</listitem>
<listitem>
<para>updates of your driver can take place without recompiling
the kernel.</para>
</listitem>
</itemizedlist>
<sect1 id="how_uio_works">
<title>How UIO works</title>
<para>
Each UIO device is accessed through a device file and several
sysfs attribute files. The device file will be called
<filename>/dev/uio0</filename> for the first device, and
<filename>/dev/uio1</filename>, <filename>/dev/uio2</filename>
and so on for subsequent devices.
</para>
<para><filename>/dev/uioX</filename> is used to access the
address space of the card. Just use
<function>mmap()</function> to access registers or RAM
locations of your card.
</para>
<para>
Interrupts are handled by reading from
<filename>/dev/uioX</filename>. A blocking
<function>read()</function> from
<filename>/dev/uioX</filename> will return as soon as an
interrupt occurs. You can also use
<function>select()</function> on
<filename>/dev/uioX</filename> to wait for an interrupt. The
integer value read from <filename>/dev/uioX</filename>
represents the total interrupt count. You can use this number
to figure out if you missed some interrupts.
</para>
<para>
To handle interrupts properly, your custom kernel module can
provide its own interrupt handler. It will automatically be
called by the built-in handler.
</para>
<para>
For cards that don't generate interrupts but need to be
polled, there is the possibility to set up a timer that
triggers the interrupt handler at configurable time intervals.
See <filename>drivers/uio/uio_dummy.c</filename> for an
example of this technique.
</para>
<para>
Each driver provides attributes that are used to read or write
variables. These attributes are accessible through sysfs
files. A custom kernel driver module can add its own
attributes to the device owned by the uio driver, but not
to the UIO device itself at this time. This might change in the
future if it is found to be useful.
</para>
<para>
The following standard attributes are provided by the UIO
framework:
</para>
<itemizedlist>
<listitem>
<para>
<filename>name</filename>: The name of your device. It is
recommended to use the name of your kernel module for this.
</para>
</listitem>
<listitem>
<para>
<filename>version</filename>: A version string defined by your
driver. This allows the user space part of your driver to deal
with different versions of the kernel module.
</para>
</listitem>
<listitem>
<para>
<filename>event</filename>: The total number of interrupts
handled by the driver since the last time the device node was
read.
</para>
</listitem>
</itemizedlist>
<para>
These attributes appear under the
<filename>/sys/class/uio/uioX</filename> directory. Please
note that this directory might be a symlink, and not a real
directory. Any userspace code that accesses it must be able
to handle this.
</para>
<para>
Each UIO device can make one or more memory regions available for
memory mapping. This is necessary because some industrial I/O cards
require access to more than one PCI memory region in a driver.
</para>
<para>
Each mapping has its own directory in sysfs, the first mapping
appears as <filename>/sys/class/uio/uioX/maps/map0/</filename>.
Subsequent mappings create directories <filename>map1/</filename>,
<filename>map2/</filename>, and so on. These directories will only
appear if the size of the mapping is not 0.
</para>
<para>
Each <filename>mapX/</filename> directory contains two read-only files
that show start address and size of the memory:
</para>
<itemizedlist>
<listitem>
<para>
<filename>addr</filename>: The address of memory that can be mapped.
</para>
</listitem>
<listitem>
<para>
<filename>size</filename>: The size, in bytes, of the memory
pointed to by addr.
</para>
</listitem>
</itemizedlist>
<para>
From userspace, the different mappings are distinguished by adjusting
the <varname>offset</varname> parameter of the
<function>mmap()</function> call. To map the memory of mapping N, you
have to use N times the page size as your offset:
</para>
<programlisting format="linespecific">
offset = N * getpagesize();
</programlisting>
</sect1>
</chapter>
<chapter id="using-uio_dummy" xreflabel="Using uio_dummy">
<?dbhtml filename="using-uio_dummy.html"?>
<title>Using uio_dummy</title>
<para>
Well, there is no real use for uio_dummy. Its only purpose is
to test most parts of the UIO system (everything except
hardware interrupts), and to serve as an example for the
kernel module that you will have to write yourself.
</para>
<sect1 id="what_uio_dummy_does">
<title>What uio_dummy does</title>
<para>
The kernel module <filename>uio_dummy.ko</filename> creates a
device that uses a timer to generate periodic interrupts. The
interrupt handler does nothing but increment a counter. The
driver adds two custom attributes, <varname>count</varname>
and <varname>freq</varname>, that appear under
<filename>/sys/devices/platform/uio_dummy/</filename>.
</para>
<para>
The attribute <varname>count</varname> can be read and
written. The associated file
<filename>/sys/devices/platform/uio_dummy/count</filename>
appears as a normal text file and contains the total number of
timer interrupts. If you look at it (e.g. using
<function>cat</function>), you'll notice it is slowly counting
up.
</para>
<para>
The attribute <varname>freq</varname> can be read and written.
The content of
<filename>/sys/devices/platform/uio_dummy/freq</filename>
represents the number of system timer ticks between two timer
interrupts. The default value of <varname>freq</varname> is
the value of the kernel variable <varname>HZ</varname>, which
gives you an interval of one second. Lower values will
increase the frequency. Try the following:
</para>
<programlisting format="linespecific">
cd /sys/devices/platform/uio_dummy/
echo 100 > freq
</programlisting>
<para>
Use <function>cat count</function> to see how the interrupt
frequency changes.
</para>
</sect1>
</chapter>
<chapter id="custom_kernel_module" xreflabel="Writing your own kernel module">
<?dbhtml filename="custom_kernel_module.html"?>
<title>Writing your own kernel module</title>
<para>
Please have a look at <filename>uio_dummy.c</filename> as an
example. The following paragraphs explain the different
sections of this file.
</para>
<sect1 id="uio_info">
<title>struct uio_info</title>
<para>
This structure tells the framework the details of your driver.
Some of the members are required, others are optional.
</para>
<itemizedlist>
<listitem><para>
<varname>char *name</varname>: Required. The name of your driver as
it will appear in sysfs. I recommend using the name of your module for this.
</para></listitem>
<listitem><para>
<varname>char *version</varname>: Required. This string appears in
<filename>/sys/class/uio/uioX/version</filename>.
</para></listitem>
<listitem><para>
<varname>struct uio_mem mem[ MAX_UIO_MAPS ]</varname>: Required if you
have memory that can be mapped with <function>mmap()</function>. For each
mapping you need to fill one of the <varname>uio_mem</varname> structures.
See the description below for details.
</para></listitem>
<listitem><para>
<varname>long irq</varname>: Required. If your hardware generates an
interrupt, it's your module's task to determine the irq number during
initialization. If you don't have a hardware generated interrupt but
want to trigger the interrupt handler in some other way, set
<varname>irq</varname> to <varname>UIO_IRQ_CUSTOM</varname>. The
uio_dummy module does this as it triggers the event mechanism in a timer
routine. If you had no interrupt at all, you could set
<varname>irq</varname> to <varname>UIO_IRQ_NONE</varname>, though this
rarely makes sense.
</para></listitem>
<listitem><para>
<varname>unsigned long irq_flags</varname>: Required if you've set
<varname>irq</varname> to a hardware interrupt number. The flags given
here will be used in the call to <function>request_irq()</function>.
</para></listitem>
<listitem><para>
<varname>int (*mmap)(struct uio_info *info, struct vm_area_struct
*vma)</varname>: Optional. If you need a special
<function>mmap()</function> function, you can set it here. If this
pointer is not NULL, your <function>mmap()</function> will be called
instead of the built-in one.
</para></listitem>
<listitem><para>
<varname>int (*open)(struct uio_info *info, struct inode *inode)
</varname>: Optional. You might want to have your own
<function>open()</function>, e.g. to enable interrupts only when your
device is actually used.
</para></listitem>
<listitem><para>
<varname>int (*release)(struct uio_info *info, struct inode *inode)
</varname>: Optional. If you define your own
<function>open()</function>, you will probably also want a custom
<function>release()</function> function.
</para></listitem>
</itemizedlist>
<para>
Usually, your device will have one or more memory regions that can be mapped
to user space. For each region, you have to set up a
<varname>struct uio_mem</varname> in the <varname>mem[]</varname> array.
Here's a description of the fields of <varname>struct uio_mem</varname>:
</para>
<itemizedlist>
<listitem><para>
<varname>int memtype</varname>: Required if the mapping is used. Set this to
<varname>UIO_MEM_PHYS</varname> if you have physical memory on your
card to be mapped. Use <varname>UIO_MEM_LOGICAL</varname> for logical
memory (e.g. allocated with <function>kmalloc()</function>). There's also
<varname>UIO_MEM_VIRTUAL</varname> for virtual memory.
</para></listitem>
<listitem><para>
<varname>unsigned long addr</varname>: Required if the mapping is used.
Fill in the address of your memory block. This address is the one that
appears in sysfs.
</para></listitem>
<listitem><para>
<varname>unsigned long size</varname>: Fill in the size of the
memory block that <varname>addr</varname> points to. If <varname>size</varname>
is zero, the mapping is considered unused. Note that you
<emphasis>must</emphasis> initialize <varname>size</varname> with zero for
all unused mappings.
</para></listitem>
<listitem><para>
<varname>void *internal_addr</varname>: If you have to access this memory
region from within your kernel module, you will want to map it internally by
using something like <function>ioremap()</function>. Addresses
returned by this function cannot be mapped to user space, so you must not
store it in <varname>addr</varname>. Use <varname>internal_addr</varname>
instead to remember such an address.
</para></listitem>
</itemizedlist>
<para>
Please do not touch the <varname>kobj</varname> element of
<varname>struct uio_mem</varname>! It is used by the UIO framework
to set up sysfs files for this mapping. Simply leave it alone.
</para>
</sect1>
<sect1 id="adding_irq_handler">
<title>Adding an interrupt handler</title>
<para>
What you need to do in your interrupt handler depends on your
hardware and on how you want to handle it. You should try to
keep the amount of code in your kernel interrupt handler low.
If your hardware requires no action that you
<emphasis>have</emphasis> to perform after each interrupt,
then your handler can be empty.</para> <para>If, on the other
hand, your hardware <emphasis>needs</emphasis> some action to
be performed after each interrupt, then you
<emphasis>must</emphasis> do it in your kernel module. Note
that you cannot rely on the userspace part of your driver. Your
userspace program can terminate at any time, possibly leaving
your hardware in a state where proper interrupt handling is
still required.
</para>
<para>
There might also be applications where you want to read data
from your hardware at each interrupt and buffer it in a piece
of kernel memory you've allocated for that purpose. With this
technique you could avoid loss of data if your userspace
program misses an interrupt.
</para>
<para>
A note on shared interrupts: Your driver should support
interrupt sharing whenever this is possible. It is possible if
and only if your driver can detect whether your hardware has
triggered the interrupt or not. This is usually done by looking
at an interrupt status register. If your driver sees that the
IRQ bit is actually set, it will perform its actions, and the
handler returns IRQ_HANDLED. If the driver detects that it was
not your hardware that caused the interrupt, it will do nothing
and return IRQ_NONE, allowing the kernel to call the next
possible interrupt handler.
</para>
<para>
If you decide not to support shared interrupts, your card
won't work in computers with no free interrupts. As this
frequently happens on the PC platform, you can save yourself a
lot of trouble by supporting interrupt sharing.
</para>
</sect1>
</chapter>
<chapter id="userspace_driver" xreflabel="Writing a driver in user space">
<?dbhtml filename="userspace_driver.html"?>
<title>Writing a driver in userspace</title>
<para>
Once you have a working kernel module for your hardware, you can
write the userspace part of your driver. You don't need any special
libraries, your driver can be written in any reasonable language,
you can use floating point numbers and so on. In short, you can
use all the tools and libraries you'd normally use for writing a
userspace application.
</para>
<sect1 id="getting_uio_information">
<title>Getting information about your UIO device</title>
<para>
Information about all UIO devices is available in sysfs. The
first thing you should do in your driver is check
<varname>name</varname> and <varname>version</varname> to
make sure you're talking to the right device and that its kernel
driver has the version you expect.
</para>
<para>
You should also make sure that the memory mapping you need
exists and has the size you expect.
</para>
<para>
There is a tool called <varname>lsuio</varname> that lists
UIO devices and their attributes. It is available here:
</para>
<para>
<ulink url="http://www.osadl.org/projects/downloads/UIO/user/">
http://www.osadl.org/projects/downloads/UIO/user/</ulink>
</para>
<para>
With <varname>lsuio</varname> you can quickly check if your
kernel module is loaded and which attributes it exports.
Have a look at the manpage for details.
</para>
<para>
The source code of <varname>lsuio</varname> can serve as an
example for getting information about a UIO device.
The file <filename>uio_helper.c</filename> contains a lot of
functions you could use in your userspace driver code.
</para>
</sect1>
<sect1 id="mmap_device_memory">
<title>mmap() device memory</title>
<para>
After you made sure you've got the right device with the
memory mappings you need, all you have to do is to call
<function>mmap()</function> to map the device's memory
to userspace.
</para>
<para>
The parameter <varname>offset</varname> of the
<function>mmap()</function> call has a special meaning
for UIO devices: It is used to select which mapping of
your device you want to map. To map the memory of
mapping N, you have to use N times the page size as
your offset:
</para>
<programlisting format="linespecific">
offset = N * getpagesize();
</programlisting>
<para>
N starts from zero, so if you've got only one memory
range to map, set <varname>offset = 0</varname>.
A drawback of this technique is that memory is always
mapped beginning with its start address.
</para>
</sect1>
<sect1 id="wait_for_interrupts">
<title>Waiting for interrupts</title>
<para>
After you successfully mapped your device's memory, you
can access it like an ordinary array. Usually, you will
perform some initialization. After that, your hardware
starts working and will generate an interrupt as soon
as it's finished, has some data available, or needs your
attention because an error occurred.
</para>
<para>
<filename>/dev/uioX</filename> is a read-only file. A
<function>read()</function> will always block until an
interrupt occurs. There is only one legal value for the
<varname>count</varname> parameter of
<function>read()</function>, and that is the size of a
signed 32 bit integer (4). Any other value for
<varname>count</varname> causes <function>read()</function>
to fail. The signed 32 bit integer read is the interrupt
count of your device. If the value is one more than the value
you read the last time, everything is OK. If the difference
is greater than one, you missed interrupts.
</para>
<para>
You can also use <function>select()</function> on
<filename>/dev/uioX</filename>.
</para>
</sect1>
</chapter>
<appendix id="app1">
<title>Further information</title>
<itemizedlist>
<listitem><para>
<ulink url="http://www.osadl.org">
OSADL homepage.</ulink>
</para></listitem>
<listitem><para>
<ulink url="http://www.linutronix.de">
Linutronix homepage.</ulink>
</para></listitem>
</itemizedlist>
</appendix>
</book>

View File

@ -208,7 +208,7 @@ tools. One such tool that is particularly recommended is the Linux
Cross-Reference project, which is able to present source code in a
self-referential, indexed webpage format. An excellent up-to-date
repository of the kernel code may be found at:
http://sosdg.org/~coywolf/lxr/
http://users.sosdg.org/~qiyong/lxr/
The development process
@ -249,6 +249,9 @@ process is as follows:
release a new -rc kernel every week.
- Process continues until the kernel is considered "ready", the
process should last around 6 weeks.
- A list of known regressions present in each -rc release is
tracked at the following URI:
http://kernelnewbies.org/known_regressions
It is worth mentioning what Andrew Morton wrote on the linux-kernel
mailing list about kernel releases:
@ -381,7 +384,7 @@ One of the best ways to put into practice your hacking skills is by fixing
bugs reported by other people. Not only you will help to make the kernel
more stable, you'll learn to fix real world problems and you will improve
your skills, and other developers will be aware of your presence. Fixing
bugs is one of the best ways to earn merit amongst the developers, because
bugs is one of the best ways to get merits among other developers, because
not many people like wasting time fixing other people's bugs.
To work in the already reported bug reports, go to http://bugzilla.kernel.org.

View File

@ -241,68 +241,7 @@ address space of the MSI-X table/MSI-X PBA. Otherwise, the PCI subsystem
will fail enabling MSI-X on its hardware device when it calls the function
pci_enable_msix().
5.3.2 Handling MSI-X allocation
Determining the number of MSI-X vectors allocated to a function is
dependent on the number of MSI capable devices and MSI-X capable
devices populated in the system. The policy of allocating MSI-X
vectors to a function is defined as the following:
#of MSI-X vectors allocated to a function = (x - y)/z where
x = The number of available PCI vector resources by the time
the device driver calls pci_enable_msix(). The PCI vector
resources is the sum of the number of unassigned vectors
(new) and the number of released vectors when any MSI/MSI-X
device driver switches its hardware device back to a legacy
mode or is hot-removed. The number of unassigned vectors
may exclude some vectors reserved, as defined in parameter
NR_HP_RESERVED_VECTORS, for the case where the system is
capable of supporting hot-add/hot-remove operations. Users
may change the value defined in NR_HR_RESERVED_VECTORS to
meet their specific needs.
y = The number of MSI capable devices populated in the system.
This policy ensures that each MSI capable device has its
vector reserved to avoid the case where some MSI-X capable
drivers may attempt to claim all available vector resources.
z = The number of MSI-X capable devices populated in the system.
This policy ensures that maximum (x - y) is distributed
evenly among MSI-X capable devices.
Note that the PCI subsystem scans y and z during a bus enumeration.
When the PCI subsystem completes configuring MSI/MSI-X capability
structure of a device as requested by its device driver, y/z is
decremented accordingly.
5.3.3 Handling MSI-X shortages
For the case where fewer MSI-X vectors are allocated to a function
than requested, the function pci_enable_msix() will return the
maximum number of MSI-X vectors available to the caller. A device
driver may re-send its request with fewer or equal vectors indicated
in the return. For example, if a device driver requests 5 vectors, but
the number of available vectors is 3 vectors, a value of 3 will be
returned as a result of pci_enable_msix() call. A function could be
designed for its driver to use only 3 MSI-X table entries as
different combinations as ABC--, A-B-C, A--CB, etc. Note that this
patch does not support multiple entries with the same vector. Such
attempt by a device driver to use 5 MSI-X table entries with 3 vectors
as ABBCC, AABCC, BCCBA, etc will result as a failure by the function
pci_enable_msix(). Below are the reasons why supporting multiple
entries with the same vector is an undesirable solution.
- The PCI subsystem cannot determine the entry that
generated the message to mask/unmask MSI while handling
software driver ISR. Attempting to walk through all MSI-X
table entries (2048 max) to mask/unmask any match vector
is an undesirable solution.
- Walking through all MSI-X table entries (2048 max) to handle
SMP affinity of any match vector is an undesirable solution.
5.3.4 API pci_enable_msix
5.3.2 API pci_enable_msix
int pci_enable_msix(struct pci_dev *dev, struct msix_entry *entries, int nvec)
@ -339,7 +278,7 @@ a failure. This failure may be a result of duplicate entries
specified in second argument, or a result of no available vector,
or a result of failing to initialize MSI-X table entries.
5.3.5 API pci_disable_msix
5.3.3 API pci_disable_msix
void pci_disable_msix(struct pci_dev *dev)
@ -349,7 +288,7 @@ always call free_irq() on all MSI-X vectors it has done request_irq()
on before calling this API. Failure to do so results in a BUG_ON() and
a device will be left with MSI-X enabled and leaks its vectors.
5.3.6 MSI-X mode vs. legacy mode diagram
5.3.4 MSI-X mode vs. legacy mode diagram
The below diagram shows the events which switch the interrupt
mode on the MSI-X capable device function between MSI-X mode and
@ -407,7 +346,7 @@ between MSI mod MSI-X mode during a run-time.
MSI/MSI-X support requires support from both system hardware and
individual hardware device functions.
5.5.1 System hardware support
5.5.1 Required x86 hardware support
Since the target of MSI address is the local APIC CPU, enabling
MSI/MSI-X support in the Linux kernel is dependent on whether existing

View File

@ -166,7 +166,7 @@ To solve this problem, you really only have two options:
The option of being unfailingly polite really doesn't exist. Nobody will
trust somebody who is so clearly hiding his true character.
(*) Paul Simon sang "Fifty Ways to Lose Your Lover", because quite
(*) Paul Simon sang "Fifty Ways to Leave Your Lover", because quite
frankly, "A Million Ways to Tell a Developer He Is a D*ckhead" doesn't
scan nearly as well. But I'm sure he thought about it.

View File

@ -122,11 +122,11 @@ then only post say 15 or so at a time and wait for review and integration.
Check your patch for basic style violations, details of which can be
found in Documentation/CodingStyle. Failure to do so simply wastes
the reviewers time and will get your patch rejected, probabally
the reviewers time and will get your patch rejected, probably
without even being read.
At a minimum you should check your patches with the patch style
checker prior to submission (scripts/patchcheck.pl). You should
checker prior to submission (scripts/checkpatch.pl). You should
be able to justify all violations that remain in your patch.
@ -560,7 +560,7 @@ NO!!!! No more huge patch bombs to linux-kernel@vger.kernel.org people!
<http://marc.theaimsgroup.com/?l=linux-kernel&m=112112749912944&w=2>
Kernel Documentation/CodingStyle:
<http://sosdg.org/~coywolf/lxr/source/Documentation/CodingStyle>
<http://users.sosdg.org/~qiyong/lxr/source/Documentation/CodingStyle>
Linus Torvalds's mail on the canonical patch format:
<http://lkml.org/lkml/2005/4/7/183>

View File

@ -196,7 +196,7 @@ void print_delayacct(struct taskstats *t)
"IO %15s%15s\n"
" %15llu%15llu\n"
"MEM %15s%15s\n"
" %15llu%15llu\n"
" %15llu%15llu\n",
"count", "real total", "virtual total", "delay total",
t->cpu_count, t->cpu_run_real_total, t->cpu_run_virtual_total,
t->cpu_delay_total,

View File

@ -79,9 +79,9 @@ and how to prepare flush requests. Note that the term 'ordered' is
used to indicate the whole sequence of performing barrier requests
including draining and flushing.
typedef void (prepare_flush_fn)(request_queue_t *q, struct request *rq);
typedef void (prepare_flush_fn)(struct request_queue *q, struct request *rq);
int blk_queue_ordered(request_queue_t *q, unsigned ordered,
int blk_queue_ordered(struct request_queue *q, unsigned ordered,
prepare_flush_fn *prepare_flush_fn);
@q : the queue in question
@ -92,7 +92,7 @@ int blk_queue_ordered(request_queue_t *q, unsigned ordered,
For example, SCSI disk driver's prepare_flush_fn looks like the
following.
static void sd_prepare_flush(request_queue_t *q, struct request *rq)
static void sd_prepare_flush(struct request_queue *q, struct request *rq)
{
memset(rq->cmd, 0, sizeof(rq->cmd));
rq->cmd_type = REQ_TYPE_BLOCK_PC;

View File

@ -477,9 +477,9 @@ With this multipage bio design:
the same bi_io_vec array, but with the index and size accordingly modified)
- A linked list of bios is used as before for unrelated merges (*) - this
avoids reallocs and makes independent completions easier to handle.
- Code that traverses the req list needs to make a distinction between
segments of a request (bio_for_each_segment) and the distinct completion
units/bios (rq_for_each_bio).
- Code that traverses the req list can find all the segments of a bio
by using rq_for_each_segment. This handles the fact that a request
has multiple bios, each of which can have multiple segments.
- Drivers which can't process a large bio in one shot can use the bi_idx
field to keep track of the next bio_vec entry to process.
(e.g a 1MB bio_vec needs to be handled in max 128kB chunks for IDE)
@ -664,14 +664,14 @@ in lvm or md.
3.2.1 Traversing segments and completion units in a request
The macros bio_for_each_segment() and rq_for_each_bio() should be used for
traversing the bios in the request list (drivers should avoid directly
trying to do it themselves). Using these helpers should also make it easier
to cope with block changes in the future.
The macro rq_for_each_segment() should be used for traversing the bios
in the request list (drivers should avoid directly trying to do it
themselves). Using these helpers should also make it easier to cope
with block changes in the future.
rq_for_each_bio(bio, rq)
bio_for_each_segment(bio_vec, bio, i)
/* bio_vec is now current segment */
struct req_iterator iter;
rq_for_each_segment(bio_vec, rq, iter)
/* bio_vec is now current segment */
I/O completion callbacks are per-bio rather than per-segment, so drivers
that traverse bio chains on completion need to keep that in mind. Drivers
@ -740,12 +740,12 @@ Block now offers some simple generic functionality to help support command
queueing (typically known as tagged command queueing), ie manage more than
one outstanding command on a queue at any given time.
blk_queue_init_tags(request_queue_t *q, int depth)
blk_queue_init_tags(struct request_queue *q, int depth)
Initialize internal command tagging structures for a maximum
depth of 'depth'.
blk_queue_free_tags((request_queue_t *q)
blk_queue_free_tags((struct request_queue *q)
Teardown tag info associated with the queue. This will be done
automatically by block if blk_queue_cleanup() is called on a queue
@ -754,7 +754,7 @@ one outstanding command on a queue at any given time.
The above are initialization and exit management, the main helpers during
normal operations are:
blk_queue_start_tag(request_queue_t *q, struct request *rq)
blk_queue_start_tag(struct request_queue *q, struct request *rq)
Start tagged operation for this request. A free tag number between
0 and 'depth' is assigned to the request (rq->tag holds this number),
@ -762,7 +762,7 @@ normal operations are:
for this queue is already achieved (or if the tag wasn't started for
some other reason), 1 is returned. Otherwise 0 is returned.
blk_queue_end_tag(request_queue_t *q, struct request *rq)
blk_queue_end_tag(struct request_queue *q, struct request *rq)
End tagged operation on this request. 'rq' is removed from the internal
book keeping structures.
@ -781,7 +781,7 @@ queue. For instance, on IDE any tagged request error needs to clear both
the hardware and software block queue and enable the driver to sanely restart
all the outstanding requests. There's a third helper to do that:
blk_queue_invalidate_tags(request_queue_t *q)
blk_queue_invalidate_tags(struct request_queue *q)
Clear the internal block tag queue and re-add all the pending requests
to the request queue. The driver will receive them again on the

View File

@ -86,8 +86,15 @@ extern int sys_ioprio_get(int, int);
#error "Unsupported arch"
#endif
_syscall3(int, ioprio_set, int, which, int, who, int, ioprio);
_syscall2(int, ioprio_get, int, which, int, who);
static inline int ioprio_set(int which, int who, int ioprio)
{
return syscall(__NR_ioprio_set, which, who, ioprio);
}
static inline int ioprio_get(int which, int who)
{
return syscall(__NR_ioprio_get, which, who);
}
enum {
IOPRIO_CLASS_NONE,

View File

@ -83,6 +83,6 @@ struct bio *bio DBI First bio in request
struct bio *biotail DBI Last bio in request
request_queue_t *q DB Request queue this request belongs to
struct request_queue *q DB Request queue this request belongs to
struct request_list *rl B Request list this request came from

View File

@ -124,9 +124,8 @@ static void cn_test_timer_func(unsigned long __data)
struct cn_msg *m;
char data[32];
m = kmalloc(sizeof(*m) + sizeof(data), GFP_ATOMIC);
m = kzalloc(sizeof(*m) + sizeof(data), GFP_ATOMIC);
if (m) {
memset(m, 0, sizeof(*m) + sizeof(data));
memcpy(&m->id, &cn_test_id, sizeof(m->id));
m->seq = cn_test_timer_counter;

View File

@ -29,7 +29,7 @@ In newer kernels, the following are also available:
If sysfs is enabled, the contents of /sys/class/vtconsole can be
examined. This shows the console backends currently registered by the
system which are named vtcon<n> where <n> is an integer fro 0 to 15. Thus:
system which are named vtcon<n> where <n> is an integer from 0 to 15. Thus:
ls /sys/class/vtconsole
. .. vtcon0 vtcon1

View File

@ -0,0 +1,219 @@
Asynchronous Transfers/Transforms API
1 INTRODUCTION
2 GENEALOGY
3 USAGE
3.1 General format of the API
3.2 Supported operations
3.3 Descriptor management
3.4 When does the operation execute?
3.5 When does the operation complete?
3.6 Constraints
3.7 Example
4 DRIVER DEVELOPER NOTES
4.1 Conformance points
4.2 "My application needs finer control of hardware channels"
5 SOURCE
---
1 INTRODUCTION
The async_tx API provides methods for describing a chain of asynchronous
bulk memory transfers/transforms with support for inter-transactional
dependencies. It is implemented as a dmaengine client that smooths over
the details of different hardware offload engine implementations. Code
that is written to the API can optimize for asynchronous operation and
the API will fit the chain of operations to the available offload
resources.
2 GENEALOGY
The API was initially designed to offload the memory copy and
xor-parity-calculations of the md-raid5 driver using the offload engines
present in the Intel(R) Xscale series of I/O processors. It also built
on the 'dmaengine' layer developed for offloading memory copies in the
network stack using Intel(R) I/OAT engines. The following design
features surfaced as a result:
1/ implicit synchronous path: users of the API do not need to know if
the platform they are running on has offload capabilities. The
operation will be offloaded when an engine is available and carried out
in software otherwise.
2/ cross channel dependency chains: the API allows a chain of dependent
operations to be submitted, like xor->copy->xor in the raid5 case. The
API automatically handles cases where the transition from one operation
to another implies a hardware channel switch.
3/ dmaengine extensions to support multiple clients and operation types
beyond 'memcpy'
3 USAGE
3.1 General format of the API:
struct dma_async_tx_descriptor *
async_<operation>(<op specific parameters>,
enum async_tx_flags flags,
struct dma_async_tx_descriptor *dependency,
dma_async_tx_callback callback_routine,
void *callback_parameter);
3.2 Supported operations:
memcpy - memory copy between a source and a destination buffer
memset - fill a destination buffer with a byte value
xor - xor a series of source buffers and write the result to a
destination buffer
xor_zero_sum - xor a series of source buffers and set a flag if the
result is zero. The implementation attempts to prevent
writes to memory
3.3 Descriptor management:
The return value is non-NULL and points to a 'descriptor' when the operation
has been queued to execute asynchronously. Descriptors are recycled
resources, under control of the offload engine driver, to be reused as
operations complete. When an application needs to submit a chain of
operations it must guarantee that the descriptor is not automatically recycled
before the dependency is submitted. This requires that all descriptors be
acknowledged by the application before the offload engine driver is allowed to
recycle (or free) the descriptor. A descriptor can be acked by one of the
following methods:
1/ setting the ASYNC_TX_ACK flag if no child operations are to be submitted
2/ setting the ASYNC_TX_DEP_ACK flag to acknowledge the parent
descriptor of a new operation.
3/ calling async_tx_ack() on the descriptor.
3.4 When does the operation execute?
Operations do not immediately issue after return from the
async_<operation> call. Offload engine drivers batch operations to
improve performance by reducing the number of mmio cycles needed to
manage the channel. Once a driver-specific threshold is met the driver
automatically issues pending operations. An application can force this
event by calling async_tx_issue_pending_all(). This operates on all
channels since the application has no knowledge of channel to operation
mapping.
3.5 When does the operation complete?
There are two methods for an application to learn about the completion
of an operation.
1/ Call dma_wait_for_async_tx(). This call causes the CPU to spin while
it polls for the completion of the operation. It handles dependency
chains and issuing pending operations.
2/ Specify a completion callback. The callback routine runs in tasklet
context if the offload engine driver supports interrupts, or it is
called in application context if the operation is carried out
synchronously in software. The callback can be set in the call to
async_<operation>, or when the application needs to submit a chain of
unknown length it can use the async_trigger_callback() routine to set a
completion interrupt/callback at the end of the chain.
3.6 Constraints:
1/ Calls to async_<operation> are not permitted in IRQ context. Other
contexts are permitted provided constraint #2 is not violated.
2/ Completion callback routines cannot submit new operations. This
results in recursion in the synchronous case and spin_locks being
acquired twice in the asynchronous case.
3.7 Example:
Perform a xor->copy->xor operation where each operation depends on the
result from the previous operation:
/*
 * Completion callback attached to the last operation of the chain built by
 * run_xor_copy_xor().  That call passes NULL as its final argument
 * (presumably the callback parameter), so 'param' is not used here.
 */
void complete_xor_copy_xor(void *param)
{
	printk("complete\n");
}

/*
 * Submit a xor -> copy -> xor chain in which each operation depends on the
 * result of the previous one, then ask every channel to issue its pending
 * operations.
 *
 * The descriptor returned by each async_<operation> call is handed to the
 * next call as its dependency.  complete_xor_copy_xor() is registered as
 * the completion callback of the final xor.
 *
 * Returns 0.  The function is declared int, so it must not fall off the
 * end without a value; no submission status is inspected here.
 */
int run_xor_copy_xor(struct page **xor_srcs,
		     int xor_src_cnt,
		     struct page *xor_dest,
		     size_t xor_len,
		     struct page *copy_src,
		     struct page *copy_dest,
		     size_t copy_len)
{
	struct dma_async_tx_descriptor *tx;

	/* head of the chain: no parent descriptor and no callback */
	tx = async_xor(xor_dest, xor_srcs, 0, xor_src_cnt, xor_len,
		       ASYNC_TX_XOR_DROP_DST, NULL, NULL, NULL);

	/* ASYNC_TX_DEP_ACK acknowledges the parent descriptor (the xor) */
	tx = async_memcpy(copy_dest, copy_src, 0, 0, copy_len,
			  ASYNC_TX_DEP_ACK, tx, NULL, NULL);

	/*
	 * Tail of the chain: ack the parent (the copy) and, since no child
	 * operations will be submitted, ack this descriptor as well.
	 */
	tx = async_xor(xor_dest, xor_srcs, 0, xor_src_cnt, xor_len,
		       ASYNC_TX_XOR_DROP_DST | ASYNC_TX_DEP_ACK | ASYNC_TX_ACK,
		       tx, complete_xor_copy_xor, NULL);

	/* operations are batched by the driver; force them to be issued now */
	async_tx_issue_pending_all();

	return 0;
}
See include/linux/async_tx.h for more information on the flags. See the
ops_run_* and ops_complete_* routines in drivers/md/raid5.c for more
implementation examples.
4 DRIVER DEVELOPMENT NOTES
4.1 Conformance points:
There are a few conformance points required in dmaengine drivers to
accommodate assumptions made by applications using the async_tx API:
1/ Completion callbacks are expected to happen in tasklet context
2/ dma_async_tx_descriptor fields are never manipulated in IRQ context
3/ Use async_tx_run_dependencies() in the descriptor clean up path to
handle submission of dependent operations
4.2 "My application needs finer control of hardware channels"
This requirement seems to arise from cases where a DMA engine driver is
trying to support device-to-memory DMA. The dmaengine and async_tx
implementations were designed for offloading memory-to-memory
operations; however, there are some capabilities of the dmaengine layer
that can be used for platform-specific channel management.
Platform-specific constraints can be handled by registering the
application as a 'dma_client' and implementing a 'dma_event_callback' to
apply a filter to the available channels in the system. Before showing
how to implement a custom dma_event callback some background of
dmaengine's client support is required.
The following routines in dmaengine support multiple clients requesting
use of a channel:
- dma_async_client_register(struct dma_client *client)
- dma_async_client_chan_request(struct dma_client *client)
dma_async_client_register takes a pointer to an initialized dma_client
structure. It expects that the 'event_callback' and 'cap_mask' fields
are already initialized.
dma_async_client_chan_request triggers dmaengine to notify the client of
all channels that satisfy the capability mask. It is up to the client's
event_callback routine to track how many channels the client needs and
how many it is currently using. The dma_event_callback routine returns a
dma_state_client code to let dmaengine know the status of the
allocation.
Below is an example of how to extend this functionality for
platform-specific filtering of the available channels beyond the
standard capability mask:
static enum dma_state_client
my_dma_client_callback(struct dma_client *client,
struct dma_chan *chan, enum dma_state state)
{
struct dma_device *dma_dev;
struct my_platform_specific_dma *plat_dma_dev;
dma_dev = chan->device;
plat_dma_dev = container_of(dma_dev,
struct my_platform_specific_dma,
dma_dev);
if (!plat_dma_dev->platform_specific_capability)
return DMA_DUP;
. . .
}
5 SOURCE
include/linux/dmaengine.h: core header file for DMA drivers and clients
drivers/dma/dmaengine.c: offload engine channel management routines
drivers/dma/: location for offload engine drivers
include/linux/async_tx.h: core header file for the async_tx api
crypto/async_tx/async_tx.c: async_tx interface to dmaengine and common code
crypto/async_tx/async_memcpy.c: copy offload
crypto/async_tx/async_memset.c: memory fill offload
crypto/async_tx/async_xor.c: xor and xor zero sum offload

View File

@ -94,6 +94,8 @@ Your cooperation is appreciated.
9 = /dev/urandom Faster, less secure random number gen.
10 = /dev/aio Asynchronous I/O notification interface
11 = /dev/kmsg Writes to this come out as printk's
12 = /dev/oldmem Used by crashdump kernels to access
the memory of the kernel that crashed.
1 block RAM disk
0 = /dev/ram0 First RAM disk

View File

@ -18,6 +18,7 @@
*.moc
*.mod.c
*.o
*.o.*
*.orig
*.out
*.pdf
@ -163,6 +164,8 @@ raid6tables.c
relocs
series
setup
setup.bin
setup.elf
sim710_d.h*
sImage
sm_tbl*

View File

@ -207,7 +207,7 @@ responsibility. This is usually non-issue because bus ops and
resource allocations already do the job.
For an example of single-instance devres type, read pcim_iomap_table()
in lib/iomap.c.
in lib/devres.c.
All devres interface functions can be called without context if the
right gfp mask is given.

View File

@ -2,22 +2,42 @@
EDAC - Error Detection And Correction
Written by Doug Thompson <norsk5@xmission.com>
Written by Doug Thompson <dougthompson@xmission.com>
7 Dec 2005
17 Jul 2007 Updated
EDAC was written by:
Thayne Harbaugh,
modified by Dave Peterson, Doug Thompson, et al,
from the bluesmoke.sourceforge.net project.
EDAC is maintained and written by:
Doug Thompson, Dave Jiang, Dave Peterson et al,
original author: Thayne Harbaugh,
Contact:
website: bluesmoke.sourceforge.net
mailing list: bluesmoke-devel@lists.sourceforge.net
"bluesmoke" was the name for this device driver when it was "out-of-tree"
and maintained at sourceforge.net. When it was pushed into 2.6.16 for the
first time, it was renamed to 'EDAC'.
The bluesmoke project at sourceforge.net is now utilized as a 'staging area'
for EDAC development, before it is sent upstream to kernel.org
At the bluesmoke/EDAC project site, there is a series of quilt patches against
recent kernels, stored in an SVN repository. For easier downloading, there
is also a tarball snapshot available.
============================================================================
EDAC PURPOSE
The 'edac' kernel module goal is to detect and report errors that occur
within the computer system. In the initial release, memory Correctable Errors
(CE) and Uncorrectable Errors (UE) are the primary errors being harvested.
within the computer system running under linux.
MEMORY
In the initial release, memory Correctable Errors (CE) and Uncorrectable
Errors (UE) are the primary errors being harvested. These types of errors
are harvested by the 'edac_mc' class of device.
Detecting CE events, then harvesting those events and reporting them,
CAN be a predictor of future UE events. With CE events, the system can
@ -25,9 +45,27 @@ continue to operate, but with less safety. Preventive maintenance and
proactive part replacement of memory DIMMs exhibiting CEs can reduce
the likelihood of the dreaded UE events and system 'panics'.
NON-MEMORY
A new feature for EDAC, the edac_device class of device, was added in
the 2.6.23 version of the kernel.
This new device type allows for non-memory type of ECC hardware detectors
to have their states harvested and presented to userspace via the sysfs
interface.
Some architectures have ECC detectors for L1, L2 and L3 caches, along with DMA
engines, fabric switches, main data path switches, interconnections,
and various other hardware data paths. If the hardware reports it, then
an edac_device device can probably be constructed to harvest and present
that to userspace.
PCI BUS SCANNING
In addition, PCI Bus Parity and SERR Errors are scanned for on PCI devices
in order to determine if errors are occurring on data transfers.
The presence of PCI Parity errors must be examined with a grain of salt.
There are several add-in adapters that do NOT follow the PCI specification
with regards to Parity generation and reporting. The specification says
@ -35,11 +73,17 @@ the vendor should tie the parity status bits to 0 if they do not intend
to generate parity. Some vendors do not do this, and thus the parity bit
can "float" giving false positives.
[There are patches in the kernel queue which will allow for storage of
quirks of PCI devices reporting false parity positives. The 2.6.18
kernel should have those patches included. When that becomes available,
then EDAC will be patched to utilize that information to "skip" such
devices.]
In the kernel there is a PCI device attribute located in sysfs that is
checked by the EDAC PCI scanning code. If that attribute is set,
PCI parity/error scanning is skipped for that device. The attribute
is:
broken_parity_status
and is located in the /sys/devices/pci<XXX>/0000:XX:YY.Z directories for
PCI devices.
FUTURE HARDWARE SCANNING
EDAC will have future error detectors that will be integrated with
EDAC or added to it, in the following list:
@ -57,13 +101,14 @@ and the like.
============================================================================
EDAC VERSIONING
EDAC is composed of a "core" module (edac_mc.ko) and several Memory
EDAC is composed of a "core" module (edac_core.ko) and several Memory
Controller (MC) driver modules. On a given system, the CORE
is loaded and one MC driver will be loaded. Both the CORE and
the MC driver have individual versions that reflect current release
level of their respective modules. Thus, to "report" on what version
a system is running, one must report both the CORE's and the
MC driver's versions.
the MC driver (or edac_device driver) have individual versions that reflect
current release level of their respective modules.
Thus, to "report" on what version a system is running, one must report both
the CORE's and the MC driver's versions.
LOADING
@ -88,8 +133,9 @@ EDAC sysfs INTERFACE
EDAC presents a 'sysfs' interface for control, reporting and attribute
reporting purposes.
EDAC lives in the /sys/devices/system/edac directory. Within this directory
there currently reside 2 'edac' components:
EDAC lives in the /sys/devices/system/edac directory.
Within this directory there currently reside 2 'edac' components:
mc memory controller(s) system
pci PCI control and status system
@ -188,7 +234,7 @@ In directory 'mc' are EDAC system overall control and attribute files:
Panic on UE control file:
'panic_on_ue'
'edac_mc_panic_on_ue'
An uncorrectable error will cause a machine panic. This is usually
desirable. It is a bad idea to continue when an uncorrectable error
@ -199,12 +245,12 @@ Panic on UE control file:
LOAD TIME: module/kernel parameter: panic_on_ue=[0|1]
RUN TIME: echo "1" >/sys/devices/system/edac/mc/panic_on_ue
RUN TIME: echo "1" >/sys/devices/system/edac/mc/edac_mc_panic_on_ue
Log UE control file:
'log_ue'
'edac_mc_log_ue'
Generate kernel messages describing uncorrectable errors. These errors
are reported through the system message log system. UE statistics
@ -212,12 +258,12 @@ Log UE control file:
LOAD TIME: module/kernel parameter: log_ue=[0|1]
RUN TIME: echo "1" >/sys/devices/system/edac/mc/log_ue
RUN TIME: echo "1" >/sys/devices/system/edac/mc/edac_mc_log_ue
Log CE control file:
'log_ce'
'edac_mc_log_ce'
Generate kernel messages describing correctable errors. These
errors are reported through the system message log system.
@ -225,12 +271,12 @@ Log CE control file:
LOAD TIME: module/kernel parameter: log_ce=[0|1]
RUN TIME: echo "1" >/sys/devices/system/edac/mc/log_ce
RUN TIME: echo "1" >/sys/devices/system/edac/mc/edac_mc_log_ce
Polling period control file:
'poll_msec'
'edac_mc_poll_msec'
The time period, in milliseconds, for polling for error information.
Too small a value wastes resources. Too large a value might delay
@ -241,7 +287,7 @@ Polling period control file:
LOAD TIME: module/kernel parameter: poll_msec=[0|1]
RUN TIME: echo "1000" >/sys/devices/system/edac/mc/poll_msec
RUN TIME: echo "1000" >/sys/devices/system/edac/mc/edac_mc_poll_msec
============================================================================
@ -587,3 +633,95 @@ Parity Count:
=======================================================================
EDAC_DEVICE type of device
In the header file, edac_core.h, there is a series of edac_device structures
and APIs for the EDAC_DEVICE.
User space access to an edac_device is through the sysfs interface.
At the location /sys/devices/system/edac (sysfs) new edac_device devices will
appear.
There is a three-level tree beneath the above 'edac' directory. For example,
the 'test_device_edac' device (found at the bluesmoke.sourceforge.net website)
installs itself as:
/sys/devices/system/edac/test-instance
In this directory are various controls, a symlink and one or more 'instance'
directories.
The standard default controls are:
log_ce boolean to log CE events
log_ue boolean to log UE events
panic_on_ue boolean to 'panic' the system if an UE is encountered
(default off, can be set true via startup script)
poll_msec time period between POLL cycles for events
The test_device_edac device adds at least one custom control of its own:
test_bits which in the current test driver does nothing but
show how it is installed. A ported driver can
add one or more such controls and/or attributes
for specific uses.
One out-of-tree driver uses controls here to allow
for ERROR INJECTION operations to hardware
injection registers
The symlink points to the 'struct dev' that is registered for this edac_device.
INSTANCES
One or more instance directories are present. For the 'test_device_edac' case:
test-instance0
In this directory there are two default counter attributes, which are totals of
counter in deeper subdirectories.
ce_count total of CE events of subdirectories
ue_count total of UE events of subdirectories
BLOCKS
At the lowest directory level is the 'block' directory. There can be 0, 1
or more blocks specified in each instance.
test-block0
In this directory the default attributes are:
ce_count which is counter of CE events for this 'block'
of hardware being monitored
ue_count which is counter of UE events for this 'block'
of hardware being monitored
The 'test_device_edac' device adds 4 attributes and 1 control:
test-block-bits-0 for every POLL cycle this counter
is incremented
test-block-bits-1 every 10 cycles, this counter is bumped once,
and test-block-bits-0 is set to 0
test-block-bits-2 every 100 cycles, this counter is bumped once,
and test-block-bits-1 is set to 0
test-block-bits-3 every 1000 cycles, this counter is bumped once,
and test-block-bits-2 is set to 0
reset-counters writing ANY thing to this control will
reset all the above counters.
Use of the 'test_device_edac' driver should enable others to create their own
unique drivers for their hardware systems.
The 'test_device_edac' sample driver is located at the
bluesmoke.sourceforge.net project site for EDAC.

View File

@ -9,19 +9,29 @@ for accessing the i2c bus and the gpio pins of the bt8xx chipset.
Please see Documentation/dvb/cards.txt => o Cards based on the Conexant Bt8xx PCI bridge:
Compiling kernel please enable:
a.)"Device drivers" => "Multimedia devices" => "Video For Linux" => "BT848 Video For Linux"
b.)"Device drivers" => "Multimedia devices" => "Digital Video Broadcasting Devices"
=> "DVB for Linux" "DVB Core Support" "Bt8xx based PCI Cards"
a.)"Device drivers" => "Multimedia devices" => "Video For Linux" => "Enable Video for Linux API 1 (DEPRECATED)"
b.)"Device drivers" => "Multimedia devices" => "Video For Linux" => "Video Capture Adapters" => "BT848 Video For Linux"
c.)"Device drivers" => "Multimedia devices" => "Digital Video Broadcasting Devices" => "DVB for Linux" "DVB Core Support" "Bt8xx based PCI Cards"
Please use the following options with care as deselection of drivers which are in fact necessary
may result in DVB devices that cannot be tuned due to lack of driver support:
You can save RAM by deselecting every frontend module that your DVB card does not need.
First please remove the static dependency of DVB card drivers on all frontend modules for all possible card variants by enabling:
d.) "Device drivers" => "Multimedia devices" => "Digital Video Broadcasting Devices"
=> "DVB for Linux" "DVB Core Support" "Load and attach frontend modules as needed"
If you know the frontend driver that your card needs please enable:
e.)"Device drivers" => "Multimedia devices" => "Digital Video Broadcasting Devices"
=> "DVB for Linux" "DVB Core Support" "Customise DVB Frontends" => "Customise the frontend modules to build"
Then please select your card-specific frontend module.
2) Loading Modules
==================
In default cases bttv is loaded automatically.
To load the backend either place dvb-bt8xx in etc/modules, or apply manually:
$ modprobe dvb-bt8xx
All frontends will be loaded automatically.
Regular case: If the bttv driver detects a bt8xx-based DVB card, all frontend and backend modules will be loaded automatically.
Exceptions are:
- Old TwinHan DST cards or clones with or without CA slot and not containing an Eeprom.
People running udev please see Documentation/dvb/udev.txt.
In the following cases overriding the PCI type detection for dvb-bt8xx might be necessary:
@ -30,7 +40,6 @@ In the following cases overriding the PCI type detection for dvb-bt8xx might be
------------------------------
$ modprobe bttv card=113
$ modprobe dvb-bt8xx
$ modprobe dst
Useful parameters for verbosity level and debugging the dst module:
@ -65,10 +74,9 @@ DViCO FusionHDTV 5 Lite: 135
Notice: The card IDs should be given in ascending order:
Example:
$ modprobe bttv card=113 card=135
$ modprobe dvb-bt8xx
For a full list of card ID's please see Documentation/video4linux/CARDLIST.bttv.
In case of further problems send questions to the mailing list: www.linuxdvb.org.
In case of further problems please subscribe and send questions to the mailing list: linux-dvb@linuxtv.org.
Authors: Richard Walker,
Jamie Honan,

View File

@ -150,7 +150,7 @@ Some very frequently asked questions about linuxtv-dvb
- saa7146_vv: SAA7146 video and vbi functions. These are only needed
for full-featured cards.
- video-buf: capture helper module for the saa7146_vv driver. This
- videobuf-dma-sg: capture helper module for the saa7146_vv driver. This
one is responsible to handle capture buffers.
- dvb-ttpci: The main driver for AV7110 based, full-featured

View File

@ -24,7 +24,8 @@ use IO::Handle;
@components = ( "sp8870", "sp887x", "tda10045", "tda10046",
"tda10046lifeview", "av7110", "dec2000t", "dec2540t",
"dec3000s", "vp7041", "dibusb", "nxt2002", "nxt2004",
"or51211", "or51132_qam", "or51132_vsb", "bluebird");
"or51211", "or51132_qam", "or51132_vsb", "bluebird",
"opera1");
# Check args
syntax() if (scalar(@ARGV) != 1);
@ -56,7 +57,7 @@ syntax();
sub sp8870 {
my $sourcefile = "tt_Premium_217g.zip";
my $url = "http://www.technotrend.de/new/217g/$sourcefile";
my $url = "http://www.softwarepatch.pl/9999ccd06a4813cb827dbb0005071c71/$sourcefile";
my $hash = "53970ec17a538945a6d8cb608a7b3899";
my $outfile = "dvb-fe-sp8870.fw";
my $tmpdir = tempdir(DIR => "/tmp", CLEANUP => 1);
@ -110,21 +111,21 @@ sub tda10045 {
}
sub tda10046 {
my $sourcefile = "tt_budget_217g.zip";
my $url = "http://www.technotrend.de/new/217g/$sourcefile";
my $hash = "6a7e1e2f2644b162ff0502367553c72d";
my $outfile = "dvb-fe-tda10046.fw";
my $tmpdir = tempdir(DIR => "/tmp", CLEANUP => 1);
my $sourcefile = "TT_PCI_2.19h_28_11_2006.zip";
my $url = "http://technotrend-online.com/download/software/219/$sourcefile";
my $hash = "6a7e1e2f2644b162ff0502367553c72d";
my $outfile = "dvb-fe-tda10046.fw";
my $tmpdir = tempdir(DIR => "/tmp", CLEANUP => 1);
checkstandard();
checkstandard();
wgetfile($sourcefile, $url);
unzip($sourcefile, $tmpdir);
extract("$tmpdir/software/OEM/PCI/App/ttlcdacc.dll", 0x3f731, 24478, "$tmpdir/fwtmp");
verify("$tmpdir/fwtmp", $hash);
copy("$tmpdir/fwtmp", $outfile);
wgetfile($sourcefile, $url);
unzip($sourcefile, $tmpdir);
extract("$tmpdir/TT_PCI_2.19h_28_11_2006/software/OEM/PCI/App/ttlcdacc.dll", 0x65389, 24478, "$tmpdir/fwtmp");
verify("$tmpdir/fwtmp", $hash);
copy("$tmpdir/fwtmp", $outfile);
$outfile;
$outfile;
}
sub tda10046lifeview {
@ -210,6 +211,45 @@ sub dec3000s {
$outfile;
}
# Build the two firmware files for the Opera DVB-S1 USB box out of the
# Windows driver files 2830SCap2.sys and 2830SLoad2.sys, which are read via
# relative paths and so must be in the current directory.
#
# Writes dvb-usb-opera1-fpga-01.fw and dvb-usb-opera-01.fw and returns both
# names joined by a comma.
sub opera1{
	# Everything needed is copied out before returning, so let File::Temp
	# remove the scratch directory like the other components do.
	my $tmpdir = tempdir(DIR => "/tmp", CLEANUP => 1);

	checkstandard();
	my $fwfile1="dvb-usb-opera1-fpga-01.fw";
	my $fwfile2="dvb-usb-opera-01.fw";
	extract("2830SCap2.sys", 0x62e8, 55024, "$tmpdir/opera1-fpga.fw");
	extract("2830SLoad2.sys",0x3178,0x3685-0x3178,"$tmpdir/fw1part1");
	extract("2830SLoad2.sys",0x0980,0x3150-0x0980,"$tmpdir/fw1part2");
	delzero("$tmpdir/fw1part1","$tmpdir/fw1part1-1");
	delzero("$tmpdir/fw1part2","$tmpdir/fw1part2-1");
	verify("$tmpdir/fw1part1-1","5e0909858fdf0b5b09ad48b9fe622e70");
	verify("$tmpdir/fw1part2-1","d6e146f321427e931df2c6fcadac37a1");
	verify("$tmpdir/opera1-fpga.fw","0f8133f5e9051f5f3c1928f7e5a1b07d");

	# Fixed 6-byte records written around the two extracted parts.
	# NOTE(review): the names suggest reset/data line on (1) and off (0);
	# confirm against the opera1 driver's firmware loader.
	my $RES1="\x01\x92\x7f\x00\x01\x00";
	my $RES0="\x01\x92\x7f\x00\x00\x00";
	my $DAT1="\x01\x00\xe6\x00\x01\x00";
	my $DAT0="\x01\x00\xe6\x00\x00\x00";

	open(FW,">$tmpdir/opera.fw") || die("Couldn't open $tmpdir/opera.fw: $!");
	# appendfile() is handed this same handle and its body is not visible
	# here; writing unbuffered keeps the records in program order however
	# appendfile() chooses to write.
	FW->autoflush(1);
	print FW "$RES1";
	print FW "$DAT1";
	print FW "$RES1";
	print FW "$DAT1";
	appendfile(FW,"$tmpdir/fw1part1-1");
	print FW "$RES0";
	print FW "$DAT0";
	print FW "$RES1";
	print FW "$DAT1";
	appendfile(FW,"$tmpdir/fw1part2-1");
	print FW "$RES1";
	print FW "$DAT1";
	print FW "$RES0";
	print FW "$DAT0";
	# The image must be complete on disk before it is copied below.
	close(FW) || die("Couldn't write $tmpdir/opera.fw: $!");

	copy ("$tmpdir/opera1-fpga.fw",$fwfile1);
	copy ("$tmpdir/opera.fw",$fwfile2);

	$fwfile1.",".$fwfile2;
}
sub vp7041 {
my $sourcefile = "2.422.zip";
@ -440,6 +480,25 @@ sub appendfile {
close(INFILE);
}
# Repack the fixed-size records of $infile into $outfile.
#
# The input is consumed in 22-byte reads.  For each read the output gets
# byte 0, then up to (byte 0 + 3) bytes starting at offset 2 (byte 1 is
# dropped), then a single zero byte.
# NOTE(review): byte 0 looks like a payload length -- confirm against the
# layout of 2830SLoad2.sys.
sub delzero{
	my ($infile,$outfile) =@_;
	my ($rcount, $buf, $len);

	open(INFILE,"<$infile") || die("Couldn't open $infile: $!");
	open(OUTFILE,">$outfile") || die("Couldn't open $outfile: $!");
	while (1){
		$rcount=sysread(INFILE,$buf,22);
		# A failed read leaves the previous record in $buf; do not emit it
		# a second time.
		die("Couldn't read $infile: $!") unless defined($rcount);
		# End of file: there is no record left to emit.
		last if ($rcount<1);
		$len=ord(substr($buf,0,1));
		print OUTFILE substr($buf,0,1);
		print OUTFILE substr($buf,2,$len+3);
		printf OUTFILE "%c",0;
	}
	close(INFILE);
	# close flushes the buffered output; a failure here means a short file
	close(OUTFILE) || die("Couldn't write $outfile: $!");
}
sub syntax() {
print STDERR "syntax: get_dvb_firmware <component>\n";
print STDERR "Supported components:\n";

View File

@ -0,0 +1,27 @@
To extract the firmware for the Opera DVB-S1 USB-Box
you need to copy the files:
2830SCap2.sys
2830SLoad2.sys
from the windriver disk into this directory.
Then run
./get_dvb_firmware opera1
and after that you have 2 files:
dvb-usb-opera-01.fw
dvb-usb-opera1-fpga-01.fw
in here.
Copy them into /lib/firmware/ .
After that the driver can load the firmware
(if you have enabled firmware loading
in kernel config and have hotplug running).
Marco Gittler <g.marco@freenet.de>

View File

@ -9,14 +9,13 @@ one found in the Dreamcast.
Advantages:
* It provides a nice large console (128 cols + 48 lines with 1024x768)
without using tiny, unreadable fonts.
without using tiny, unreadable fonts (NOT on the Dreamcast)
* You can run XF86_FBDev on top of /dev/fb0
* Most important: boot logo :-)
Disadvantages:
* Driver is currently limited to the Dreamcast PowerVR 2 implementation
at the time of this writing.
* Driver is largely untested on non-Dreamcast systems.
Configuration
=============
@ -29,11 +28,16 @@ Accepted options:
font:X - default font to use. All fonts are supported, including the
SUN12x22 font which is very nice at high resolutions.
mode:X - default video mode. The following video modes are supported:
640x240-60, 640x480-60.
mode:X - default video mode with format [xres]x[yres]-<bpp>@<refresh rate>
The following video modes are supported:
640x480-16@60, 640x480-24@60, 640x480-32@60. The Dreamcast
defaults to 640x480-16@60. At the time of writing the
24bpp and 32bpp modes function poorly. Work to fix that is
ongoing.
Note: the 640x240 mode is currently broken, and should not be
used for any reason. It is only mentioned as a reference.
used for any reason. It is only mentioned here as a reference.
inverse - invert colors on screen (for LCD displays)
@ -52,10 +56,10 @@ output:X - output type. This can be any of the following: pal, ntsc, and
X11
===
XF86_FBDev should work, in theory. At the time of this writing it is
totally untested and may or may not even portray the beginnings of
working. If you end up testing this, please let me know!
XF86_FBDev has been shown to work on the Dreamcast in the past - though not yet
on any 2.6 series kernel.
--
Paul Mundt <lethal@linuxdc.org>
Updated by Adrian McMenamin <adrian@mcmen.demon.co.uk>

View File

@ -26,9 +26,7 @@ Who: Hans Verkuil <hverkuil@xs4all.nl> and
---------------------------
What: /sys/devices/.../power/state
dev->power.power_state
dpm_runtime_{suspend,resume)()
What: dev->power.power_state
When: July 2007
Why: Broken design for runtime control over driver power states, confusing
driver-internal runtime power management with: mechanisms to support
@ -53,6 +51,7 @@ Who: David Miller <davem@davemloft.net>
What: Video4Linux API 1 ioctls and video_decoder.h from Video devices.
When: December 2006
Files: include/linux/video_decoder.h
Check: include/linux/video_decoder.h
Why: V4L1 API was replaced by V4L2 API during migration from 2.4 to 2.6
series. The old API has lots of drawbacks and doesn't provide enough
means to work with all video and audio standards. The newer API is
@ -86,7 +85,7 @@ Who: Dominik Brodowski <linux@brodo.de>
What: remove EXPORT_SYMBOL(kernel_thread)
When: August 2006
Files: arch/*/kernel/*_ksyms.c
Funcs: kernel_thread
Check: kernel_thread
Why: kernel_thread is a low-level implementation detail. Drivers should
use the <linux/kthread.h> API instead which shields them from
implementation details and provides a higher-level interface that
@ -137,6 +136,15 @@ Who: Greg Kroah-Hartman <gregkh@suse.de>
---------------------------
What: vm_ops.nopage
When: Soon, provided in-kernel callers have been converted
Why: This interface is replaced by vm_ops.fault, but it has been around
forever, is used by a lot of drivers, and doesn't cost much to
maintain.
Who: Nick Piggin <npiggin@suse.de>
---------------------------
What: Interrupt only SA_* flags
When: September 2007
Why: The interrupt related SA_* flags are replaced by IRQF_* to move them
@ -156,15 +164,6 @@ Who: Kay Sievers <kay.sievers@suse.de>
---------------------------
What: i2c-isa
When: December 2006
Why: i2c-isa is a non-sense and doesn't fit in the device driver
model. Drivers relying on it are better implemented as platform
drivers.
Who: Jean Delvare <khali@linux-fr.org>
---------------------------
What: i2c_adapter.list
When: July 2007
Why: Superfluous, this list duplicates the one maintained by the driver
@ -181,24 +180,11 @@ Who: Adrian Bunk <bunk@stusta.de>
---------------------------
What: /sys/firmware/acpi/namespace
When: 2.6.21
Why: The ACPI namespace is effectively the symbol list for
the BIOS. The device names are completely arbitrary
and have no place being exposed to user-space.
For those interested in the BIOS ACPI namespace,
the BIOS can be extracted and disassembled with acpidump
and iasl as documented in the pmtools package here:
http://ftp.kernel.org/pub/linux/kernel/people/lenb/acpi/utils
Who: Len Brown <len.brown@intel.com>
---------------------------
What: ACPI procfs interface
When: July 2007
Why: After ACPI sysfs conversion, ACPI attributes will be duplicated
in sysfs and the ACPI procfs interface should be removed.
When: July 2008
Why: ACPI sysfs conversion should be finished by January 2008.
ACPI procfs interface will be removed in July 2008 so that
there is enough time for the user space to catch up.
Who: Zhang Rui <rui.zhang@intel.com>
---------------------------
@ -211,6 +197,14 @@ Who: Len Brown <len.brown@intel.com>
---------------------------
What: /proc/acpi/event
When: February 2008
Why: /proc/acpi/event has been replaced by events via the input layer
and netlink since 2.6.23.
Who: Len Brown <len.brown@intel.com>
---------------------------
What: Compaq touchscreen device emulation
When: Oct 2007
Files: drivers/input/tsdev.c
@ -225,22 +219,6 @@ Who: Richard Purdie <rpurdie@rpsys.net>
---------------------------
What: read_dev_chars(), read_conf_data{,_lpm}() (s390 common I/O layer)
When: December 2007
Why: These functions are a leftover from 2.4 times. They have several
problems:
- Duplication of checks that are done in the device driver's
interrupt handler
- common I/O layer can't do device specific error recovery
- device driver can't be notified for conditions happening during
execution of the function
Device drivers should issue the read device characteristics and read
configuration data ccws and do the appropriate error handling
themselves.
Who: Cornelia Huck <cornelia.huck@de.ibm.com>
---------------------------
What: i2c-ixp2000, i2c-ixp4xx and scx200_i2c drivers
When: September 2007
Why: Obsolete. The new i2c-gpio driver replaces all hardware-specific
@ -310,3 +288,42 @@ Why: The arch/powerpc tree is the merged architecture for ppc32 and ppc64
Who: linuxppc-dev@ozlabs.org
---------------------------
What: mthca driver's MSI support
When: January 2008
Files: drivers/infiniband/hw/mthca/*.[ch]
Why: All mthca hardware also supports MSI-X, which provides
strictly more functionality than MSI. So there is no point in
having both MSI-X and MSI support in the driver.
Who: Roland Dreier <rolandd@cisco.com>
---------------------------
What: sk98lin network driver
When: February 2008
Why: In kernel tree version of driver is unmaintained. Sk98lin driver
replaced by the skge driver.
Who: Stephen Hemminger <shemminger@linux-foundation.org>
---------------------------
What: i386/x86_64 bzImage symlinks
When: April 2008
Why: The i386/x86_64 merge provides a symlink to the old bzImage
location so not yet updated user space tools, e.g. package
scripts, do not break.
Who: Thomas Gleixner <tglx@linutronix.de>
---------------------------
What: shaper network driver
When: January 2008
Files: drivers/net/shaper.c, include/linux/if_shaper.h
Why: This driver has been marked obsolete for many years.
It was only designed to work on lower speed links and has design
flaws that lead to machine crashes. The qdisc infrastructure in
2.4 or later kernels, provides richer features and is more robust.
Who: Stephen Hemminger <shemminger@linux-foundation.org>
---------------------------

View File

@ -32,6 +32,8 @@ directory-locking
- info about the locking scheme used for directory operations.
dlmfs.txt
- info on the userspace interface to the OCFS2 DLM.
ecryptfs.txt
- docs on eCryptfs: stacked cryptographic filesystem for Linux.
ext2.txt
- info, mount options and specifications for the Ext2 filesystem.
ext3.txt

View File

@ -6,12 +6,26 @@ ABOUT
v9fs is a Unix implementation of the Plan 9 9p remote filesystem protocol.
This software was originally developed by Ron Minnich <rminnich@lanl.gov>
and Maya Gokhale <maya@lanl.gov>. Additional development by Greg Watson
This software was originally developed by Ron Minnich <rminnich@sandia.gov>
and Maya Gokhale. Additional development by Greg Watson
<gwatson@lanl.gov> and most recently Eric Van Hensbergen
<ericvh@gmail.com>, Latchesar Ionkov <lucho@ionkov.net> and Russ Cox
<rsc@swtch.com>.
The best detailed explanation of the Linux implementation and applications of
the 9p client is available in the form of a USENIX paper:
http://www.usenix.org/events/usenix05/tech/freenix/hensbergen.html
Other applications are described in the following papers:
* XCPU & Clustering
http://www.xcpu.org/xcpu-talk.pdf
* KVMFS: control file system for KVM
http://www.xcpu.org/kvmfs.pdf
* CellFS: A New Programming Model for the Cell BE
http://www.xcpu.org/cellfs-talk.pdf
* PROSE I/O: Using 9p to enable Application Partitions
http://plan9.escet.urjc.es/iwp9/cready/PROSE_iwp9_2006.pdf
USAGE
=====
@ -90,9 +104,9 @@ subset of the namespace by extending the path: '#U*'/tmp would just export
and export.
A Linux version of the 9p server is now maintained under the npfs project
on sourceforge (http://sourceforge.net/projects/npfs). There is also a
more stable single-threaded version of the server (named spfs) available from
the same CVS repository.
on sourceforge (http://sourceforge.net/projects/npfs). The currently
maintained version is the single-threaded version of the server (named spfs)
available from the same CVS repository.
There are user and developer mailing lists available through the v9fs project
on sourceforge (http://sourceforge.net/projects/v9fs).

View File

@ -510,13 +510,24 @@ More details about quota locking can be found in fs/dquot.c.
prototypes:
void (*open)(struct vm_area_struct*);
void (*close)(struct vm_area_struct*);
int (*fault)(struct vm_area_struct*, struct vm_fault *);
struct page *(*nopage)(struct vm_area_struct*, unsigned long, int *);
int (*page_mkwrite)(struct vm_area_struct *, struct page *);
locking rules:
BKL mmap_sem
BKL mmap_sem PageLocked(page)
open: no yes
close: no yes
fault: no yes
nopage: no yes
page_mkwrite: no yes no
->page_mkwrite() is called when a previously read-only page is
about to become writeable. The file system is responsible for
protecting against truncate races. Once appropriate action has been
taken to lock out truncate, the page range should be verified to be
within i_size. The page mapping should also be checked to ensure it is not
NULL.
================================================================================
Dubious stuff

View File

@ -277,11 +277,10 @@ static struct config_item *simple_children_make_item(struct config_group *group,
{
struct simple_child *simple_child;
simple_child = kmalloc(sizeof(struct simple_child), GFP_KERNEL);
simple_child = kzalloc(sizeof(struct simple_child), GFP_KERNEL);
if (!simple_child)
return NULL;
memset(simple_child, 0, sizeof(struct simple_child));
config_item_init_type_name(&simple_child->item, name,
&simple_child_type);
@ -364,12 +363,11 @@ static struct config_group *group_children_make_group(struct config_group *group
{
struct simple_children *simple_children;
simple_children = kmalloc(sizeof(struct simple_children),
simple_children = kzalloc(sizeof(struct simple_children),
GFP_KERNEL);
if (!simple_children)
return NULL;
memset(simple_children, 0, sizeof(struct simple_children));
config_group_init_type_name(&simple_children->group, name,
&simple_children_type);

View File

@ -0,0 +1,59 @@
Macintosh HFSPlus Filesystem for Linux
======================================
HFSPlus is a filesystem first introduced in MacOS 8.1.
HFSPlus has several extensions to HFS, including 32-bit allocation
blocks, 255-character unicode filenames, and file sizes of 2^63 bytes.
Mount options
=============
When mounting an HFSPlus filesystem, the following options are accepted:
creator=cccc, type=cccc
Specifies the creator/type values as shown by the MacOS finder
used for creating new files. Default values: '????'.
uid=n, gid=n
Specifies the user/group that owns all files on the filesystem
that have uninitialized permissions structures.
Default: user/group id of the mounting process.
umask=n
Specifies the umask (in octal) used for files and directories
that have uninitialized permissions structures.
Default: umask of the mounting process.
session=n
Select the CDROM session to mount as HFSPlus filesystem. Defaults to
leaving that decision to the CDROM driver. This option will fail
with anything but a CDROM as the underlying device.
part=n
Select partition number n from the device. This option only makes
sense for CDROMs because they can't be partitioned under Linux.
For disk devices the generic partition parsing code does this
for us. Defaults to not parsing the partition table at all.
decompose
Decompose file name characters.
nodecompose
Do not decompose file name characters.
force
Used to force write access to volumes that are marked as journalled
or locked. Use at your own risk.
nls=cccc
Encoding to use when presenting file names.
References
==========
kernel source: <file:fs/hfsplus>
Apple Technote 1150 http://developer.apple.com/technotes/tn/tn1150.html

View File

@ -407,7 +407,7 @@ raiddev /dev/md0
device /dev/hda5
raid-disk 0
device /dev/hdb1
raid-disl 1
raid-disk 1
For linear raid, just change the raid-level above to "raid-level linear", for
mirrors, change it to "raid-level 1", and for stripe sets with parity, change
@ -457,6 +457,8 @@ ChangeLog
Note, a technical ChangeLog aimed at kernel hackers is in fs/ntfs/ChangeLog.
2.1.29:
- Fix a deadlock when mounting read-write.
2.1.28:
- Fix a deadlock.
2.1.27:

View File

@ -28,11 +28,7 @@ Manish Singh <manish.singh@oracle.com>
Caveats
=======
Features which OCFS2 does not support yet:
- sparse files
- extended attributes
- shared writable mmap
- loopback is supported, but data written will not
be cluster coherent.
- quotas
- cluster aware flock
- cluster aware lockf
@ -57,3 +53,12 @@ nointr Do not allow signals to interrupt cluster
atime_quantum=60(*) OCFS2 will not update atime unless this number
of seconds has passed since the last update.
Set to zero to always update atime.
data=ordered (*) All data are forced directly out to the main file
system prior to its metadata being committed to the
journal.
data=writeback Data ordering is not preserved, data may be written
into the main file system after its metadata has been
committed to the journal.
preferred_slot=0(*) During mount, try to use this filesystem slot first. If
it is in use by another node, the first empty one found
will be chosen. Invalid values will be ignored.

View File

@ -42,6 +42,7 @@ Table of Contents
2.12 /proc/<pid>/oom_adj - Adjust the oom-killer score
2.13 /proc/<pid>/oom_score - Display current oom-killer score
2.14 /proc/<pid>/io - Display the IO accounting fields
2.15 /proc/<pid>/coredump_filter - Core dump filtering settings
------------------------------------------------------------------------------
Preface
@ -1065,6 +1066,13 @@ check the amount of free space (value is in seconds). Default settings are: 4,
resume it if we have a value of 3 or more percent; consider information about
the amount of free space valid for 30 seconds
audit_argv_kb
-------------
The file contains a single value denoting the limit on the argv array size
for execve (in KiB). This limit is only applied when system call auditing for
execve is enabled, otherwise the value is ignored.
ctrl-alt-del
------------
@ -2177,4 +2185,41 @@ those 64-bit counters, process A could see an intermediate result.
More information about this can be found within the taskstats documentation in
Documentation/accounting.
2.15 /proc/<pid>/coredump_filter - Core dump filtering settings
---------------------------------------------------------------
When a process is dumped, all anonymous memory is written to a core file as
long as the size of the core file isn't limited. But sometimes we don't want
to dump some memory segments, for example, huge shared memory. Conversely,
sometimes we want to save file-backed memory segments into a core file, not
only the individual files.
/proc/<pid>/coredump_filter allows you to customize which memory segments
will be dumped when the <pid> process is dumped. coredump_filter is a bitmask
of memory types. If a bit of the bitmask is set, memory segments of the
corresponding memory type are dumped, otherwise they are not dumped.
The following 4 memory types are supported:
- (bit 0) anonymous private memory
- (bit 1) anonymous shared memory
- (bit 2) file-backed private memory
- (bit 3) file-backed shared memory
Note that MMIO pages such as frame buffer are never dumped and vDSO pages
are always dumped regardless of the bitmask status.
Default value of coredump_filter is 0x3; this means all anonymous memory
segments are dumped.
If you don't want to dump all shared memory segments attached to pid 1234,
write 1 to the process's proc file.
$ echo 0x1 > /proc/1234/coredump_filter
When a new process is created, the process inherits the bitmask status from its
parent. It is useful to set up coredump_filter before the program runs.
For example:
$ echo 0x7 > /proc/self/coredump_filter
$ ./some_program
------------------------------------------------------------------------------

View File

@ -75,6 +75,9 @@ using the include file:
If you stick to this convention then it'll be easier for other developers to
see what your code is doing, and help maintain it.
Note that these operations include I/O barriers on platforms which need to
use them; drivers don't need to add them explicitly.
Identifying GPIOs
-----------------
@ -145,7 +148,7 @@ pin ... that won't always match the specified output value, because of
issues including wire-OR and output latencies.
The get/set calls have no error returns because "invalid GPIO" should have
been reported earlier in gpio_set_direction(). However, note that not all
been reported earlier from gpio_direction_*(). However, note that not all
platforms can read the value of output pins; those that can't should always
return zero. Also, using these calls for GPIOs that can't safely be accessed
without sleeping (see below) is an error.
@ -236,7 +239,7 @@ map between them using calls like:
Those return either the corresponding number in the other namespace, or
else a negative errno code if the mapping can't be done. (For example,
some GPIOs can't be used as IRQs.) It is an unchecked error to use a GPIO
number that hasn't been marked as an input using gpio_set_direction(), or
number that wasn't set up as an input using gpio_direction_input(), or
to use an IRQ number that didn't originally come from gpio_to_irq().
These two mapping calls are expected to cost on the order of a single

View File

@ -5,7 +5,7 @@ for the 8254 and Real Time Clock (RTC) periodic timer functionality.
Each HPET can have up to 32 timers. It is possible to configure the
first two timers as legacy replacements for 8254 and RTC periodic timers.
A specification done by Intel and Microsoft can be found at
<http://www.intel.com/hardwaredesign/hpetspec.htm>.
<http://www.intel.com/technology/architecture/hpetspec.htm>.
The driver supports detection of HPET driver allocation and initialization
of the HPET before the driver module_init routine is called. This enables

View File

@ -2,7 +2,7 @@ Kernel driver abituguru
=======================
Supported chips:
* Abit uGuru revision 1-3 (Hardware Monitor part only)
* Abit uGuru revision 1 & 2 (Hardware Monitor part only)
Prefix: 'abituguru'
Addresses scanned: ISA 0x0E0
Datasheet: Not available, this driver is based on reverse engineering.
@ -20,8 +20,8 @@ Supported chips:
uGuru 2.1.0.0 ~ 2.1.2.8 (AS8, AV8, AA8, AG8, AA8XE, AX8)
uGuru 2.2.0.0 ~ 2.2.0.6 (AA8 Fatal1ty)
uGuru 2.3.0.0 ~ 2.3.0.9 (AN8)
uGuru 3.0.0.0 ~ 3.0.1.2 (AW8, AL8, NI8)
uGuru 4.xxxxx? (AT8 32X) (2)
uGuru 3.0.0.0 ~ 3.0.x.x (AW8, AL8, AT8, NI8 SLI, AT8 32X, AN8 32X,
AW9D-MAX) (2)
1) For revisions 2 and 3 uGuru's the driver can autodetect the
sensortype (Volt or Temp) for bank1 sensors, for revision 1 uGuru's
this does not always work. For these uGuru's the autodetection can
@ -30,8 +30,9 @@ Supported chips:
bank1_types=1,1,0,0,0,0,0,2,0,0,0,0,2,0,0,1
You may also need to specify the fan_sensors option for these boards
fan_sensors=5
2) The current version of the abituguru driver is known to NOT work
on these Motherboards
2) There is a separate abituguru3 driver for these motherboards,
the abituguru (without the 3 !) driver will not work on these
motherboards (and vice versa)!
Authors:
Hans de Goede <j.w.r.degoede@hhs.nl>,
@ -43,8 +44,10 @@ Module Parameters
-----------------
* force: bool Force detection. Note this parameter only causes the
detection to be skipped, if the uGuru can't be read
the module initialization (insmod) will still fail.
detection to be skipped, and thus the insmod to
succeed. If the uGuru can't be read the actual hwmon
driver will not load and thus no hwmon device will get
registered.
* bank1_types: int[] Bank1 sensortype autodetection override:
-1 autodetect (default)
0 volt sensor
@ -69,13 +72,15 @@ dmesg | grep abituguru
Description
-----------
This driver supports the hardware monitoring features of the Abit uGuru chip
found on Abit uGuru featuring motherboards (most modern Abit motherboards).
This driver supports the hardware monitoring features of the first and
second revision of the Abit uGuru chip found on Abit uGuru featuring
motherboards (most modern Abit motherboards).
The uGuru chip in reality is a Winbond W83L950D in disguise (despite Abit
claiming it is "a new microprocessor designed by the ABIT Engineers").
Unfortunatly this doesn't help since the W83L950D is a generic
microcontroller with a custom Abit application running on it.
The first and second revision of the uGuru chip in reality is a Winbond
W83L950D in disguise (despite Abit claiming it is "a new microprocessor
designed by the ABIT Engineers"). Unfortunately this doesn't help since the
W83L950D is a generic microcontroller with a custom Abit application running
on it.
Despite Abit not releasing any information regarding the uGuru, Olle
Sandberg <ollebull@gmail.com> has managed to reverse engineer the sensor part

View File

@ -0,0 +1,65 @@
Kernel driver abituguru3
========================
Supported chips:
* Abit uGuru revision 3 (Hardware Monitor part, reading only)
Prefix: 'abituguru3'
Addresses scanned: ISA 0x0E0
Datasheet: Not available, this driver is based on reverse engineering.
Note:
The uGuru is a microcontroller with onboard firmware which programs
it to behave as a hwmon IC. There are many different revisions of the
firmware and thus effectively many different revisions of the uGuru.
Below is an incomplete list with which revisions are used for which
Motherboards:
uGuru 1.00 ~ 1.24 (AI7, KV8-MAX3, AN7)
uGuru 2.0.0.0 ~ 2.0.4.2 (KV8-PRO)
uGuru 2.1.0.0 ~ 2.1.2.8 (AS8, AV8, AA8, AG8, AA8XE, AX8)
uGuru 2.3.0.0 ~ 2.3.0.9 (AN8)
uGuru 3.0.0.0 ~ 3.0.x.x (AW8, AL8, AT8, NI8 SLI, AT8 32X, AN8 32X,
AW9D-MAX)
The abituguru3 driver is only for revision 3.0.x.x motherboards,
this driver will not work on older motherboards. For older
motherboards use the abituguru (without the 3 !) driver.
Authors:
Hans de Goede <j.w.r.degoede@hhs.nl>,
(Initial reverse engineering done by Louis Kruger)
Module Parameters
-----------------
* force: bool Force detection. Note this parameter only causes the
detection to be skipped, and thus the insmod to
succeed. If the uGuru can't be read the actual hwmon
driver will not load and thus no hwmon device will get
registered.
* verbose: bool Should the driver be verbose?
0/off/false normal output
1/on/true + verbose error reporting (default)
Default: 1 (the driver is still in the testing phase)
Description
-----------
This driver supports the hardware monitoring features of the third revision of
the Abit uGuru chip, found on recent Abit uGuru featuring motherboards.
The 3rd revision of the uGuru chip in reality is a Winbond W83L951G.
Unfortunately this doesn't help since the W83L951G is a generic microcontroller
with a custom Abit application running on it.
Despite Abit not releasing any information regarding the uGuru revision 3,
Louis Kruger has managed to reverse engineer the sensor part of the uGuru.
Without his work this driver would not have been possible.
Known Issues
------------
The voltage and frequency control parts of the Abit uGuru are not supported,
neither is writing any of the sensor settings and writing / reading the
fanspeed control registers (FanEQ)
If you encounter any problems please mail me <j.w.r.degoede@hhs.nl> and
include the output of: "dmesg | grep abituguru"

View File

@ -6,13 +6,13 @@ Supported chips:
Prefix: 'adm1030'
Addresses scanned: I2C 0x2c to 0x2e
Datasheet: Publicly available at the Analog Devices website
http://products.analog.com/products/info.asp?product=ADM1030
http://www.analog.com/en/prod/0%2C2877%2CADM1030%2C00.html
* Analog Devices ADM1031
Prefix: 'adm1031'
Addresses scanned: I2C 0x2c to 0x2e
Datasheet: Publicly available at the Analog Devices website
http://products.analog.com/products/info.asp?product=ADM1031
http://www.analog.com/en/prod/0%2C2877%2CADM1031%2C00.html
Authors:
Alexandre d'Alton <alex@alexdalton.org>

257
Documentation/hwmon/dme1737 Normal file
View File

@ -0,0 +1,257 @@
Kernel driver dme1737
=====================
Supported chips:
* SMSC DME1737 and compatibles (like Asus A8000)
Prefix: 'dme1737'
Addresses scanned: I2C 0x2c, 0x2d, 0x2e
Datasheet: Provided by SMSC upon request and under NDA
Authors:
Juerg Haefliger <juergh@gmail.com>
Module Parameters
-----------------
* force_start: bool Enables the monitoring of voltage, fan and temp inputs
and PWM output control functions. Using this parameter
shouldn't be required since the BIOS usually takes care
of this.
Note that there is no need to use this parameter if the driver loads without
complaining. The driver will say so if it is necessary.
Description
-----------
This driver implements support for the hardware monitoring capabilities of the
SMSC DME1737 and Asus A8000 (which are the same) Super-I/O chips. This chip
features monitoring of 3 temp sensors temp[1-3] (2 remote diodes and 1
internal), 7 voltages in[0-6] (6 external and 1 internal) and 6 fan speeds
fan[1-6]. Additionally, the chip implements 5 PWM outputs pwm[1-3,5-6] for
controlling fan speeds both manually and automatically.
Fan[3-6] and pwm[3,5-6] are optional features and their availability is
dependent on the configuration of the chip. The driver will detect which
features are present during initialization and create the sysfs attributes
accordingly.
Voltage Monitoring
------------------
The voltage inputs are sampled with 12-bit resolution and have internal
scaling resistors. The values returned by the driver therefore reflect true
millivolts and don't need scaling. The voltage inputs are mapped as follows
(the last column indicates the input ranges):
in0: +5VTR (+5V standby) 0V - 6.64V
in1: Vccp (processor core) 0V - 3V
in2: VCC (internal +3.3V) 0V - 4.38V
in3: +5V 0V - 6.64V
in4: +12V 0V - 16V
in5: VTR (+3.3V standby) 0V - 4.38V
in6: Vbat (+3.0V) 0V - 4.38V
Each voltage input has associated min and max limits which trigger an alarm
when crossed.
Temperature Monitoring
----------------------
Temperatures are measured with 12-bit resolution and reported in millidegree
Celsius. The chip also features offsets for all 3 temperature inputs which -
when programmed - get added to the input readings. The chip does all the
scaling by itself and the driver therefore reports true temperatures that don't
need any user-space adjustments. The temperature inputs are mapped as follows
(the last column indicates the input ranges):
temp1: Remote diode 1 (3904 type) temperature -127C - +127C
temp2: DME1737 internal temperature -127C - +127C
temp3: Remote diode 2 (3904 type) temperature -127C - +127C
Each temperature input has associated min and max limits which trigger an alarm
when crossed. Additionally, each temperature input has a fault attribute that
returns 1 when a faulty diode or an unconnected input is detected and 0
otherwise.
Fan Monitoring
--------------
Fan RPMs are measured with 16-bit resolution. The chip provides inputs for 6
fan tachometers. All 6 inputs have an associated min limit which triggers an
alarm when crossed. Fan inputs 1-4 provide type attributes that need to be set
to the number of pulses per fan revolution that the connected tachometer
generates. Supported values are 1, 2, and 4. Fan inputs 5-6 only support fans
that generate 2 pulses per revolution. Fan inputs 5-6 also provide a max
attribute that needs to be set to the maximum attainable RPM (fan at 100% duty-
cycle) of the input. The chip adjusts the sampling rate based on this value.
PWM Output Control
------------------
This chip features 5 PWM outputs. PWM outputs 1-3 are associated with fan
inputs 1-3 and PWM outputs 5-6 are associated with fan inputs 5-6. PWM outputs
1-3 can be configured to operate either in manual or automatic mode by setting
the appropriate enable attribute accordingly. PWM outputs 5-6 can only operate
in manual mode, their enable attributes are therefore read-only. When set to
manual mode, the fan speed is set by writing the duty-cycle value to the
appropriate PWM attribute. In automatic mode, the PWM attribute returns the
current duty-cycle as set by the fan controller in the chip. All PWM outputs
support the setting of the output frequency via the freq attribute.
In automatic mode, the chip supports the setting of the PWM ramp rate which
defines how fast the PWM output is adjusting to changes of the associated
temperature input. Associating PWM outputs to temperature inputs is done via
temperature zones. The chip features 3 zones whose assignments to temperature
inputs are static and determined during initialization. These assignments can
be retrieved via the zone[1-3]_auto_channels_temp attributes. Each PWM output
is assigned to one (or hottest of multiple) temperature zone(s) through the
pwm[1-3]_auto_channels_zone attributes. Each PWM output has 3 distinct output
duty-cycles: full, low, and min. Full is internally hard-wired to 255 (100%)
and low and min can be programmed via pwm[1-3]_auto_point1_pwm and
pwm[1-3]_auto_pwm_min, respectively. The thermal thresholds of the zones are
programmed via zone[1-3]_auto_point[1-3]_temp and
zone[1-3]_auto_point1_temp_hyst:
pwm[1-3]_auto_point2_pwm full-speed duty-cycle (255, i.e., 100%)
pwm[1-3]_auto_point1_pwm low-speed duty-cycle
pwm[1-3]_auto_pwm_min min-speed duty-cycle
zone[1-3]_auto_point3_temp full-speed temp (all outputs)
zone[1-3]_auto_point2_temp full-speed temp
zone[1-3]_auto_point1_temp low-speed temp
zone[1-3]_auto_point1_temp_hyst min-speed temp
The chip adjusts the output duty-cycle linearly in the range of auto_point1_pwm
to auto_point2_pwm if the temperature of the associated zone is between
auto_point1_temp and auto_point2_temp. If the temperature drops below the
auto_point1_temp_hyst value, the output duty-cycle is set to the auto_pwm_min
value which only supports two values: 0 or auto_point1_pwm. That means that the
fan either turns completely off or keeps spinning with the low-speed
duty-cycle. If any of the temperatures rise above the auto_point3_temp value,
all PWM outputs are set to 100% duty-cycle.
Following is another representation of how the chip sets the output duty-cycle
based on the temperature of the associated thermal zone:
Duty-Cycle Duty-Cycle
Temperature Rising Temp Falling Temp
----------- ----------- ------------
full-speed full-speed full-speed
< linearly adjusted duty-cycle >
low-speed low-speed low-speed
min-speed low-speed
min-speed min-speed min-speed
min-speed min-speed
Sysfs Attributes
----------------
Following is a list of all sysfs attributes that the driver provides, their
permissions and a short description:
Name Perm Description
---- ---- -----------
cpu0_vid RO CPU core reference voltage in
millivolts.
vrm RW Voltage regulator module version
number.
in[0-6]_input RO Measured voltage in millivolts.
in[0-6]_min RW Low limit for voltage input.
in[0-6]_max RW High limit for voltage input.
in[0-6]_alarm RO Voltage input alarm. Returns 1 if
voltage input is or went outside the
associated min-max range, 0 otherwise.
temp[1-3]_input RO Measured temperature in millidegree
Celsius.
temp[1-3]_min RW Low limit for temp input.
temp[1-3]_max RW High limit for temp input.
temp[1-3]_offset RW Offset for temp input. This value will
be added by the chip to the measured
temperature.
temp[1-3]_alarm RO Alarm for temp input. Returns 1 if temp
input is or went outside the associated
min-max range, 0 otherwise.
temp[1-3]_fault RO Temp input fault. Returns 1 if the chip
detects a faulty thermal diode or an
unconnected temp input, 0 otherwise.
zone[1-3]_auto_channels_temp RO Temperature zone to temperature input
mapping. This attribute is a bitfield
and supports the following values:
1: temp1
2: temp2
4: temp3
zone[1-3]_auto_point1_temp_hyst RW Auto PWM temp point1 hysteresis. The
output of the corresponding PWM is set
to the auto_pwm_min value if the temp
falls below the auto_point1_temp_hyst
value.
zone[1-3]_auto_point[1-3]_temp RW Auto PWM temp points. Auto_point1 is
the low-speed temp, auto_point2 is the
full-speed temp, and auto_point3 is the
temp at which all PWM outputs are set
to full-speed (100% duty-cycle).
fan[1-6]_input RO Measured fan speed in RPM.
fan[1-6]_min RW Low limit for fan input.
fan[1-6]_alarm RO Alarm for fan input. Returns 1 if fan
input is or went below the associated
min value, 0 otherwise.
fan[1-4]_type RW Type of attached fan. Expressed in
number of pulses per revolution that
the fan generates. Supported values are
1, 2, and 4.
fan[5-6]_max RW Max attainable RPM at 100% duty-cycle.
Required for chip to adjust the
sampling rate accordingly.
pwm[1-3,5-6] RO/RW Duty-cycle of PWM output. Supported
values are 0-255 (0%-100%). Only
writeable if the associated PWM is in
manual mode.
pwm[1-3]_enable RW Enable of PWM outputs 1-3. Supported
values are:
0: turned off (output @ 100%)
1: manual mode
2: automatic mode
pwm[5-6]_enable RO Enable of PWM outputs 5-6. Always
returns 1 since these 2 outputs are
hard-wired to manual mode.
pwm[1-3,5-6]_freq RW Frequency of PWM output. Supported
values are in the range 11Hz-30000Hz
(default is 25000Hz).
pwm[1-3]_ramp_rate RW Ramp rate of PWM output. Determines how
fast the PWM duty-cycle will change
when the PWM is in automatic mode.
Expressed in ms per PWM step. Supported
values are in the range 0ms-206ms
(default is 0, which means the duty-
cycle changes instantly).
pwm[1-3]_auto_channels_zone RW PWM output to temperature zone mapping.
This attribute is a bitfield and
supports the following values:
1: zone1
2: zone2
4: zone3
6: highest of zone[2-3]
7: highest of zone[1-3]
pwm[1-3]_auto_pwm_min RW Auto PWM min pwm. Minimum PWM duty-
cycle. Supported values are 0 or
auto_point1_pwm.
pwm[1-3]_auto_point1_pwm RW Auto PWM pwm point. Auto_point1 is the
low-speed duty-cycle.
pwm[1-3]_auto_point2_pwm RO Auto PWM pwm point. Auto_point2 is the
full-speed duty-cycle which is hard-
wired to 255 (100% duty-cycle).

View File

@ -5,11 +5,11 @@ Supported chips:
* Fintek F71805F/FG
Prefix: 'f71805f'
Addresses scanned: none, address read from Super I/O config space
Datasheet: Provided by Fintek on request
Datasheet: Available from the Fintek website
* Fintek F71872F/FG
Prefix: 'f71872f'
Addresses scanned: none, address read from Super I/O config space
Datasheet: Provided by Fintek on request
Datasheet: Available from the Fintek website
Author: Jean Delvare <khali@linux-fr.org>
@ -128,7 +128,9 @@ it.
When the PWM method is used, you can select the operating frequency,
from 187.5 kHz (default) to 31 Hz. The best frequency depends on the
fan model. As a rule of thumb, lower frequencies seem to give better
control, but may generate annoying high-pitch noise. Fintek recommends
control, but may generate annoying high-pitch noise. So a frequency just
above the audible range, such as 25 kHz, may be a good choice; if this
doesn't give you good linear control, try reducing it. Fintek recommends
not going below 1 kHz, as the fan tachometers get confused by lower
frequencies as well.
@ -136,16 +138,23 @@ When the DC method is used, Fintek recommends not going below 5 V, which
corresponds to a pwm value of 106 for the driver. The driver doesn't
enforce this limit though.
Three different fan control modes are supported:
Three different fan control modes are supported; the mode number is written
to the pwm<n>_enable file.
* Manual mode
You ask for a specific PWM duty cycle or DC voltage.
* 1: Manual mode
You ask for a specific PWM duty cycle or DC voltage by writing to the
pwm<n> file.
* Fan speed mode
You ask for a specific fan speed. This mode assumes that pwm1
corresponds to fan1, pwm2 to fan2 and pwm3 to fan3.
* 2: Temperature mode
You define 3 temperature/fan speed trip points using the
pwm<n>_auto_point<m>_temp and _fan files. These define a staircase
relationship between temperature and fan speed with two additional points
interpolated between the values that you define. When the temperature
is below auto_point1_temp the fan is switched off.
* Temperature mode
You define 3 temperature/fan speed trip points, and the fan speed is
adjusted depending on the measured temperature, using interpolation.
This mode is not yet supported by the driver.
* 3: Fan speed mode
You ask for a specific fan speed by writing to the fan<n>_target file.
Both of the automatic modes require that pwm1 corresponds to fan1, pwm2 to
fan2 and pwm3 to fan3. Temperature mode also requires that temp1 corresponds
to pwm1 and fan1, etc.

View File

@ -12,11 +12,12 @@ Supported chips:
Addresses scanned: from Super I/O config space (8 I/O ports)
Datasheet: Publicly available at the ITE website
http://www.ite.com.tw/
* IT8716F
* IT8716F/IT8726F
Prefix: 'it8716'
Addresses scanned: from Super I/O config space (8 I/O ports)
Datasheet: Publicly available at the ITE website
http://www.ite.com.tw/product_info/file/pc/IT8716F_V0.3.ZIP
http://www.ite.com.tw/product_info/file/pc/IT8726F_V0.3.pdf
* IT8718F
Prefix: 'it8718'
Addresses scanned: from Super I/O config space (8 I/O ports)
@ -68,7 +69,7 @@ Description
-----------
This driver implements support for the IT8705F, IT8712F, IT8716F,
IT8718F and SiS950 chips.
IT8718F, IT8726F and SiS950 chips.
These chips are 'Super I/O chips', supporting floppy disks, infrared ports,
joysticks and other miscellaneous stuff. For hardware monitoring, they
@ -97,6 +98,10 @@ clock divider mess) but not compatible with the older chips and
revisions. For now, the driver only uses the 16-bit mode on the
IT8716F and IT8718F.
The IT8726F is just a slightly enhanced IT8716F with additional hardware
for AMD power sequencing. Therefore the chip will appear as IT8716F
to userspace applications.
Temperatures are measured in degrees Celsius. An alarm is triggered once
when the Overtemperature Shutdown limit is crossed.

View File

@ -48,6 +48,18 @@ Supported chips:
Addresses scanned: I2C 0x4c, 0x4d (unsupported 0x4e)
Datasheet: Publicly available at the Maxim website
http://www.maxim-ic.com/quick_view2.cfm/qv_pk/2578
* Maxim MAX6680
Prefix: 'max6680'
Addresses scanned: I2C 0x18, 0x19, 0x1a, 0x29, 0x2a, 0x2b,
0x4c, 0x4d and 0x4e
Datasheet: Publicly available at the Maxim website
http://www.maxim-ic.com/quick_view2.cfm/qv_pk/3370
* Maxim MAX6681
Prefix: 'max6680'
Addresses scanned: I2C 0x18, 0x19, 0x1a, 0x29, 0x2a, 0x2b,
0x4c, 0x4d and 0x4e
Datasheet: Publicly available at the Maxim website
http://www.maxim-ic.com/quick_view2.cfm/qv_pk/3370
Author: Jean Delvare <khali@linux-fr.org>
@ -59,11 +71,15 @@ Description
The LM90 is a digital temperature sensor. It senses its own temperature as
well as the temperature of up to one external diode. It is compatible
with many other devices such as the LM86, the LM89, the LM99, the ADM1032,
the MAX6657, MAX6658 and the MAX6659 all of which are supported by this driver.
Note that there is no easy way to differentiate between the last three
variants. The extra address and features of the MAX6659 are not supported by
this driver. Additionally, the ADT7461 is supported if found in ADM1032
compatibility mode.
the MAX6657, MAX6658, MAX6659, MAX6680 and the MAX6681 all of which are
supported by this driver.
Note that there is no easy way to differentiate between the MAX6657,
MAX6658 and MAX6659 variants. The extra address and features of the
MAX6659 are not supported by this driver. The MAX6680 and MAX6681 only
differ in their pinout, therefore they obviously can't (and don't need to)
be distinguished. Additionally, the ADT7461 is supported if found in
ADM1032 compatibility mode.
The specificity of this family of chipsets over the ADM1021/LM84
family is that it features critical limits with hysteresis, and an
@ -93,18 +109,22 @@ ADM1032:
* ALERT is triggered by open remote sensor.
* SMBus PEC support for Write Byte and Receive Byte transactions.
ADT7461
ADT7461:
* Extended temperature range (breaks compatibility)
* Lower resolution for remote temperature
MAX6657 and MAX6658:
* Remote sensor type selection
MAX6659
MAX6659:
* Selectable address
* Second critical temperature limit
* Remote sensor type selection
MAX6680 and MAX6681:
* Selectable address
* Remote sensor type selection
All temperature values are given in degrees Celsius. Resolution
is 1.0 degree for the local temperature, 0.125 degree for the remote
temperature.
@ -141,7 +161,7 @@ SMBus Read Byte, and PEC will work properly.
Additionally, the ADM1032 doesn't support SMBus Send Byte with PEC.
Instead, it will try to write the PEC value to the register (because the
SMBus Send Byte transaction with PEC is similar to a Write Byte transaction
without PEC), which is not what we want. Thus, PEC is explicitely disabled
without PEC), which is not what we want. Thus, PEC is explicitly disabled
on SMBus Send Byte transactions in the lm90 driver.
PEC on byte data transactions represents a significant increase in bandwidth

412
Documentation/hwmon/lm93 Normal file
View File

@ -0,0 +1,412 @@
Kernel driver lm93
==================
Supported chips:
* National Semiconductor LM93
Prefix 'lm93'
Addresses scanned: I2C 0x2c-0x2e
Datasheet: http://www.national.com/ds.cgi/LM/LM93.pdf
Author:
Mark M. Hoffman <mhoffman@lightlink.com>
Ported to 2.6 by Eric J. Bowersox <ericb@aspsys.com>
Adapted to 2.6.20 by Carsten Emde <ce@osadl.org>
Modified for mainline integration by Hans J. Koch <hjk@linutronix.de>
Module Parameters
-----------------
(specific to LM93)
* init: integer
Set to non-zero to force some initializations (default is 0).
* disable_block: integer
A "0" allows SMBus block data transactions if the host supports them. A "1"
disables SMBus block data transactions. The default is 0.
* vccp_limit_type: integer array (2)
Configures in7 and in8 limit type, where 0 means absolute and non-zero
means relative. "Relative" here refers to "Dynamic Vccp Monitoring using
VID" from the datasheet. It greatly simplifies the interface to allow
only one set of limits (absolute or relative) to be in operation at a
time (even though the hardware is capable of enabling both). There's
not a compelling use case for enabling both at once, anyway. The default
is "0,0".
* vid_agtl: integer
A "0" configures the VID pins for V(ih) = 2.1V min, V(il) = 0.8V max.
A "1" configures the VID pins for V(ih) = 0.8V min, V(il) = 0.4V max.
(The latter setting is referred to as AGTL+ Compatible in the datasheet.)
I.e. this parameter controls the VID pin input thresholds; if your VID
inputs are not working, try changing this. The default value is "0".
(common among sensor drivers)
* force: short array (min = 1, max = 48)
List of adapter,address pairs to assume to be present. Autodetection
of the target device will still be attempted. Use one of the more
specific force directives below if this doesn't detect the device.
* force_lm93: short array (min = 1, max = 48)
List of adapter,address pairs which are unquestionably assumed to contain
a 'lm93' chip
* ignore: short array (min = 1, max = 48)
List of adapter,address pairs not to scan
* ignore_range: short array (min = 1, max = 48)
List of adapter,start-addr,end-addr triples not to scan
* probe: short array (min = 1, max = 48)
List of adapter,address pairs to scan additionally
* probe_range: short array (min = 1, max = 48)
List of adapter,start-addr,end-addr triples to scan additionally
Hardware Description
--------------------
(from the datasheet)
The LM93, hardware monitor, has a two wire digital interface compatible with
SMBus 2.0. Using an 8-bit ADC, the LM93 measures the temperature of two remote
diode connected transistors as well as its own die and 16 power supply
voltages. To set fan speed, the LM93 has two PWM outputs that are each
controlled by up to four temperature zones. The fancontrol algorithm is lookup
table based. The LM93 includes a digital filter that can be invoked to smooth
temperature readings for better control of fan speed. The LM93 has four
tachometer inputs to measure fan speed. Limit and status registers for all
measured values are included. The LM93 builds upon the functionality of
previous motherboard management ASICs and uses some of the LM85's features
(i.e. smart tachometer mode). It also adds measurement and control support
for dynamic Vccp monitoring and PROCHOT. It is designed to monitor a dual
processor Xeon class motherboard with a minimum of external components.
Driver Description
------------------
This driver implements support for the National Semiconductor LM93.
User Interface
--------------
#PROCHOT:
The LM93 can monitor two #PROCHOT signals. The results are found in the
sysfs files prochot1, prochot2, prochot1_avg, prochot2_avg, prochot1_max,
and prochot2_max. prochot1_max and prochot2_max contain the user limits
for #PROCHOT1 and #PROCHOT2, respectively. prochot1 and prochot2 contain
the current readings for the most recent complete time interval. The
value of prochot1_avg and prochot2_avg is something like a 2 period
exponential moving average (but not quite - check the datasheet). Note
that this third value is calculated by the chip itself. All values range
from 0-255 where 0 indicates no throttling, and 255 indicates > 99.6%.
The monitoring intervals for the two #PROCHOT signals are also configurable.
These intervals can be found in the sysfs files prochot1_interval and
prochot2_interval. The values in these files specify the intervals for
#P1_PROCHOT and #P2_PROCHOT, respectively. Selecting a value not in this
list will cause the driver to use the next largest interval. The available
intervals are:
#PROCHOT intervals: 0.73, 1.46, 2.9, 5.8, 11.7, 23.3, 46.6, 93.2, 186, 372
It is possible to configure the LM93 to logically short the two #PROCHOT
signals. I.e. when #P1_PROCHOT is asserted, the LM93 will automatically
assert #P2_PROCHOT, and vice-versa. This mode is enabled by writing a
non-zero integer to the sysfs file prochot_short.
The LM93 can also override the #PROCHOT pins by driving a PWM signal onto
one or both of them. When overridden, the signal has a period of 3.56 mS,
a minimum pulse width of 5 clocks (at 22.5kHz => 6.25% duty cycle), and
a maximum pulse width of 80 clocks (at 22.5kHz => 99.88% duty cycle).
The sysfs files prochot1_override and prochot2_override contain boolean
integers which enable or disable the override function for #P1_PROCHOT and
#P2_PROCHOT, respectively. The sysfs file prochot_override_duty_cycle
contains a value controlling the duty cycle for the PWM signal used when
the override function is enabled. This value ranges from 0 to 15, with 0
indicating minimum duty cycle and 15 indicating maximum.
#VRD_HOT:
The LM93 can monitor two #VRD_HOT signals. The results are found in the
sysfs files vrdhot1 and vrdhot2. There is one value per file: a boolean for
which 1 indicates #VRD_HOT is asserted and 0 indicates it is negated. These
files are read-only.
Smart Tach Mode:
(from the datasheet)
If a fan is driven using a low-side drive PWM, the tachometer
output of the fan is corrupted. The LM93 includes smart tachometer
circuitry that allows an accurate tachometer reading to be
achieved despite the signal corruption. In smart tach mode all
four signals are measured within 4 seconds.
Smart tach mode is enabled by the driver by writing 1 or 2 (associating the
fan tachometer with a pwm) to the sysfs file fan<n>_smart_tach. A zero
will disable the function for that fan. Note that Smart tach mode cannot be
enabled if the PWM output frequency is 22500 Hz (see below).
Manual PWM:
The LM93 has a fixed or override mode for the two PWM outputs (although, there
are still some conditions that will override even this mode - see section
15.10.6 of the datasheet for details.) The sysfs files pwm1_override
and pwm2_override are used to enable this mode; each is a boolean integer
where 0 disables and 1 enables the manual control mode. The sysfs files pwm1
and pwm2 are used to set the manual duty cycle; each is an integer (0-255)
where 0 is 0% duty cycle, and 255 is 100%. Note that the duty cycle values
are constrained by the hardware. Selecting a value which is not available
will cause the driver to use the next largest value. Also note: when manual
PWM mode is disabled, the value of pwm1 and pwm2 indicates the current duty
cycle chosen by the h/w.
PWM Output Frequency:
The LM93 supports several different frequencies for the PWM output channels.
The sysfs files pwm1_freq and pwm2_freq are used to select the frequency. The
frequency values are constrained by the hardware. Selecting a value which is
not available will cause the driver to use the next largest value. Also note
that this parameter has implications for the Smart Tach Mode (see above).
PWM Output Frequencies: 12, 36, 48, 60, 72, 84, 96, 22500 (h/w default)
Automatic PWM:
The LM93 is capable of complex automatic fan control, with many different
points of configuration. To start, each PWM output can be bound to any
combination of eight control sources. The final PWM is the largest of all
individual control sources to which the PWM output is bound.
The eight control sources are: temp1-temp4 (aka "zones" in the datasheet),
#PROCHOT 1 & 2, and #VRDHOT 1 & 2. The bindings are expressed as a bitmask
in the sysfs files pwm<n>_auto_channels, where a "1" enables the binding, and
a "0" disables it. The h/w default is 0x0f (all temperatures bound).
0x01 - Temp 1
0x02 - Temp 2
0x04 - Temp 3
0x08 - Temp 4
0x10 - #PROCHOT 1
0x20 - #PROCHOT 2
0x40 - #VRDHOT 1
0x80 - #VRDHOT 2
The function y = f(x) takes a source temperature x to a PWM output y. This
function of the LM93 is derived from a base temperature and a table of 12
temperature offsets. The base temperature is expressed in degrees C in the
sysfs files temp<n>_auto_base. The offsets are expressed in cumulative
degrees C, with the value of offset <i> for temperature value <n> being
contained in the file temp<n>_auto_offset<i>. E.g. if the base temperature
is 40C:
offset # temp<n>_auto_offset<i> range pwm
1 0 - 25.00%
2 0 - 28.57%
3 1 40C - 41C 32.14%
4 1 41C - 42C 35.71%
5 2 42C - 44C 39.29%
6 2 44C - 46C 42.86%
7 2 48C - 50C 46.43%
8 2 50C - 52C 50.00%
9 2 52C - 54C 53.57%
10 2 54C - 56C 57.14%
11 2 56C - 58C 71.43%
12 2 58C - 60C 85.71%
> 60C 100.00%
Valid offsets are in the range 0C <= x <= 7.5C in 0.5C increments.
There is an independent base temperature for each temperature channel. Note,
however, there are only two tables of offsets: one each for temp[12] and
temp[34]. Therefore, any change to e.g. temp1_auto_offset<i> will also
affect temp2_auto_offset<i>.
The LM93 can also apply hysteresis to the offset table, to prevent unwanted
oscillation between two steps in the offsets table. These values are found in
the sysfs files temp<n>_auto_offset_hyst. The value in this file has the
same representation as in temp<n>_auto_offset<i>.
If a temperature reading falls below the base value for that channel, the LM93
will use the minimum PWM value. These values are found in the sysfs files
temp<n>_auto_pwm_min. Note, there are only two minimums: one each for temp[12]
and temp[34]. Therefore, any change to e.g. temp1_auto_pwm_min will also
affect temp2_auto_pwm_min.
PWM Spin-Up Cycle:
A spin-up cycle occurs when a PWM output is commanded from 0% duty cycle to
some value > 0%. The LM93 supports a minimum duty cycle during spin-up. These
values are found in the sysfs files pwm<n>_auto_spinup_min. The value in this
file has the same representation as other PWM duty cycle values. The
duration of the spin-up cycle is also configurable. These values are found in
the sysfs files pwm<n>_auto_spinup_time. The value in this file is
the spin-up time in seconds. The available spin-up times are constrained by
the hardware. Selecting a value which is not available will cause the driver
to use the next largest value.
Spin-up Durations: 0 (disabled, h/w default), 0.1, 0.25, 0.4, 0.7, 1.0,
2.0, 4.0
#PROCHOT and #VRDHOT PWM Ramping:
If the #PROCHOT or #VRDHOT signals are asserted while bound to a PWM output
channel, the LM93 will ramp the PWM output up to 100% duty cycle in discrete
steps. The duration of each step is configurable. There are two files, with
one value each in seconds: pwm_auto_prochot_ramp and pwm_auto_vrdhot_ramp.
The available ramp times are constrained by the hardware. Selecting a value
which is not available will cause the driver to use the next largest value.
Ramp Times: 0 (disabled, h/w default) to 0.75 in 0.05 second intervals
Fan Boost:
For each temperature channel, there is a boost temperature: if the channel
exceeds this limit, the LM93 will immediately drive both PWM outputs to 100%.
This limit is expressed in degrees C in the sysfs files temp<n>_auto_boost.
There is also a hysteresis temperature for this function: after the boost
limit is reached, the temperature channel must drop below this value before
the boost function is disabled. This temperature is also expressed in degrees
C in the sysfs files temp<n>_auto_boost_hyst.
GPIO Pins:
The LM93 can monitor the logic level of four dedicated GPIO pins as well as the
four tach input pins. GPIO0-GPIO3 correspond to (fan) tach 1-4, respectively.
All eight GPIOs are read by reading the bitmask in the sysfs file gpio. The
LSB is GPIO0, and the MSB is GPIO7.
LM93 Unique sysfs Files
-----------------------
file description
-------------------------------------------------------------
prochot<n> current #PROCHOT %
prochot<n>_avg moving average #PROCHOT %
prochot<n>_max limit #PROCHOT %
prochot_short enable or disable logical #PROCHOT pin short
prochot<n>_override force #PROCHOT assertion as PWM
prochot_override_duty_cycle
duty cycle for the PWM signal used when
#PROCHOT is overridden
prochot<n>_interval #PROCHOT PWM sampling interval
vrdhot<n> 0 means negated, 1 means asserted
fan<n>_smart_tach enable or disable smart tach mode
pwm<n>_auto_channels select control sources for PWM outputs
pwm<n>_auto_spinup_min minimum duty cycle during spin-up
pwm<n>_auto_spinup_time duration of spin-up
pwm_auto_prochot_ramp ramp time per step when #PROCHOT asserted
pwm_auto_vrdhot_ramp ramp time per step when #VRDHOT asserted
temp<n>_auto_base temperature channel base
temp<n>_auto_offset[1-12]
temperature channel offsets
temp<n>_auto_offset_hyst
temperature channel offset hysteresis
temp<n>_auto_boost temperature channel boost (PWMs to 100%) limit
temp<n>_auto_boost_hyst temperature channel boost hysteresis
gpio input state of 8 GPIO pins; read-only
Sample Configuration File
-------------------------
Here is a sample LM93 chip config for sensors.conf:
---------- cut here ----------
chip "lm93-*"
# VOLTAGE INPUTS
# labels and scaling based on datasheet recommendations
label in1 "+12V1"
compute in1 @ * 12.945, @ / 12.945
set in1_min 12 * 0.90
set in1_max 12 * 1.10
label in2 "+12V2"
compute in2 @ * 12.945, @ / 12.945
set in2_min 12 * 0.90
set in2_max 12 * 1.10
label in3 "+12V3"
compute in3 @ * 12.945, @ / 12.945
set in3_min 12 * 0.90
set in3_max 12 * 1.10
label in4 "FSB_Vtt"
label in5 "3GIO"
label in6 "ICH_Core"
label in7 "Vccp1"
label in8 "Vccp2"
label in9 "+3.3V"
set in9_min 3.3 * 0.90
set in9_max 3.3 * 1.10
label in10 "+5V"
set in10_min 5.0 * 0.90
set in10_max 5.0 * 1.10
label in11 "SCSI_Core"
label in12 "Mem_Core"
label in13 "Mem_Vtt"
label in14 "Gbit_Core"
# Assuming R1/R2 = 4.1143, and 3.3V reference
# -12V = (4.1143 + 1) * (@ - 3.3) + 3.3
label in15 "-12V"
compute in15 @ * 5.1143 - 13.57719, (@ + 13.57719) / 5.1143
set in15_min -12 * 0.90
set in15_max -12 * 1.10
label in16 "+3.3VSB"
set in16_min 3.3 * 0.90
set in16_max 3.3 * 1.10
# TEMPERATURE INPUTS
label temp1 "CPU1"
label temp2 "CPU2"
label temp3 "LM93"
# TACHOMETER INPUTS
label fan1 "Fan1"
set fan1_min 3000
label fan2 "Fan2"
set fan2_min 3000
label fan3 "Fan3"
set fan3_min 3000
label fan4 "Fan4"
set fan4_min 3000
# PWM OUTPUTS
label pwm1 "CPU1"
label pwm2 "CPU2"

View File

@ -4,6 +4,7 @@ Kernel driver smsc47b397
Supported chips:
* SMSC LPC47B397-NC
* SMSC SCH5307-NS
* SMSC SCH5317
Prefix: 'smsc47b397'
Addresses scanned: none, address read from Super I/O config space
Datasheet: In this file
@ -18,8 +19,8 @@ The following specification describes the SMSC LPC47B397-NC[1] sensor chip
provided by Craig Kelly (In-Store Broadcast Network) and edited/corrected
by Mark M. Hoffman <mhoffman@lightlink.com>.
[1] And SMSC SCH5307-NS, which has a different device ID but is otherwise
compatible.
[1] And SMSC SCH5307-NS and SCH5317, which have different device IDs but are
otherwise compatible.
* * * * *
@ -131,7 +132,7 @@ OUT DX,AL
The registers of interest for identifying the SIO on the dc7100 are Device ID
(0x20) and Device Rev (0x21).
The Device ID will read 0x6F (for SCH5307-NS, 0x81)
The Device ID will read 0x6F (0x81 for SCH5307-NS, and 0x85 for SCH5317)
The Device Rev currently reads 0x01
Obtaining the HWM Base Address.

View File

@ -172,11 +172,10 @@ pwm[1-*] Pulse width modulation fan control.
255 is max or 100%.
pwm[1-*]_enable
Switch PWM on and off.
Not always present even if pwmN is.
0: turn off
1: turn on in manual mode
2+: turn on in automatic mode
Fan speed control method:
0: no fan speed control (i.e. fan at full speed)
1: manual fan speed control enabled (using pwm[1-*])
2+: automatic fan speed control enabled
Check individual chip documentation files for automatic mode
details.
RW
@ -343,9 +342,9 @@ to notify open diodes, unconnected fans etc. where the hardware
supports it. When this boolean has value 1, the measurement for that
channel should not be trusted.
in[0-*]_input_fault
fan[1-*]_input_fault
temp[1-*]_input_fault
in[0-*]_fault
fan[1-*]_fault
temp[1-*]_fault
Input fault condition
0: no fault occurred
1: fault condition

View File

@ -0,0 +1,74 @@
Kernel driver thmc50
=====================
Supported chips:
* Analog Devices ADM1022
Prefix: 'adm1022'
Addresses scanned: I2C 0x2c - 0x2e
Datasheet: http://www.analog.com/en/prod/0,2877,ADM1022,00.html
* Texas Instruments THMC50
Prefix: 'thmc50'
Addresses scanned: I2C 0x2c - 0x2e
Datasheet: http://focus.ti.com/docs/prod/folders/print/thmc50.html
Author: Krzysztof Helt <krzysztof.h1@wp.pl>
This driver was derived from the 2.4 kernel thmc50.c source file.
Credits:
thmc50.c (2.4 kernel):
Frodo Looijaard <frodol@dds.nl>
Philip Edelbrock <phil@netroedge.com>
Module Parameters
-----------------
* adm1022_temp3: short array
List of adapter,address pairs to force chips into ADM1022 mode with
second remote temperature. This does not work for original THMC50 chips.
Description
-----------
The THMC50 implements: an internal temperature sensor, support for an
external diode-type temperature sensor (compatible w/ the diode sensor inside
many processors), and a controllable fan/analog_out DAC. For the temperature
sensors, limits can be set through the appropriate Overtemperature Shutdown
register and Hysteresis register. Each value can be set and read to half-degree
accuracy. An alarm is issued (usually to a connected LM78) when the
temperature gets higher than the Overtemperature Shutdown value; it stays on
until the temperature falls below the Hysteresis value. All temperatures are in
degrees Celsius, and are guaranteed within a range of -55 to +125 degrees.
The THMC50 only updates its values each 1.5 seconds; reading it more often
will do no harm, but will return 'old' values.
The THMC50 is usually used in combination with LM78-like chips, to measure
the temperature of the processor(s).
The ADM1022 works the same as THMC50 but it is faster (5 Hz instead of
1 Hz for THMC50). It can also be put in a new mode to handle an additional
remote temperature sensor. The driver uses the mode set by the BIOS by default.
In case the BIOS is broken and the mode is set incorrectly, you can force
the mode with additional remote temperature with adm1022_temp3 parameter.
A typical symptom of wrong setting is a fan forced to full speed.
Driver Features
---------------
The driver provides up to three temperatures:
temp1 -- internal
temp2 -- remote
temp3 -- 2nd remote only for ADM1022
pwm1 -- fan speed (0 = stop, 255 = full)
pwm1_mode -- always 0 (DC mode)
The value of 0 for pwm1 also forces FAN_OFF signal from the chip,
so it stops fans even if the value 0 into the ANALOG_OUT register does not.
The driver was tested on Compaq AP550 with two ADM1022 chips (one works
in the temp3 mode), five temperature readings and two fans.

View File

@ -22,9 +22,9 @@ This driver implements support for the Winbond W83627EHF, W83627EHG, and
W83627DHG super I/O chips. We will refer to them collectively as Winbond chips.
The chips implement three temperature sensors, five fan rotation
speed sensors, ten analog voltage sensors (only nine for the 627DHG), alarms
with beep warnings (control unimplemented), and some automatic fan regulation
strategies (plus manual fan control mode).
speed sensors, ten analog voltage sensors (only nine for the 627DHG), one
VID (6 pins), alarms with beep warnings (control unimplemented), and
some automatic fan regulation strategies (plus manual fan control mode).
Temperatures are measured in degrees Celsius and measurement resolution is 1
degC for temp1 and 0.5 degC for temp2 and temp3. An alarm is triggered when

View File

@ -6,7 +6,7 @@ Supported adapters:
Datasheet: Publicly available at the Intel website
* ServerWorks OSB4, CSB5, CSB6 and HT-1000 southbridges
Datasheet: Only available via NDA from ServerWorks
* ATI IXP200, IXP300, IXP400, SB600 and SB700 southbridges
* ATI IXP200, IXP300, IXP400, SB600, SB700 and SB800 southbridges
Datasheet: Not publicly available
* Standard Microsystems (SMSC) SLC90E66 (Victory66) southbridge
Datasheet: Publicly available at the SMSC website http://www.smsc.com

View File

@ -1,3 +1,13 @@
---------------------------------------------------------------------------
!!!!!!!!!!!!!!!WARNING!!!!!!!!
The zero page is a kernel internal data structure, not a stable ABI. It might change
without warning and the kernel has no way to detect old versions of it.
If you're writing some external code like a boot loader you should only use
the stable versioned real mode boot protocol described in boot.txt. Otherwise the kernel
might break you at any time.
!!!!!!!!!!!!!WARNING!!!!!!!!!!!
----------------------------------------------------------------------------
Summary of boot_params layout (kernel point of view)
( collected by Hans Lermen and Martin Mares )

View File

@ -99,6 +99,20 @@ Transaction IDs
request/response pairs. The upper 32 bits are reserved for use by
the kernel and will be overwritten before a MAD is sent.
P_Key Index Handling
The old ib_umad interface did not allow setting the P_Key index for
MADs that are sent and did not provide a way for obtaining the P_Key
index of received MADs. A new layout for struct ib_user_mad_hdr
with a pkey_index member has been defined; however, to preserve
binary compatibility with older applications, this new layout will
not be used unless the IB_USER_MAD_ENABLE_PKEY ioctl is called
before a file descriptor is used for anything else.
In September 2008, the IB_USER_MAD_ABI_VERSION will be incremented
to 6, the new layout of struct ib_user_mad_hdr will be used by
default, and the IB_USER_MAD_ENABLE_PKEY ioctl will be removed.
Setting IsSM Capability Bit
To set the IsSM capability bit for a port, simply open the

View File

@ -79,7 +79,7 @@ Field 8 -- # of milliseconds spent writing
measured from __make_request() to end_that_request_last()).
Field 9 -- # of I/Os currently in progress
The only field that should go to zero. Incremented as requests are
given to appropriate request_queue_t and decremented as they finish.
given to appropriate struct request_queue and decremented as they finish.
Field 10 -- # of milliseconds spent doing I/Os
This field increases so long as field 9 is nonzero.
Field 11 -- weighted # of milliseconds spent doing I/Os

660
Documentation/ja_JP/HOWTO Normal file
View File

@ -0,0 +1,660 @@
NOTE:
This is a version of Documentation/HOWTO translated into Japanese.
This document is maintained by Tsugikazu Shibata <tshibata@ab.jp.nec.com>
and the JF Project team <www.linux.or.jp/JF>.
If you find any difference between this document and the original file
or a problem with the translation,
please contact the maintainer of this file or JF project.
Please also note that the purpose of this file is to be easier to read
for non English (read: Japanese) speakers and is not intended as a
fork. So if you have any comments or updates for this file, please try
to update the original English file first.
Last Updated: 2007/09/23
==================================
これは、
linux-2.6.23/Documentation/HOWTO
の和訳です。
翻訳団体: JF プロジェクト < http://www.linux.or.jp/JF/ >
翻訳日: 2007/09/19
翻訳者: Tsugikazu Shibata <tshibata at ab dot jp dot nec dot com>
校正者: 松倉さん <nbh--mats at nifty dot com>
小林 雅典さん (Masanori Kobayasi) <zap03216 at nifty dot ne dot jp>
武井伸光さん、<takei at webmasters dot gr dot jp>
かねこさん (Seiji Kaneko) <skaneko at a2 dot mbn dot or dot jp>
野口さん (Kenji Noguchi) <tokyo246 at gmail dot com>
河内さん (Takayoshi Kochi) <t-kochi at bq dot jp dot nec dot com>
岩本さん (iwamoto) <iwamoto.kn at ncos dot nec dot co dot jp>
内田さん (Satoshi Uchida) <s-uchida at ap dot jp dot nec dot com>
==================================
Linux カーネル開発のやり方
-------------------------------
これは上のトピック( Linux カーネル開発のやり方)の重要な事柄を網羅した
ドキュメントです。ここには Linux カーネル開発者になるための方法と
Linux カーネル開発コミュニティと共に活動するやり方を学ぶ方法が含まれて
います。カーネルプログラミングに関する技術的な項目に関することは何も含
めないようにしていますが、カーネル開発者となるための正しい方向に向かう
手助けになります。
もし、このドキュメントのどこかが古くなっていた場合には、このドキュメン
トの最後にリストしたメンテナにパッチを送ってください。
はじめに
---------
あなたは Linux カーネルの開発者になる方法を学びたいのでしょうか? そ
れともあなたは上司から「このデバイスの Linux ドライバを書くように」と
言われているのでしょうか? 
この文書の目的は、あなたが踏むべき手順と、コミュニティと一緒にうまく働
くヒントを書き下すことで、あなたが知るべき全てのことを教えることです。
また、このコミュニティがなぜ今うまくまわっているのかという理由の一部も
説明しようと試みています。
カーネルは 少量のアーキテクチャ依存部分がアセンブリ言語で書かれている
以外は大部分は C 言語で書かれています。C言語をよく理解していることはカー
ネル開発者には必要です。アーキテクチャ向けの低レベル部分の開発をするの
でなければ、(どんなアーキテクチャでも)アセンブリ(訳注: 言語)は必要あり
ません。以下の本は、C 言語の十分な知識や何年もの経験に取って代わるもの
ではありませんが、少なくともリファレンスとしては良い本です。
- "The C Programming Language" by Kernighan and Ritchie [Prentice Hall]
-『プログラミング言語第2版』(B.W. カーニハン/D.M. リッチー著 石田晴久訳) [共立出版]
- "Practical C Programming" by Steve Oualline [O'Reilly]
- 『C実践プログラミング第3版』(Steve Oualline著 望月康司監訳 谷口功訳) [オライリージャパン]
- "C: A Reference Manual" by Harbison and Steele [Prentice Hall]
- 『新・詳説 C 言語 H&S リファレンス』
(サミュエル P ハービソン/ガイ L スティール共著 斉藤 信男監訳)[ソフトバンク]
カーネルは GNU C と GNU ツールチェインを使って書かれています。カーネル
は ISO C89 仕様に準拠して書く一方で、標準には無い言語拡張を多く使って
います。カーネルは標準 C ライブラリとは関係がないといった、C 言語フリー
スタンディング環境です。そのため、C の標準で使えないものもあります。任
意の long long の除算や浮動小数点は使えません。
ときどき、カーネルがツールチェインや C 言語拡張に置いている前提がどう
なっているのかわかりにくいことがあり、また、残念なことに決定的なリファ
レンスは存在しません。情報を得るには、gcc の info ページ( info gcc )を
見てください。
あなたは既存の開発コミュニティと一緒に作業する方法を学ぼうとしているこ
とに留意してください。そのコミュニティは、コーディング、スタイル、
開発手順について高度な標準を持つ、多様な人の集まりです。
地理的に分散した大規模なチームに対してもっともうまくいくとわかったこと
をベースにしながら、これらの標準は長い時間をかけて築かれてきました。
これらはきちんと文書化されていますから、事前にこれらの標準についてでき
るだけたくさん学んでください。また皆があなたやあなたの会社のやり方に合わ
せてくれると思わないでください。
法的問題
------------
Linux カーネルのソースコードは GPL ライセンスの下でリリースされていま
す。ライセンスの詳細については、ソースツリーのメインディレクトリに存在
する、COPYING のファイルを見てください。もしライセンスについてさらに質
問があれば、Linux Kernel メーリングリストに質問するのではなく、どうぞ
法律家に相談してください。メーリングリストの人達は法律家ではなく、法的
問題については彼らの声明はあてにするべきではありません。
GPL に関する共通の質問や回答については、以下を参照してください。
http://www.gnu.org/licenses/gpl-faq.html
ドキュメント
------------
Linux カーネルソースツリーは幅広い範囲のドキュメントを含んでおり、それ
らはカーネルコミュニティと会話する方法を学ぶのに非常に貴重なものです。
新しい機能がカーネルに追加される場合、その機能の使い方について説明した
新しいドキュメントファイルも追加することを勧めます。
カーネルの変更が、カーネルがユーザ空間に公開しているインターフェイスの
変更を引き起こす場合、その変更を説明するマニュアルページのパッチや情報
をマニュアルページのメンテナ mtk-manpages@gmx.net に送ることを勧めま
す。
以下はカーネルソースツリーに含まれている読んでおくべきファイルの一覧で
す-
README
このファイルは Linuxカーネルの簡単な背景とカーネルを設定(訳注
configure )し、生成(訳注 build )するために必要なことは何かが書かれ
ています。カーネルに関して初めての人はここからスタートすると良いで
しょう。
Documentation/Changes
このファイルはカーネルをうまく生成(訳注 build )し、走らせるのに最
小限のレベルで必要な数々のソフトウェアパッケージの一覧を示してい
ます。
Documentation/CodingStyle
これは Linux カーネルのコーディングスタイルと背景にある理由を記述
しています。全ての新しいコードはこのドキュメントにあるガイドライン
に従っていることを期待されています。大部分のメンテナはこれらのルー
ルに従っているものだけを受け付け、多くの人は正しいスタイルのコード
だけをレビューします。
Documentation/SubmittingPatches
Documentation/SubmittingDrivers
これらのファイルには、どうやってうまくパッチを作って投稿するかに
ついて非常に詳しく書かれており、以下を含みます(これだけに限らない
けれども)
- Email に含むこと
- Email の形式
- だれに送るか
これらのルールに従えばうまくいくことを保証することではありません
が (すべてのパッチは内容とスタイルについて精査を受けるので)、
ルールに従わなければ間違いなくうまくいかないでしょう。
この他にパッチを作る方法についてのよくできた記述は-
"The Perfect Patch"
http://www.zip.com.au/~akpm/linux/patches/stuff/tpp.txt
"Linux kernel patch submission format"
http://linux.yyz.us/patch-format.html
Documentation/stable_api_nonsense.txt
このファイルはカーネルの中に不変のAPIを持たないことにした意識的な
決断の背景にある理由について書かれています。以下のようなことを含
んでいます-
- サブシステムとの間に層を作ること(コンパチビリティのため?)
- オペレーティングシステム間のドライバの移植性
- カーネルソースツリーの素早い変更を遅らせる(もしくは素早い変更
を妨げる)
このドキュメントは Linux 開発の思想を理解するのに非常に重要です。
そして、他のOSでの開発者が Linux に移る時にとても重要です。
Documentation/SecurityBugs
もし Linux カーネルでセキュリティ問題を発見したように思ったら、こ
のドキュメントのステップに従ってカーネル開発者に連絡し、問題解決を
支援してください。
Documentation/ManagementStyle
このドキュメントは Linux カーネルのメンテナ達がどう行動するか、
彼らの手法の背景にある共有されている精神について記述しています。こ
れはカーネル開発の初心者なら(もしくは、単に興味があるだけの人でも)
重要です。なぜならこのドキュメントは、カーネルメンテナ達の独特な
行動についての多くの誤解や混乱を解消するからです。
Documentation/stable_kernel_rules.txt
このファイルはどのように stable カーネルのリリースが行われるかのルー
ルが記述されています。そしてこれらのリリースの中のどこかで変更を取
り入れてもらいたい場合に何をすれば良いかが示されています。
Documentation/kernel-docs.txt
  カーネル開発に付随する外部ドキュメントのリストです。もしあなたが
探しているものがカーネル内のドキュメントでみつからなかった場合、
このリストをあたってみてください。
Documentation/applying-patches.txt
パッチとはなにか、パッチをどうやって様々なカーネルの開発ブランチに
適用するのかについて正確に記述した良い入門書です。
カーネルはソースコードから自動的に生成可能な多数のドキュメントを自分自
身でもっています。これにはカーネル内 API のすべての記述や、どう正しく
ロックをかけるかの規則が含まれます。このドキュメントは
Documentation/DocBook/ ディレクトリに作られ、以下のように
make pdfdocs
make psdocs
make htmldocs
make mandocs
コマンドを実行するとメインカーネルのソースディレクトリから
それぞれ、PDF, Postscript, HTML, man page の形式で生成されます。
カーネル開発者になるには
---------------------------
もしあなたが、Linux カーネル開発について何も知らないならば、
KernelNewbies プロジェクトを見るべきです
http://kernelnewbies.org
このサイトには役に立つメーリングリストがあり、基本的なカーネル開発に関
するほとんどどんな種類の質問もできます (既に回答されているようなことを
聞く前にまずはアーカイブを調べてください)。
またここには、リアルタイムで質問を聞くことができる IRC チャネルや、Linux
カーネルの開発に関して学ぶのに便利なたくさんの役に立つドキュメントがあ
ります。
web サイトには、コードの構成、サブシステム、現在存在するプロジェクト(ツ
リーにあるもの無いものの両方)の基本的な管理情報があります。
ここには、また、カーネルのコンパイルのやり方やパッチの当て方などの間接
的な基本情報も記述されています。
あなたがどこからスタートして良いかわからないが、Linux カーネル開発コミュ
ニティに参加して何かすることをさがしている場合には、Linux kernel
Janitor's プロジェクトにいけば良いでしょう -
http://janitor.kernelnewbies.org/
ここはそのようなスタートをするのにうってつけの場所です。ここには、
Linux カーネルソースツリーの中に含まれる、きれいにし、修正しなければな
らない、単純な問題のリストが記述されています。このプロジェクトに関わる
開発者と一緒に作業することで、あなたのパッチを Linuxカーネルツリーに入
れるための基礎を学ぶことができ、そしてもしあなたがまだアイディアを持っ
ていない場合には、次にやる仕事の方向性が見えてくるかもしれません。
もしあなたが、すでにひとまとまりコードを書いていて、カーネルツリーに入
れたいと思っていたり、それに関する適切な支援を求めたい場合、カーネル
メンターズプロジェクトはそのような皆さんを助けるためにできました。
ここにはメーリングリストがあり、以下から参照できます
http://selenic.com/mailman/listinfo/kernel-mentors
実際に Linux カーネルのコードについて修正を加える前に、どうやってその
コードが動作するのかを理解することが必要です。そのためには、特別なツー
ルの助けを借りてでも、それを直接よく読むことが最良の方法です(ほとんど
のトリッキーな部分は十分にコメントしてありますから)。そういうツールで
特におすすめなのは、Linux クロスリファレンスプロジェクトです。これは、
自己参照方式で、索引がついた web 形式で、ソースコードを参照することが
できます。この最新の素晴しいカーネルコードのリポジトリは以下で見つかり
ます-
http://sosdg.org/~qiyong/lxr/
開発プロセス
-----------------------
Linux カーネルの開発プロセスは現在幾つかの異なるメインカーネル「ブラン
チ」と多数のサブシステム毎のカーネルブランチから構成されます。
これらのブランチとは-
- メインの 2.6.x カーネルツリー
- 2.6.x.y -stable カーネルツリー
- 2.6.x -git カーネルパッチ
- 2.6.x -mm カーネルパッチ
- サブシステム毎のカーネルツリーとパッチ
2.6.x カーネルツリー
-----------------
2.6.x カーネルは Linus Torvalds によってメンテナンスされ、kernel.org
の pub/linux/kernel/v2.6/ ディレクトリに存在します。この開発プロセスは
以下のとおり-
- 新しいカーネルがリリースされた直後に、2週間の特別期間が設けられ、
この期間中に、メンテナ達は Linus に大きな差分を送ることができます。
このような差分は通常 -mm カーネルに数週間含まれてきたパッチです。
大きな変更は git(カーネルのソース管理ツール、詳細は
http://git.or.cz/ 参照) を使って送るのが好ましいやり方ですが、パッ
チファイルの形式のまま送るのでも十分です。
- 2週間後、-rc1 カーネルがリリースされ、この後にはカーネル全体の安定
性に影響をあたえるような新機能は含まない類のパッチしか取り込むこと
はできません。新しいドライバ(もしくはファイルシステム)のパッチは
-rc1 の後で受け付けられることもあることを覚えておいてください。な
ぜなら、変更が独立していて、追加されたコードの外の領域に影響を与え
ない限り、退行のリスクは無いからです。-rc1 がリリースされた後、
Linus へパッチを送付するのに git を使うこともできますが、パッチは
レビューのために、パブリックなメーリングリストへも同時に送る必要が
あります。
- 新しい -rc は Linus が、最新の git ツリーがテスト目的であれば十分
に安定した状態にあると判断したときにリリースされます。目標は毎週新
しい -rc カーネルをリリースすることです。
- 以下の URL で各 -rc リリースに存在する既知の後戻り問題のリスト
が追跡されます-
http://kernelnewbies.org/known_regressions
- このプロセスはカーネルが 「準備ができた」と考えられるまで継続しま
す。このプロセスはだいたい 6週間継続します。
Andrew Morton が Linux-kernel メーリングリストにカーネルリリースについ
て書いたことをここで言っておくことは価値があります-
「カーネルがいつリリースされるかは誰も知りません。なぜなら、これは現
実に認識されたバグの状況によりリリースされるのであり、前もって決めら
れた計画によってリリースされるものではないからです。」
2.6.x.y -stable カーネルツリー
---------------------------
バージョンに4つ目の数字がついたカーネルは -stable カーネルです。これに
は、2.6.x カーネルで見つかったセキュリティ問題や重大な後戻りに対する比
較的小さい重要な修正が含まれます。
これは、開発/実験的バージョンのテストに協力することに興味が無く、
最新の安定したカーネルを使いたいユーザに推奨するブランチです。
もし、2.6.x.y カーネルが存在しない場合には、番号が一番大きい 2.6.x
が最新の安定版カーネルです。
2.6.x.y は "stable" チーム <stable@kernel.org> でメンテされており、だ
いたい隔週でリリースされています。
カーネルツリーに入っている、Documentation/stable_kernel_rules.txt ファ
イルにはどのような種類の変更が -stable ツリーに受け入れ可能か、またリ
リースプロセスがどう動くかが記述されています。
2.6.x -git パッチ
------------------
git リポジトリで管理されているLinus のカーネルツリーの毎日のスナップ
ショットがあります。(だから -git という名前がついています)。これらのパッ
チはおおむね毎日リリースされており、Linus のツリーの現状を表します。こ
れは -rc カーネルと比べて、パッチが大丈夫かどうかも確認しないで自動的
に生成されるので、より実験的です。
2.6.x -mm カーネルパッチ
------------------------
Andrew Morton によってリリースされる実験的なカーネルパッチ群です。
Andrew は個別のサブシステムカーネルツリーとパッチを全て集めてきて
linux-kernel メーリングリストで収集された多数のパッチと同時に一つにま
とめます。
このツリーは新機能とパッチが検証される場となります。ある期間の間パッチ
が -mm に入って価値を証明されたら、Andrew やサブシステムメンテナが、
メインラインへ入れるように Linus にプッシュします。
メインカーネルツリーに含めるために Linus に送る前に、すべての新しいパッ
チが -mm ツリーでテストされることが強く推奨されます。
これらのカーネルは安定して動作すべきシステムとして使うのには適切ではあ
りませんし、カーネルブランチの中でももっとも動作にリスクが高いものです。
もしあなたが、カーネル開発プロセスの支援をしたいと思っているのであれば、
どうぞこれらのカーネルリリースをテストに使ってみて、そしてもし問題があ
れば、またもし全てが正しく動作したとしても、linux-kernel メーリングリ
ストにフィードバックを提供してください。
すべての他の実験的パッチに加えて、これらのカーネルは通常リリース時点で
メインラインの -git カーネルに含まれる全ての変更も含んでいます。
-mm カーネルは決まったスケジュールではリリースされません、しかし通常幾
つかの -mm カーネル (1 から 3 が普通)が各-rc カーネルの間にリリースさ
れます。
サブシステム毎のカーネルツリーとパッチ
-------------------------------------------
カーネルの様々な領域で何が起きているかを見られるようにするため、多くの
カーネルサブシステム開発者は彼らの開発ツリーを公開しています。これらの
ツリーは説明したように -mm カーネルリリースに入れ込まれます。
以下はさまざまなカーネルツリーの中のいくつかのリスト-
git ツリー-
- Kbuild の開発ツリー、Sam Ravnborg <sam@ravnborg.org>
git.kernel.org:/pub/scm/linux/kernel/git/sam/kbuild.git
- ACPI の開発ツリー、 Len Brown <len.brown@intel.com>
git.kernel.org:/pub/scm/linux/kernel/git/lenb/linux-acpi-2.6.git
- Block の開発ツリー、Jens Axboe <axboe@suse.de>
git.kernel.org:/pub/scm/linux/kernel/git/axboe/linux-2.6-block.git
- DRM の開発ツリー、Dave Airlie <airlied@linux.ie>
git.kernel.org:/pub/scm/linux/kernel/git/airlied/drm-2.6.git
- ia64 の開発ツリー、Tony Luck <tony.luck@intel.com>
git.kernel.org:/pub/scm/linux/kernel/git/aegl/linux-2.6.git
- infiniband, Roland Dreier <rolandd@cisco.com>
git.kernel.org:/pub/scm/linux/kernel/git/roland/infiniband.git
- libata, Jeff Garzik <jgarzik@pobox.com>
git.kernel.org:/pub/scm/linux/kernel/git/jgarzik/libata-dev.git
- ネットワークドライバ, Jeff Garzik <jgarzik@pobox.com>
git.kernel.org:/pub/scm/linux/kernel/git/jgarzik/netdev-2.6.git
- pcmcia, Dominik Brodowski <linux@dominikbrodowski.net>
git.kernel.org:/pub/scm/linux/kernel/git/brodo/pcmcia-2.6.git
- SCSI, James Bottomley <James.Bottomley@SteelEye.com>
git.kernel.org:/pub/scm/linux/kernel/git/jejb/scsi-misc-2.6.git
quilt ツリー-
- USB, PCI ドライバコアと I2C, Greg Kroah-Hartman <gregkh@suse.de>
kernel.org/pub/linux/kernel/people/gregkh/gregkh-2.6/
- x86-64 と i386 の仲間 Andi Kleen <ak@suse.de>
その他のカーネルツリーは http://git.kernel.org/ と MAINTAINERS ファ
イルに一覧表があります。
バグレポート
-------------
bugzilla.kernel.org は Linux カーネル開発者がカーネルのバグを追跡する
場所です。ユーザは見つけたバグの全てをこのツールで報告すべきです。
どう kernel bugzilla を使うかの詳細は、以下を参照してください-
http://test.kernel.org/bugzilla/faq.html
メインカーネルソースディレクトリにあるファイル REPORTING-BUGS はカーネ
ルバグらしいものについてどうレポートするかの良いテンプレートであり、問
題の追跡を助けるためにカーネル開発者にとってどんな情報が必要なのかの詳
細が書かれています。
メーリングリスト
-------------
上のいくつかのドキュメントで述べていますが、コアカーネル開発者の大部分
は Linux kernel メーリングリストに参加しています。このリストの登録/脱
退の方法については以下を参照してください-
http://vger.kernel.org/vger-lists.html#linux-kernel
このメーリングリストのアーカイブは web 上の多数の場所に存在します。こ
れらのアーカイブを探すにはサーチエンジンを使いましょう。例えば-
http://dir.gmane.org/gmane.linux.kernel
リストに投稿する前にすでにその話題がアーカイブに存在するかどうかを検索
することを是非やってください。多数の事がすでに詳細に渡って議論されて
おり、アーカイブにのみ記録されています。
大部分のカーネルサブシステムも自分の個別の開発を実施するメーリングリス
トを持っています。個々のグループがどんなリストを持っているかは、
MAINTAINERS ファイルにリストがありますので参照してください。
多くのリストは kernel.org でホストされています。これらの情報は以下にあ
ります-
http://vger.kernel.org/vger-lists.html
メーリングリストを使う場合、良い行動習慣に従うようにしましょう。
少し安っぽいが、以下の URL は上のリスト(や他のリスト)で会話する場合の
シンプルなガイドラインを示しています-
http://www.albion.com/netiquette/
もし複数の人があなたのメールに返事をした場合、CC: で受ける人のリストは
だいぶ多くなるでしょう。良い理由がない場合、CC: リストから誰かを削除を
しないように、また、メーリングリストのアドレスだけにリプライすることの
ないようにしましょう。1つは送信者から、もう1つはリストからのように、メー
ルを2回受けることになってもそれに慣れ、しゃれたメールヘッダーを追加し
てこの状態を変えようとしないように。人々はそのようなことは好みません。
今までのメールでのやりとりとその間のあなたの発言はそのまま残し、
"John Kernelhacker wrote ...:" の行をあなたのリプライの先頭行にして、
メールの先頭でなく、各引用行の間にあなたの言いたいことを追加するべきで
す。
もしパッチをメールに付ける場合は、Documentation/SubmittingPatches に提
示されているように、それは プレーンな可読テキストにすることを忘れない
ようにしましょう。カーネル開発者は 添付や圧縮したパッチを扱いたがりま
せん-
彼らはあなたのパッチの行毎にコメントを入れたいので、そのためにはそうす
るしかありません。あなたのメールプログラムが空白やタブを圧縮しないよう
に確認した方が良いです。最初の良いテストとしては、自分にメールを送って
みて、そのパッチを自分で当ててみることです。もしそれがうまく行かないな
ら、あなたのメールプログラムを直してもらうか、正しく動くように変えるべ
きです。
とりわけ、他の登録者に対する尊敬を表すようにすることを覚えておいてくだ
さい。
コミュニティと共に働くこと
--------------------------
カーネルコミュニティのゴールは可能なかぎり最高のカーネルを提供すること
です。あなたがパッチを受け入れてもらうために投稿した場合、それは、技術
的メリットだけがレビューされます。その際、あなたは何を予想すべきでしょ
うか?
- 批判
- コメント
- 変更の要求
- パッチの正当性の証明要求
- 沈黙
思い出してください、ここはあなたのパッチをカーネルに入れる話です。あ
なたは、あなたのパッチに対する批判とコメントを受け入れるべきで、それら
を技術的レベルで評価して、パッチを再作成するか、なぜそれらの変更をすべ
きでないかを明確で簡潔な理由の説明を提供してください。
もし、あなたのパッチに何も反応がない場合、たまにはメールの山に埋もれて
見逃され、あなたの投稿が忘れられてしまうこともあるので、数日待って再度
投稿してください。
あなたがやるべきでないものは?
- 質問なしにあなたのパッチが受け入れられると想像すること
- 守りに入ること
- コメントを無視すること
- 要求された変更を何もしないでパッチを出し直すこと
可能な限り最高の技術的解決を求めているコミュニティでは、パッチがどのく
らい有益なのかについては常に異なる意見があります。あなたは協調的である
べきですし、また、あなたのアイディアをカーネルに対してうまく合わせるよ
うにすることが望まれています。もしくは、最低限あなたのアイディアがそれ
だけの価値があるとすすんで証明するようにしなければなりません。
正しい解決に向かって進もうという意志がある限り、間違うことがあっても許
容されることを忘れないでください。
あなたの最初のパッチに単に 1ダースもの修正を求めるリストの返答になるこ
とも普通のことです。これはあなたのパッチが受け入れられないということで
は *ありません*、そしてあなた自身に反対することを意味するのでも *ありま
せん*。単に自分のパッチに対して指摘された問題を全て修正して再送すれば
良いのです。
カーネルコミュニティと企業組織のちがい
-----------------------------------------------------------------
カーネルコミュニティは大部分の伝統的な会社の開発環境とは異ったやり方で
動いています。以下は問題を避けるためにできると良いことのリストです-
あなたの提案する変更について言うときのうまい言い方:
- "これは複数の問題を解決します"
- "これは2000行のコードを削除します"
- "以下のパッチは、私が言おうとしていることを説明するものです"
- "私はこれを5つの異なるアーキテクチャでテストしたのですが..."
- "以下は一連の小さなパッチ群ですが..."
- "これは典型的なマシンでの性能を向上させます.."
やめた方が良い悪い言い方:
- このやり方で AIX/ptx/Solaris ではできたので、できるはずだ
- 私はこれを20年もの間やってきた、だから
- これは、私の会社が金儲けをするために必要だ
- これは我々のエンタープライズ向け商品ラインのためである
- これは 私が自分のアイディアを記述した、1000ページの設計資料である
- 私はこれについて、6ケ月作業している。
- 以下は ... に関する5000行のパッチです
- 私は現在のぐちゃぐちゃを全部書き直した、それが以下です...
- 私は〆切がある、そのためこのパッチは今すぐ適用される必要がある
カーネルコミュニティが大部分の伝統的なソフトウェアエンジニアリングの労
働環境と異なるもう一つの点は、やりとりに顔を合わせないということです。
email と irc を第一のコミュニケーションの形とする一つの利点は、性別や
民族の差別がないことです。Linux カーネルの職場環境は女性や少数民族を受
容します。なぜなら、email アドレスによってのみあなたが認識されるからで
す。
国際的な側面からも活動領域を均等にするようにします。なぜならば、あなた
は人の名前で性別を想像できないからです。ある男性が アンドレアという名
前で、女性の名前は パット かもしれません (訳注 Andrea は米国では女性、
それ以外(欧州など)では男性名として使われることが多い。同様に、Pat は
Patricia (主に女性名)や Patrick (主に男性名)の略称)。
Linux カーネルの活動をして、意見を表明したことがある大部分の女性は、前
向きな経験をもっています。
言葉の壁は英語が得意でない一部の人には問題になります。
メーリングリストの中できちんとアイディアを交換するには、相当うまく英語
を操れる必要があることもあります。そのため、あなたは自分のメール
を送る前に英語で意味が通じているかをチェックすることをお薦めします。
変更を分割する
---------------------
Linux カーネルコミュニティは、一度に大量のコードの塊を喜んで受容するこ
とはありません。変更は正確に説明される必要があり、議論され、小さい、個
別の部分に分割する必要があります。これはこれまで多くの会社がやり慣れて
きたことと全く正反対のことです。あなたのプロポーザルは、開発プロセスのと
ても早い段階から紹介されるべきです。そうすれば あなたは自分のやってい
ることにフィードバックを得られます。これは、コミュニティからみれば、あ
なたが彼らと一緒にやっているように感じられ、単にあなたの提案する機能の
ゴミ捨て場として使っているのではない、と感じられるでしょう。
しかし、一度に 50 もの email をメーリングリストに送りつけるようなことは
やってはいけません、あなたのパッチ群はいつもどんな時でもそれよりは小さ
くなければなりません。
パッチを分割する理由は以下です-
1) 小さいパッチはあなたのパッチが適用される見込みを大きくします、カー
ネルの人達はパッチが正しいかどうかを確認する時間や労力をかけないか
らです。5行のパッチはメンテナがたった1秒見るだけで適用できます。
しかし、500行のパッチは、正しいことをレビューするのに数時間かかるか
もしれません(時間はパッチのサイズなどにより指数関数に比例してかかり
ます)
小さいパッチは何かあったときにデバッグもとても簡単になります。パッ
チを1個1個取り除くのは、とても大きなパッチを当てた後に(かつ、何かお
かしくなった後で)解剖するのに比べればとても簡単です。
2) 小さいパッチを送るだけでなく、送るまえに、書き直して、シンプルにす
る(もしくは、単に順番を変えるだけでも)ことも、とても重要です。
以下はカーネル開発者の Al Viro のたとえ話です:
"生徒の数学の宿題を採点する先生のことを考えてみてください、先
生は生徒が解に到達するまでの試行錯誤を見たいとは思わないでしょ
う。先生は簡潔な最高の解を見たいのです。良い生徒はこれを知って
おり、そして最終解の前の中間作業を提出することは決してないので
す"
カーネル開発でもこれは同じです。メンテナ達とレビューア達は、
問題を解決する解の背後にある思考プロセスを見たいとは思いません。
彼らは単純であざやかな解決方法を見たいのです。
あざやかな解を説明するのと、コミュニティと共に仕事をし、未解決の仕事を
議論することのバランスをキープするのは難しいかもしれません。
ですから、開発プロセスの早期段階で改善のためのフィードバックをもらうよ
うにするのも良いですが、変更点を小さい部分に分割して全体ではまだ完成し
ていない仕事を(部分的に)取り込んでもらえるようにすることも良いことです。
また、でき上がっていないものや、"将来直す" ようなパッチを、本流に含め
てもらうように送っても、それは受け付けられないことを理解してください。
あなたの変更を正当化する
-------------------
あなたのパッチを分割するのと同時に、なぜその変更を追加しなければならな
いかを Linux コミュニティに知らせることはとても重要です。新機能は必要
性と有用性で正当化されなければなりません。
あなたの変更の説明
--------------------
あなたのパッチを送付する場合には、メールの中のテキストで何を言うかにつ
いて、特別に注意を払ってください。この情報はパッチの ChangeLog に使わ
れ、いつも皆がみられるように保管されます。これは次のような項目を含め、
パッチを完全に記述するべきです-
- なぜ変更が必要か
- パッチ全体の設計アプローチ
- 実装の詳細
- テスト結果
これについて全てがどのようにあるべきかについての詳細は、以下のドキュメ
ントの ChangeLog セクションを見てください-
"The Perfect Patch"
http://www.zip.com.au/~akpm/linux/patches/stuff/tpp.txt
これらのどれもが、時にはとても困難です。これらの慣例を完璧に実施するに
は数年かかるかもしれません。これは継続的な改善のプロセスであり、そのた
めには多数の忍耐と決意を必要とするものです。でも、諦めないで、これは可
能なことです。多数の人がすでにできていますし、彼らも皆最初はあなたと同
じところからスタートしたのですから。
Paolo Ciarrocchi に感謝、彼は彼の書いた "Development Process"
(http://linux.tar.bz/articles/2.6-development_process)セクショ
ンをこのテキストの原型にすることを許可してくれました。
Randy Dunlap と Gerrit Huizenga はメーリングリストでやるべきこととやっ
てはいけないことのリストを提供してくれました。
以下の人々のレビュー、コメント、貢献に感謝。
Pat Mochel, Hanna Linder, Randy Dunlap, Kay Sievers,
Vojtech Pavlik, Jan Kara, Josh Boyer, Kees Cook, Andrew Morton, Andi
Kleen, Vadim Lobanov, Jesper Juhl, Adrian Bunk, Keri Harris, Frans Pop,
David A. Wheeler, Junio Hamano, Michael Kerrisk, と Alex Shepard
彼らの支援なしでは、このドキュメントはできなかったでしょう。
Maintainer: Greg Kroah-Hartman <greg@kroah.com>

View File

@ -0,0 +1,263 @@
NOTE:
This is a version of Documentation/stable_api_nonsense.txt translated into Japanese.
This document is maintained by IKEDA, Munehiro <m-ikeda@ds.jp.nec.com>
and the JF Project team <http://www.linux.or.jp/JF/>.
If you find any difference between this document and the original file
or a problem with the translation,
please contact the maintainer of this file or JF project.
Please also note that the purpose of this file is to be easier to read
for non English (read: Japanese) speakers and is not intended as a
fork. So if you have any comments or updates of this file, please try
to update the original English file first.
Last Updated: 2007/07/18
==================================
これは、
linux-2.6.22-rc4/Documentation/stable_api_nonsense.txt の和訳
です。
翻訳団体: JF プロジェクト < http://www.linux.or.jp/JF/ >
翻訳日 2007/06/11
原著作者: Greg Kroah-Hartman < greg at kroah dot com >
翻訳者 池田 宗広 < m-ikeda at ds dot jp dot nec dot com >
校正者 Masanori Kobayashi さん < zap03216 at nifty dot ne dot jp >
Seiji Kaneko さん < skaneko at a2 dot mbn dot or dot jp >
==================================
Linux カーネルのドライバインターフェース
(あなたの質問すべてに対する回答とその他諸々)
Greg Kroah-Hartman <greg at kroah dot com>
この文書は、なぜ Linux ではバイナリカーネルインターフェースが定義
されていないのか、またはなぜ不変のカーネルインターフェースを持たな
いのか、ということを説明するために書かれた。ここでの話題は「カーネ
ル内部の」インターフェースについてであり、ユーザー空間とのインター
フェースではないことを理解してほしい。カーネルとユーザー空間とのイ
ンターフェースとはアプリケーションプログラムが使用するものであり、
つまりシステムコールのインターフェースがこれに当たる。これは今まで
長きに渡り、かつ今後も「まさしく」不変である。私は確か 0.9 か何か
より前のカーネルを使ってビルドした古いプログラムを持っているが、そ
れは最新の 2.6 カーネルでもきちんと動作する。ユーザー空間とのイン
ターフェースは、ユーザーとアプリケーションプログラマが不変性を信頼
してよいものの一つである。
要旨
----
あなたは不変のカーネルインターフェースが必要だと考えているかもしれ
ないが、実際のところはそうではない。あなたは必要としているものが分
かっていない。あなたが必要としているものは安定して動作するドライバ
であり、それはドライバがメインのカーネルツリーに含まれる場合のみ得
ることができる。ドライバがメインのカーネルツリーに含まれていると、
他にも多くの良いことがある。それは、Linux をより強固で、安定な、成
熟したオペレーティングシステムにすることができるということだ。これ
こそ、そもそもあなたが Linux を使う理由のはずだ。
はじめに
--------
カーネル内部のインターフェース変更を心配しなければならないドライバ
を書きたいなどというのは、変わり者だけだ。この世界のほとんどの人は、
そのようなドライバがどんなインターフェースを使っているかなど知らな
いし、そんなドライバのことなど全く気にもかけていない。
まず初めに、クローズソースとか、ソースコードの隠蔽とか、バイナリの
みが配布される使い物にならない代物[訳注(1)]とか、実体はバイナリ
コードでそれを読み込むためのラッパー部分のみソースコードが公開され
ているとか、その他用語は何であれ GPL の下にソースコードがリリース
されていないカーネルドライバに関する法的な問題について、私は「いか
なる議論も」行うつもりがない。法的な疑問があるのならば、プログラマ
である私ではなく、弁護士に相談して欲しい。ここでは単に、技術的な問
題について述べることにする。(法的な問題を軽視しているわけではない。
それらは実際に存在するし、あなたはそれをいつも気にかけておく必要が
ある)
訳注(1)
「使い物にならない代物」の原文は "blob"
さてここでは、バイナリカーネルインターフェースについてと、ソースレ
ベルでのインターフェースの不変性について、という二つの話題を取り上
げる。この二つは互いに依存する関係にあるが、まずはバイナリインター
フェースについて議論を行いやっつけてしまおう。
バイナリカーネルインターフェース
--------------------------------
もしソースレベルでのインターフェースが不変ならば、バイナリインター
フェースも当然のように不変である、というのは正しいだろうか?正しく
ない。Linux カーネルに関する以下の事実を考えてみてほしい。
- あなたが使用するCコンパイラのバージョンによって、カーネル内部
の構造体の配置構造は異なったものになる。また、関数は異なった方
法でカーネルに含まれることになるかもしれない(例えばインライン
関数として扱われたり、扱われなかったりする)。個々の関数がどの
ようにコンパイルされるかはそれほど重要ではないが、構造体のパデ
ィングが異なるというのは非常に重要である。
- あなたがカーネルのビルドオプションをどのように設定するかによっ
て、カーネルには広い範囲で異なった事態が起こり得る。
- データ構造は異なるデータフィールドを持つかもしれない
- いくつかの関数は全く実装されていない状態になり得る
(例えば、SMP向けではないビルドでは、いくつかのロックは中身が
カラにコンパイルされる)
- カーネル内のメモリは、異なった方法で配置され得る。これはビ
ルドオプションに依存している。
- Linux は様々な異なるプロセッサアーキテクチャ上で動作する。
あるアーキテクチャ用のバイナリドライバを、他のアーキテクチャで
正常に動作させる方法はない。
ある特定のカーネル設定を使用し、カーネルをビルドしたのと正確に同じ
Cコンパイラを使用して単にカーネルモジュールをコンパイルするだけで
も、あなたはこれらいくつもの問題に直面することになる。ある特定の
Linux ディストリビューションの、ある特定のリリースバージョン用にモ
ジュールを提供しようと思っただけでも、これらの問題を引き起こすには
十分である。にも関わらず Linux ディストリビューションの数と、サ
ポートするディストリビューションのリリース数を掛け算し、それら一つ
一つについてビルドを行ったとしたら、今度はリリースごとのビルドオプ
ションの違いという悪夢にすぐさま悩まされることになる。また、ディス
トリビューションの各リリースバージョンには、異なるハードウェア(プ
ロセッサタイプや種々のオプション)に対応するため、何種類かのカーネ
ルが含まれているということも理解して欲しい。従って、ある一つのリ
リースバージョンだけのためにモジュールを作成する場合でも、あなたは
何バージョンものモジュールを用意しなければならない。
信じて欲しい。このような方法でサポートを続けようとするなら、あなた
はいずれ正気を失うだろう。遠い昔、私はそれがいかに困難なことか、身
をもって学んだのだ・・・
不変のカーネルソースレベルインターフェース
------------------------------------------
メインカーネルツリーに含まれていない Linux カーネルドライバを継続
してサポートしていこうとしている人たちとの議論においては、これは極
めて「引火性の高い」話題である。[訳注(2)]
訳注(2)
「引火性の高い」の原文は "volatile"。
volatile には「揮発性の」「爆発しやすい」という意味の他、「変わり
やすい」「移り気な」という意味がある。
「(この話題は)爆発的に激しい論争を巻き起こしかねない」ということ
を、「(カーネルのソースレベルインターフェースは)移ろい行くもので
ある」ということを連想させる "volatile" という単語で表現している。
Linux カーネルの開発は継続的に速いペースで行われ、決して歩みを緩め
ることがない。その中でカーネル開発者達は、現状のインターフェースに
あるバグを見つけ、より良い方法を考え出す。彼らはやがて、現状のイン
ターフェースがより正しく動作するように修正を行う。その過程で関数の
名前は変更されるかもしれず、構造体は大きく、または小さくなるかもし
れず、関数の引数は検討しなおされるかもしれない。そのような場合、引
き続き全てが正常に動作するよう、カーネル内でこれらのインターフェー
スを使用している個所も全て同時に修正される。
具体的な例として、カーネル内の USB インターフェースを挙げる。USB
サブシステムはこれまでに少なくとも3回の書き直しが行われ、その結果
インターフェースが変更された。これらの書き直しはいくつかの異なった
問題を修正するために行われた。
- 同期的データストリームが非同期に変更された。これにより多数のド
ライバを単純化でき、全てのドライバのスループットが向上した。今
やほとんど全ての USB デバイスは、考えられる最高の速度で動作し
ている。
- USB ドライバが USB サブシステムのコアから行う、データパケット
用のメモリ確保方法が変更された。これに伴い、いくつもの文書化さ
れたデッドロック条件を回避するため、全ての USB ドライバはより
多くの情報を USB コアに提供しなければならないようになっている。
このできごとは、数多く存在するクローズソースのオペレーティングシス
テムとは全く対照的だ。それらは長期に渡り古い USB インターフェース
をメンテナンスしなければならない。古いインターフェースが残ることで、
新たな開発者が偶然古いインターフェースを使い、正しくない方法で開発
を行ってしまう可能性が生じる。これによりシステムの安定性は危険にさ
らされることになる。
上に挙げたどちらの例においても、開発者達はその変更が重要かつ必要で
あることに合意し、比較的楽にそれを実行した。もし Linux がソースレ
ベルでインターフェースの不変性を保証しなければならないとしたら、新
しいインターフェースを作ると同時に、古い、問題のある方を今後ともメ
ンテナンスするという余計な仕事を USB の開発者にさせなければならな
い。Linux の USB 開発者は、自分の時間を使って仕事をしている。よっ
て、価値のない余計な仕事を報酬もなしに実行しろと言うことはできない。
セキュリティ問題も、Linux にとっては非常に重要である。ひとたびセキ
ュリティに関する問題が発見されれば、それは極めて短期間のうちに修正
される。セキュリティ問題の発生を防ぐための修正は、カーネルの内部イ
ンターフェースの変更を何度も引き起こしてきた。その際同時に、変更さ
れたインターフェースを使用する全てのドライバもまた変更された。これ
により問題が解消し、将来偶然に問題が再発してしまわないことが保証さ
れる。もし内部インターフェースの変更が許されないとしたら、このよう
にセキュリティ問題を修正し、将来再発しないことを保証することなど不
可能なのだ。
カーネルのインターフェースは時が経つにつれクリーンナップを受ける。
誰も使っていないインターフェースは削除される。これにより、可能な限
りカーネルが小さく保たれ、現役の全てのインターフェースが可能な限り
テストされることを保証しているのだ。(使われていないインターフェー
スの妥当性をテストすることは不可能と言っていいだろう)
これから何をすべきか
-----------------------
では、もしメインのカーネルツリーに含まれない Linux カーネルドライ
バがあったとして、あなたは、つまり開発者は何をするべきだろうか?全
てのディストリビューションの全てのカーネルバージョン向けにバイナリ
のドライバを供給することは悪夢であり、カーネルインターフェースの変
更を追いかけ続けることもまた過酷な仕事だ。
答えは簡単。そのドライバをメインのカーネルツリーに入れてしまえばよ
い。ここで言及しているのは、GPL に従って公開されるドライバのこと
だということに注意してほしい。あなたのコードがそれに該当しないなら
ば、さよなら。幸運を祈ります。ご自分で何とかしてください。(Andrew
と Linus からのコメント<Andrew と Linus のコメントへのリンクをこ
こに置く>をどうぞ)ドライバがメインツリーに入れば、カーネルのイン
ターフェースが変更された場合、変更を行った開発者によってドライバも
修正されることになるだろう。あなたはほとんど労力を払うことなしに、
常にビルド可能できちんと動作するドライバを手に入れることができる。
ドライバをメインのカーネルツリーに入れると、非常に好ましい以下の効
果がある。
- ドライバの品質が向上する一方で、(元の開発者にとっての)メンテ
ナンスコストは下がる。
- あなたのドライバに他の開発者が機能を追加してくれる。
- 誰かがあなたのドライバにあるバグを見つけ、修正してくれる。
- 誰かがあなたのドライバにある改善点を見つけてくれる。
- 外部インターフェースが変更されドライバの更新が必要になった場合、
誰かがあなたの代わりに更新してくれる。
- ドライバを入れてくれとディストロに頼まなくても、そのドライバは
全ての Linux ディストリビューションに自動的に含まれてリリース
される。
Linux では、他のどのオペレーティングシステムよりも数多くのデバイス
が「そのまま」使用できるようになった。また Linux は、どのオペレー
ティングシステムよりも数多くのプロセッサアーキテクチャ上でそれらの
デバイスを使用することができるようにもなった。このように、Linux の
開発モデルは実証されており、今後も間違いなく正しい方向へと進んでい
くだろう。:)
------
この文書の初期の草稿に対し、Randy Dunlap, Andrew Morton, David
Brownell, Hanna Linder, Robert Love, Nishanth Aravamudan から査読
と助言を頂きました。感謝申し上げます。

View File

@ -98,6 +98,15 @@ applicable everywhere (see syntax).
times, the limit is set to the largest selection.
Reverse dependencies can only be used with boolean or tristate
symbols.
Note:
select is evil.... select will by brute force set a symbol
equal to 'y' without visiting the dependencies. So abusing
select you are able to select a symbol FOO even if FOO depends
on BAR that is not set. In general use select only for
non-visible symbols (no prompts anywhere) and for symbols with
no dependencies. That will limit the usefulness but on the
other hand avoid the illegal configurations all over. kconfig
should one day warn about such things.
- numerical ranges: "range" <symbol> <symbol> ["if" <expr>]
This allows to limit the range of possible input values for int

View File

@ -501,6 +501,20 @@ more details, with real examples.
The third parameter may be a text as in this example, but it may also
be an expanded variable or a macro.
cc-fullversion
cc-fullversion is useful when the exact version of gcc is needed.
One typical use-case is when a specific GCC version is broken.
cc-fullversion points out a more specific version than cc-version does.
Example:
#arch/powerpc/Makefile
$(Q)if test "$(call cc-fullversion)" = "040200" ; then \
echo -n '*** GCC-4.2.0 cannot compile the 64-bit powerpc ' ; \
false ; \
fi
In this example for a specific GCC version the build will error out explaining
to the user why it stops.
=== 4 Host Program support

View File

@ -30,17 +30,18 @@ the beginning of each description states the restrictions within which a
parameter is applicable:
ACPI ACPI support is enabled.
AGP AGP (Accelerated Graphics Port) is enabled.
ALSA ALSA sound support is enabled.
APIC APIC support is enabled.
APM Advanced Power Management support is enabled.
AX25 Appropriate AX.25 support is enabled.
BLACKFIN Blackfin architecture is enabled.
DRM Direct Rendering Management support is enabled.
EDD BIOS Enhanced Disk Drive Services (EDD) is enabled
EFI EFI Partitioning (GPT) is enabled
EIDE EIDE/ATAPI support is enabled.
FB The frame buffer device is enabled.
HW Appropriate hardware is enabled.
IA-32 IA-32 aka i386 architecture is enabled.
IA-64 IA-64 architecture is enabled.
IOSCHED More than one I/O scheduler is enabled.
IP_PNP IP DHCP, BOOTP, or RARP is enabled.
@ -57,16 +58,17 @@ parameter is applicable:
MDA MDA console support is enabled.
MOUSE Appropriate mouse support is enabled.
MSI Message Signaled Interrupts (PCI).
MTD MTD support is enabled.
MTD MTD (Memory Technology Device) support is enabled.
NET Appropriate network support is enabled.
NUMA NUMA support is enabled.
GENERIC_TIME The generic timeofday code is enabled.
NFS Appropriate NFS support is enabled.
OSS OSS sound support is enabled.
PV_OPS A paravirtualized kernel
PARIDE The ParIDE subsystem is enabled.
PV_OPS A paravirtualized kernel is enabled.
PARIDE The ParIDE (parallel port IDE) subsystem is enabled.
PARISC The PA-RISC architecture is enabled.
PCI PCI bus support is enabled.
PCIE PCI Express support is enabled.
PCMCIA The PCMCIA subsystem is enabled.
PNP Plug & Play support is enabled.
PPC PowerPC architecture is enabled.
@ -91,6 +93,7 @@ parameter is applicable:
VT Virtual terminal support is enabled.
WDT Watchdog support is enabled.
XT IBM PC/XT MFM hard disk support is enabled.
X86-32 X86-32, aka i386 architecture is enabled.
X86-64 X86-64 architecture is enabled.
More X86-64 boot options can be found in
Documentation/x86_64/boot-options.txt .
@ -122,10 +125,6 @@ and is between 256 and 4096 characters. It is defined in the file
./include/asm/setup.h as COMMAND_LINE_SIZE.
53c7xx= [HW,SCSI] Amiga SCSI controllers
See header of drivers/scsi/53c7xx.c.
See also Documentation/scsi/ncr53c7xx.txt.
acpi= [HW,ACPI,X86-64,i386]
Advanced Configuration and Power Interface
Format: { force | off | ht | strict | noirq }
@ -166,6 +165,8 @@ and is between 256 and 4096 characters. It is defined in the file
acpi_irq_isa= [HW,ACPI] If irq_balance, mark listed IRQs used by ISA
Format: <irq>,<irq>...
acpi_no_auto_ssdt [HW,ACPI] Disable automatic loading of SSDT
acpi_os_name= [HW,ACPI] Tell ACPI BIOS the name of the OS
Format: To spoof as Windows 98: ="Microsoft Windows"
@ -222,11 +223,17 @@ and is between 256 and 4096 characters. It is defined in the file
acpi_fake_ecdt [HW,ACPI] Workaround failure due to BIOS lacking ECDT
acpi_pm_good [IA-32,X86-64]
acpi_pm_good [X86-32,X86-64]
Override the pmtimer bug detection: force the kernel
to assume that this machine's pmtimer latches its value
and always returns good values.
agp= [AGP]
{ off | try_unsupported }
off: disable AGP support
try_unsupported: try to drive unsupported chipsets
(may crash computer or cause data corruption)
enable_timer_pin_1 [i386,x86-64]
Enable PIN 1 of APIC timer
Can be useful to work around chipset bugs
@ -279,7 +286,8 @@ and is between 256 and 4096 characters. It is defined in the file
not play well with APC CPU idle - disable it if you have
APC and your system crashes randomly.
apic= [APIC,i386] Change the output verbosity whilst booting
apic= [APIC,i386] Advanced Programmable Interrupt Controller
Change the output verbosity whilst booting
Format: { quiet (default) | verbose | debug }
Change the amount of debugging information output
when initialising the APIC and IO-APIC components.
@ -353,7 +361,7 @@ and is between 256 and 4096 characters. It is defined in the file
c101= [NET] Moxa C101 synchronous serial card
cachesize= [BUGS=IA-32] Override level 2 CPU cache size detection.
cachesize= [BUGS=X86-32] Override level 2 CPU cache size detection.
Sometimes CPU hardware bugs make them report the cache
size incorrectly. The kernel will attempt work arounds
to fix known problems, but for some CPUs it is not
@ -372,7 +380,7 @@ and is between 256 and 4096 characters. It is defined in the file
Value can be changed at runtime via
/selinux/checkreqprot.
clock= [BUGS=IA-32, HW] gettimeofday clocksource override.
clock= [BUGS=X86-32, HW] gettimeofday clocksource override.
[Deprecated]
Forces specified clocksource (if available) to be used
when calculating gettimeofday(). If specified
@ -390,7 +398,7 @@ and is between 256 and 4096 characters. It is defined in the file
[ARM] imx_timer1,OSTS,netx_timer,mpu_timer2,
pxa_timer,timer3,32k_counter,timer0_1
[AVR32] avr32
[IA-32] pit,hpet,tsc,vmi-timer;
[X86-32] pit,hpet,tsc,vmi-timer;
scx200_hrt on Geode; cyclone on IBM x440
[MIPS] MIPS
[PARISC] cr16
@ -410,7 +418,7 @@ and is between 256 and 4096 characters. It is defined in the file
over the 8254 in addition to over the IO-APIC. The
kernel tries to set a sensible default.
hpet= [IA-32,HPET] option to disable HPET and use PIT.
hpet= [X86-32,HPET] option to disable HPET and use PIT.
Format: disable
com20020= [HW,NET] ARCnet - COM20020 chipset
@ -462,9 +470,6 @@ and is between 256 and 4096 characters. It is defined in the file
Format:
<first_slot>,<last_slot>,<port>,<enum_bit>[,<debug>]
cpia_pp= [HW,PPT]
Format: { parport<nr> | auto | none }
crashkernel=nn[KMG]@ss[KMG]
[KNL] Reserve a chunk of physical memory to
hold a kernel to switch to with kexec on panic.
@ -547,7 +552,7 @@ and is between 256 and 4096 characters. It is defined in the file
dtc3181e= [HW,SCSI]
earlyprintk= [IA-32,X86-64,SH]
earlyprintk= [X86-32,X86-64,SH,BLACKFIN]
earlyprintk=vga
earlyprintk=serial[,ttySn[,baudrate]]
@ -585,7 +590,7 @@ and is between 256 and 4096 characters. It is defined in the file
eisa_irq_edge= [PARISC,HW]
See header of drivers/parisc/eisa.c.
elanfreq= [IA-32]
elanfreq= [X86-32]
See comment before function elanfreq_setup() in
arch/i386/kernel/cpu/cpufreq/elanfreq.c.
@ -594,7 +599,7 @@ and is between 256 and 4096 characters. It is defined in the file
See Documentation/block/as-iosched.txt and
Documentation/block/deadline-iosched.txt for details.
elfcorehdr= [IA-32, X86_64]
elfcorehdr= [X86-32, X86_64]
Specifies physical address of start of kernel core
image elf header. Generally kexec loader will
pass this option to capture kernel.
@ -676,7 +681,7 @@ and is between 256 and 4096 characters. It is defined in the file
hisax= [HW,ISDN]
See Documentation/isdn/README.HiSax.
hugepages= [HW,IA-32,IA-64] Maximal number of HugeTLB pages.
hugepages= [HW,X86-32,IA-64] Maximal number of HugeTLB pages.
i8042.direct [HW] Put keyboard port into non-translated mode
i8042.dumbkbd [HW] Pretend that controller can only read data from
@ -768,7 +773,8 @@ and is between 256 and 4096 characters. It is defined in the file
See Documentation/nfsroot.txt.
ip2= [HW] Set IO/IRQ pairs for up to 4 IntelliPort boards
See comment before ip2_setup() in drivers/char/ip2.c.
See comment before ip2_setup() in
drivers/char/ip2/ip2base.c.
ips= [HW,SCSI] Adaptec / IBM ServeRAID controller
See header of drivers/scsi/ips.c.
@ -817,7 +823,7 @@ and is between 256 and 4096 characters. It is defined in the file
js= [HW,JOY] Analog joystick
See Documentation/input/joystick.txt.
kernelcore=nn[KMG] [KNL,IA-32,IA-64,PPC,X86-64] This parameter
kernelcore=nn[KMG] [KNL,X86-32,IA-64,PPC,X86-64] This parameter
specifies the amount of memory usable by the kernel
for non-movable allocations. The requested amount is
spread evenly throughout all nodes in the system. The
@ -833,7 +839,7 @@ and is between 256 and 4096 characters. It is defined in the file
use the HighMem zone if it exists, and the Normal
zone if it does not.
movablecore=nn[KMG] [KNL,IA-32,IA-64,PPC,X86-64] This parameter
movablecore=nn[KMG] [KNL,X86-32,IA-64,PPC,X86-64] This parameter
is similar to kernelcore except it specifies the
amount of memory used for migratable allocations.
If both kernelcore and movablecore are specified,
@ -845,27 +851,23 @@ and is between 256 and 4096 characters. It is defined in the file
keepinitrd [HW,ARM]
kstack=N [IA-32,X86-64] Print N words from the kernel stack
kstack=N [X86-32,X86-64] Print N words from the kernel stack
in oops dumps.
l2cr= [PPC]
lapic [IA-32,APIC] Enable the local APIC even if BIOS
lapic [X86-32,APIC] Enable the local APIC even if BIOS
disabled it.
lapic_timer_c2_ok [IA-32,x86-64,APIC] trust the local apic timer in
lapic_timer_c2_ok [X86-32,x86-64,APIC] trust the local apic timer in
C2 power state.
lasi= [HW,SCSI] PARISC LASI driver for the 53c700 chip
Format: addr:<io>,irq:<irq>
legacy_serial.force [HW,IA-32,X86-64]
Probe for COM ports at legacy addresses even
if PNPBIOS or ACPI should describe them. This
is for working around firmware defects.
llsc*= [IA64] See function print_params() in
arch/ia64/sn/kernel/llsc4.c.
libata.noacpi [LIBATA] Disables use of ACPI in libata suspend/resume
when set.
Format: <int>
load_ramdisk= [RAM] List of ramdisks to load from floppy
See Documentation/ramdisk.txt.
@ -953,14 +955,10 @@ and is between 256 and 4096 characters. It is defined in the file
Format: <1-256>
maxcpus= [SMP] Maximum number of processors that an SMP kernel
should make use of.
Using "nosmp" or "maxcpus=0" will disable SMP
entirely (the MPS table probe still happens, though).
A command-line option of "maxcpus=<NUM>", where <NUM>
is an integer greater than 0, limits the maximum number
of CPUs activated in SMP mode to <NUM>.
Using "maxcpus=1" on an SMP kernel is the trivial
case of an SMP kernel with only one CPU.
should make use of. maxcpus=n : n >= 0 limits the
kernel to using 'n' processors. n=0 is a special case,
it is equivalent to "nosmp", which also disables
the IO APIC.
max_addr=[KMG] [KNL,BOOT,ia64] All physical memory greater than or
equal to this physical address is ignored.
@ -972,11 +970,11 @@ and is between 256 and 4096 characters. It is defined in the file
[SCSI] Maximum number of LUNs received.
Should be between 1 and 16384.
mca-pentium [BUGS=IA-32]
mca-pentium [BUGS=X86-32]
mcatest= [IA-64]
mce [IA-32] Machine Check Exception
mce [X86-32] Machine Check Exception
md= [HW] RAID subsystems devices and level
See Documentation/md.txt.
@ -988,14 +986,14 @@ and is between 256 and 4096 characters. It is defined in the file
mem=nn[KMG] [KNL,BOOT] Force usage of a specific amount of memory
Amount of memory to be used when the kernel is not able
to see the whole system memory or for test.
[IA-32] Use together with memmap= to avoid physical
[X86-32] Use together with memmap= to avoid physical
address space collisions. Without memmap= PCI devices
could be placed at addresses belonging to unused RAM.
mem=nopentium [BUGS=IA-32] Disable usage of 4MB pages for kernel
mem=nopentium [BUGS=X86-32] Disable usage of 4MB pages for kernel
memory.
memmap=exactmap [KNL,IA-32,X86_64] Enable setting of an exact
memmap=exactmap [KNL,X86-32,X86_64] Enable setting of an exact
E820 memory map, as specified by the user.
Such memmap=exactmap lines can be constructed based on
BIOS output or other requirements. See the memmap=nn@ss
@ -1016,6 +1014,10 @@ and is between 256 and 4096 characters. It is defined in the file
meye.*= [HW] Set MotionEye Camera parameters
See Documentation/video4linux/meye.txt.
mfgpt_irq= [IA-32] Specify the IRQ to use for the
Multi-Function General Purpose Timers on AMD Geode
platforms.
mga= [HW,DRM]
mousedev.tap_time=
@ -1039,7 +1041,7 @@ and is between 256 and 4096 characters. It is defined in the file
<name>,<region-number>[,<base>,<size>,<buswidth>,<altbuswidth>]
mtdparts= [MTD]
See drivers/mtd/cmdline.c.
See drivers/mtd/cmdlinepart.c.
mtouchusb.raw_coordinates=
[HW] Make the MicroTouch USB driver use raw coordinates
@ -1081,16 +1083,12 @@ and is between 256 and 4096 characters. It is defined in the file
[NFS] set the maximum lifetime for idmapper cache
entries.
nmi_watchdog= [KNL,BUGS=IA-32] Debugging features for SMP kernels
nmi_watchdog= [KNL,BUGS=X86-32] Debugging features for SMP kernels
no387 [BUGS=IA-32] Tells the kernel to use the 387 maths
no387 [BUGS=X86-32] Tells the kernel to use the 387 maths
emulation library even if a 387 maths coprocessor
is present.
noacpi [LIBATA] Disables use of ACPI in libata suspend/resume
when set.
Format: <int>
noaliencache [MM, NUMA, SLAB] Disables the allocation of alien
caches in the slab allocator. Saves per-node memory,
but will impact performance.
@ -1114,17 +1112,17 @@ and is between 256 and 4096 characters. It is defined in the file
noexec [IA-64]
noexec [IA-32,X86-64]
noexec [X86-32,X86-64]
noexec=on: enable non-executable mappings (default)
noexec=off: disable non-executable mappings
nofxsr [BUGS=IA-32] Disables x86 floating point extended
nofxsr [BUGS=X86-32] Disables x86 floating point extended
register save and restore. The kernel will only save
legacy floating-point registers on task switch.
nohlt [BUGS=ARM]
no-hlt [BUGS=IA-32] Tells the kernel that the hlt
no-hlt [BUGS=X86-32] Tells the kernel that the hlt
instruction doesn't work correctly and not to
use it.
@ -1139,12 +1137,12 @@ and is between 256 and 4096 characters. It is defined in the file
Valid arguments: on, off
Default: on
noirqbalance [IA-32,SMP,KNL] Disable kernel irq balancing
noirqbalance [X86-32,SMP,KNL] Disable kernel irq balancing
noirqdebug [IA-32] Disables the code which attempts to detect and
noirqdebug [X86-32] Disables the code which attempts to detect and
disable unhandled interrupt sources.
no_timer_check [IA-32,X86_64,APIC] Disables the code which tests for
no_timer_check [X86-32,X86_64,APIC] Disables the code which tests for
broken timer IRQ sources.
noisapnp [ISAPNP] Disables ISA PnP code.
@ -1154,20 +1152,25 @@ and is between 256 and 4096 characters. It is defined in the file
nointroute [IA-64]
nolapic [IA-32,APIC] Do not enable or use the local APIC.
nojitter [IA64] Disables jitter checking for ITC timers.
nolapic_timer [IA-32,APIC] Do not use the local APIC timer.
nolapic [X86-32,APIC] Do not enable or use the local APIC.
nolapic_timer [X86-32,APIC] Do not use the local APIC timer.
noltlbs [PPC] Do not use large page/tlb entries for kernel
lowmem mapping on PPC40x.
nomca [IA-64] Disable machine check abort handling
nomce [IA-32] Machine Check Exception
nomce [X86-32] Machine Check Exception
noreplace-paravirt [IA-32,PV_OPS] Don't patch paravirt_ops
nomfgpt [X86-32] Disable Multi-Function General Purpose
Timer usage (for AMD Geode machines).
noreplace-smp [IA-32,SMP] Don't replace SMP instructions
noreplace-paravirt [X86-32,PV_OPS] Don't patch paravirt_ops
noreplace-smp [X86-32,SMP] Don't replace SMP instructions
with UP alternatives
noresidual [PPC] Don't use residual data on PReP machines.
@ -1181,15 +1184,16 @@ and is between 256 and 4096 characters. It is defined in the file
nosbagart [IA-64]
nosep [BUGS=IA-32] Disables x86 SYSENTER/SYSEXIT support.
nosep [BUGS=X86-32] Disables x86 SYSENTER/SYSEXIT support.
nosmp [SMP] Tells an SMP kernel to act as a UP kernel.
nosmp [SMP] Tells an SMP kernel to act as a UP kernel,
and disable the IO APIC. legacy for "maxcpus=0".
nosoftlockup [KNL] Disable the soft-lockup detector.
nosync [HW,M68K] Disables sync negotiation for all devices.
notsc [BUGS=IA-32] Disable Time Stamp Counter
notsc [BUGS=X86-32] Disable Time Stamp Counter
nousb [USB] Disable the USB subsystem
@ -1262,28 +1266,33 @@ and is between 256 and 4096 characters. It is defined in the file
See also Documentation/paride.txt.
pci=option[,option...] [PCI] various PCI subsystem options:
off [IA-32] don't probe for the PCI bus
bios [IA-32] force use of PCI BIOS, don't access
off [X86-32] don't probe for the PCI bus
bios [X86-32] force use of PCI BIOS, don't access
the hardware directly. Use this if your machine
has a non-standard PCI host bridge.
nobios [IA-32] disallow use of PCI BIOS, only direct
nobios [X86-32] disallow use of PCI BIOS, only direct
hardware access methods are allowed. Use this
if you experience crashes upon bootup and you
suspect they are caused by the BIOS.
conf1 [IA-32] Force use of PCI Configuration
conf1 [X86-32] Force use of PCI Configuration
Mechanism 1.
conf2 [IA-32] Force use of PCI Configuration
conf2 [X86-32] Force use of PCI Configuration
Mechanism 2.
nommconf [IA-32,X86_64] Disable use of MMCONFIG for PCI
noaer [PCIE] If the PCIEAER kernel config parameter is
enabled, this kernel boot option can be used to
disable the use of PCIE advanced error reporting.
nodomains [PCI] Disable support for multiple PCI
root domains (aka PCI segments, in ACPI-speak).
nommconf [X86-32,X86_64] Disable use of MMCONFIG for PCI
Configuration
nomsi [MSI] If the PCI_MSI kernel config parameter is
enabled, this kernel boot option can be used to
disable the use of MSI interrupts system-wide.
nosort [IA-32] Don't sort PCI devices according to
nosort [X86-32] Don't sort PCI devices according to
order given by the PCI BIOS. This sorting is
done to get a device order compatible with
older kernels.
biosirq [IA-32] Use PCI BIOS calls to get the interrupt
biosirq [X86-32] Use PCI BIOS calls to get the interrupt
routing table. These calls are known to be buggy
on several machines and they hang the machine
when used, but on other computers it's the only
@ -1291,33 +1300,35 @@ and is between 256 and 4096 characters. It is defined in the file
this option if the kernel is unable to allocate
IRQs or discover secondary PCI buses on your
motherboard.
rom [IA-32] Assign address space to expansion ROMs.
rom [X86-32] Assign address space to expansion ROMs.
Use with caution as certain devices share
address decoders between ROMs and other
resources.
irqmask=0xMMMM [IA-32] Set a bit mask of IRQs allowed to be
irqmask=0xMMMM [X86-32] Set a bit mask of IRQs allowed to be
assigned automatically to PCI devices. You can
make the kernel exclude IRQs of your ISA cards
this way.
pirqaddr=0xAAAAA [IA-32] Specify the physical address
pirqaddr=0xAAAAA [X86-32] Specify the physical address
of the PIRQ table (normally generated
by the BIOS) if it is outside the
F0000h-100000h range.
lastbus=N [IA-32] Scan all buses thru bus #N. Can be
lastbus=N [X86-32] Scan all buses thru bus #N. Can be
useful if the kernel is unable to find your
secondary buses and you want to tell it
explicitly which ones they are.
assign-busses [IA-32] Always assign all PCI bus
assign-busses [X86-32] Always assign all PCI bus
numbers ourselves, overriding
whatever the firmware may have done.
usepirqmask [IA-32] Honor the possible IRQ mask stored
usepirqmask [X86-32] Honor the possible IRQ mask stored
in the BIOS $PIR table. This is needed on
some systems with broken BIOSes, notably
some HP Pavilion N5400 and Omnibook XE3
notebooks. This will have no effect if ACPI
IRQ routing is enabled.
noacpi [IA-32] Do not use ACPI for IRQ routing
noacpi [X86-32] Do not use ACPI for IRQ routing
or for PCI scanning.
use_crs [X86-32] Use _CRS for PCI resource
allocation.
routeirq Do IRQ routing for all PCI devices.
This is normally done in pci_enable_device(),
so this option is a temporary workaround
@ -1434,6 +1445,10 @@ and is between 256 and 4096 characters. It is defined in the file
pt. [PARIDE]
See Documentation/paride.txt.
pty.legacy_count=
[KNL] Number of legacy pty's. Overwrites compiled-in
default number.
quiet [KNL] Disable most log messages
r128= [HW,DRM]
@ -1465,13 +1480,13 @@ and is between 256 and 4096 characters. It is defined in the file
Run specified binary instead of /init from the ramdisk,
used for early userspace startup. See initrd.
reboot= [BUGS=IA-32,BUGS=ARM,BUGS=IA-64] Rebooting mode
reboot= [BUGS=X86-32,BUGS=ARM,BUGS=IA-64] Rebooting mode
Format: <reboot_mode>[,<reboot_mode2>[,...]]
See arch/*/kernel/reboot.c or arch/*/kernel/process.c
reserve= [KNL,BUGS] Force the kernel to ignore some iomem area
reservetop= [IA-32]
reservetop= [X86-32]
Format: nn[KMG]
Reserves a hole at the top of the kernel virtual
address space.
@ -1562,7 +1577,7 @@ and is between 256 and 4096 characters. It is defined in the file
Value can be changed at runtime via
/selinux/compat_net.
serialnumber [BUGS=IA-32]
serialnumber [BUGS=X86-32]
sg_def_reserved_size= [SCSI]
@ -1615,7 +1630,7 @@ and is between 256 and 4096 characters. It is defined in the file
smart2= [HW]
Format: <io1>[,<io2>[,...,<io8>]]
smp-alt-once [IA-32,SMP] On a hotplug CPU system, only
smp-alt-once [X86-32,SMP] On a hotplug CPU system, only
attempt to substitute SMP alternatives once at boot.
smsc-ircc2.nopnp [HW] Don't use PNP to discover SMC devices
@ -1821,6 +1836,30 @@ and is between 256 and 4096 characters. It is defined in the file
thash_entries= [KNL,NET]
Set number of hash buckets for TCP connection
thermal.act= [HW,ACPI]
-1: disable all active trip points in all thermal zones
<degrees C>: override all lowest active trip points
thermal.crt= [HW,ACPI]
-1: disable all critical trip points in all thermal zones
<degrees C>: lower all critical trip points
thermal.nocrt= [HW,ACPI]
Set to disable actions on ACPI thermal zone
critical and hot trip points.
thermal.off= [HW,ACPI]
1: disable ACPI thermal control
thermal.psv= [HW,ACPI]
-1: disable all passive trip points
<degrees C>: override all passive trip points to this value
thermal.tzp= [HW,ACPI]
Specify global default ACPI thermal zone polling rate
<deci-seconds>: poll at this frequency
0: no polling (default)
time Show timing data prefixed to each printk message line
[deprecated, see 'printk.time']
@ -1880,15 +1919,18 @@ and is between 256 and 4096 characters. It is defined in the file
usbhid.mousepoll=
[USBHID] The interval which mice are to be polled at.
vdso= [IA-32,SH]
vdso= [X86-32,SH,x86-64]
vdso=2: enable compat VDSO (default with COMPAT_VDSO)
vdso=1: enable VDSO (default)
vdso=0: disable VDSO mapping
vector= [IA-64,SMP]
vector=percpu: enable percpu vector domain
video= [FB] Frame buffer configuration
See Documentation/fb/modedb.txt.
vga= [BOOT,IA-32] Select a particular video mode
vga= [BOOT,X86-32] Select a particular video mode
See Documentation/i386/boot.txt and
Documentation/svga.txt.
Use vga=ask for menu.
@ -1920,7 +1962,7 @@ and is between 256 and 4096 characters. It is defined in the file
See header of drivers/scsi/wd7000.c.
wdt= [WDT] Watchdog
See Documentation/watchdog/watchdog.txt.
See Documentation/watchdog/wdt.txt.
xd= [HW,XT] Original XT pre-IDE (RLL encoded) disks.
xd_geo= See header of drivers/block/xd.c.

View File

@ -859,9 +859,8 @@ payload contents" for more information.
void unregister_key_type(struct key_type *type);
Under some circumstances, it may be desirable to desirable to deal with a
bundle of keys. The facility provides access to the keyring type for managing
such a bundle:
Under some circumstances, it may be desirable to deal with a bundle of keys.
The facility provides access to the keyring type for managing such a bundle:
struct key_type key_type_keyring;

623
Documentation/ko_KR/HOWTO Normal file
View File

@ -0,0 +1,623 @@
NOTE:
This is a version of Documentation/HOWTO translated into Korean
This document is maintained by Minchan Kim <minchan.kim@gmail.com>
If you find any difference between this document and the original file or
a problem with the translation, please contact the maintainer of this file.
Please also note that the purpose of this file is to be easier to
read for non-English (read: Korean) speakers and is not intended as
a fork. So if you have any comments or updates for this file please
try to update the original English file first.
==================================
이 문서는
Documentation/HOWTO
의 한글 번역입니다.
역자: 김민찬 <minchan.kim@gmail.com>
감수: 이제이미 <jamee.lee@samsung.com>
==================================
어떻게 리눅스 커널 개발을 하는가
---------------------------------
이 문서는 커널 개발에 있어 가장 중요한 문서이다. 이 문서는
리눅스 커널 개발자가 되는 법과 리눅스 커널 개발 커뮤니티와 일하는
법을 담고 있다. 커널 프로그래밍의 기술적인 측면과 관련된 내용들은
포함하지 않으려고 하였지만 올바른 방향으로 여러분을 안내하는 데 도움이
될 것이다.
이 문서에서 오래된 것을 발견하면 문서의 아래쪽에 나열된 메인트너에게
패치를 보내달라.
소개
----
자, 여러분은 리눅스 커널 개발자가 되는 법을 배우고 싶은가? 아니면
상사로부터 "이 장치를 위한 리눅스 드라이버를 작성하시오"라는 말을
들었는가? 이 문서는 여러분이 겪게 될 과정과 커뮤니티와 일하는 법을
조언하여 여러분의 목적을 달성하기 위해 필요한 것 모두를 알려주는
것이다.
커널은 대부분 C로 작성되었고 몇몇 아키텍쳐에 의존적인 부분은
어셈블리로 작성되었다. 커널 개발을 위해 C를 잘 이해하고 있어야 한다.
여러분이 특정 아키텍쳐의 low-level 개발을 할 것이 아니라면
어셈블리(특정 아키텍쳐)는 잘 알아야 할 필요는 없다.
다음의 참고서적들은 기본에 충실한 C 교육이나 수년간의 경험에 견주지는
못하지만 적어도 참고 용도로는 좋을 것이다
- "The C Programming Language" by Kernighan and Ritchie [Prentice Hall]
- "Practical C Programming" by Steve Oualline [O'Reilly]
- "C: A Reference Manual" by Harbison and Steele [Prentice Hall]
커널은 GNU C와 GNU 툴체인을 사용하여 작성되었다. 이 툴들은 ISO C89 표준을
따르는 반면 표준에 있지 않은 많은 확장기능도 가지고 있다. 커널은 표준 C
라이브러리와는 관계없이 freestanding C 환경이어서 C 표준의 일부는
지원되지 않는다. 임의의 long long 나누기나 floating point는 지원되지 않는다.
때론 이런 이유로 커널이 그런 확장 기능을 가진 툴체인을 가지고 만들어졌다는
것이 이해하기 어려울 수도 있고 게다가 불행하게도 그런 것을 정확하게 설명하는
어떤 참고문서도 있지 않다. 정보를 얻기 위해서는 gcc info (`info gcc`)페이지를
살펴보라.
여러분은 기존의 개발 커뮤니티와 일하는 법을 배우려고 하고 있다는 것을
기억하라. 코딩, 스타일, 절차에 관한 훌륭한 표준을 가진 사람들이 모인
다양한 그룹이 있다. 이 표준들은 오랜동안 크고 지역적으로 분산된 팀들에
의해 가장 좋은 방법으로 일하기위하여 찾은 것을 기초로 만들어져왔다.
그 표준들은 문서화가 잘 되어 있기 때문에 가능한한 미리 많은 표준들에
관하여 배우려고 시도하라. 다른 사람들은 여러분이나 여러분의 회사가
일하는 방식에 적응하는 것을 원하지는 않는다.
법적 문제
---------
리눅스 커널 소스 코드는 GPL로 배포(release)되었다. 소스트리의 메인
디렉토리에 있는 라이센스에 관하여 상세하게 쓰여 있는 COPYING이라는
파일을 봐라. 여러분이 라이센스에 관한 더 깊은 문제를 가지고 있다면
리눅스 커널 메일링 리스트에 묻지말고 변호사와 연락하라. 메일링
리스트들에 있는 사람들은 변호사가 아니기 때문에 법적 문제에 관하여
그들의 말에 의지해서는 안된다.
GPL에 관한 잦은 질문들과 답변들은 다음을 참조하라.
http://www.gnu.org/licenses/gpl-faq.html
문서
----
리눅스 커널 소스 트리는 커널 커뮤니티와 일하는 법을 배우기 위한 많은
귀중한 문서들을 가지고 있다. 새로운 기능들이 커널에 들어가게 될 때,
그 기능을 어떻게 사용하는지에 관한 설명을 위하여 새로운 문서 파일을
추가하는 것을 권장한다. 커널이 유저스페이스로 노출하는 인터페이스를
변경하게 되면 변경을 설명하는 메뉴얼 페이지들에 대한 패치나 정보를
mtk-manpages@gmx.net의 메인트너에게 보낼 것을 권장한다.
다음은 커널 소스 트리에 있는 읽어야 할 파일들의 리스트이다.
README
이 파일은 리눅스 커널에 관하여 간단한 배경 설명과 커널을 설정하고
빌드하기 위해 필요한 것을 설명한다. 커널에 입문하는 사람들은 여기서
시작해야 한다.
Documentation/Changes
이 파일은 커널을 성공적으로 빌드하고 실행시키기 위해 필요한 다양한
소프트웨어 패키지들의 최소 버젼을 나열한다.
Documentation/CodingStyle
이 문서는 리눅스 커널 코딩 스타일과 그렇게 한 몇몇 이유를 설명한다.
모든 새로운 코드는 이 문서에 가이드라인들을 따라야 한다. 대부분의
메인트너들은 이 규칙을 따르는 패치들만을 받아들일 것이고 많은 사람들이
그 패치가 올바른 스타일일 경우만 코드를 검토할 것이다.
Documentation/SubmittingPatches
Documentation/SubmittingDrivers
이 파일들은 성공적으로 패치를 만들고 보내는 법을 다음의 내용들로
굉장히 상세히 설명하고 있다(그러나 다음으로 한정되진 않는다).
- Email 내용들
- Email 양식
- 그것을 누구에게 보낼지
이러한 규칙들을 따르는 것이 성공을 보장하진 않는다(왜냐하면 모든
패치들은 내용과 스타일에 관하여 면밀히 검토되기 때문이다).
그러나 규칙을 따르지 않는다면 거의 성공하지도 못할 것이다.
올바른 패치들을 만드는 법에 관한 훌륭한 다른 문서들이 있다.
"The Perfect Patch"
http://www.zip.com.au/~akpm/linux/patches/stuff/tpp.txt
"Linux kernel patch submission format"
http://linux.yyz.us/patch-format.html
Documentation/stable_api_nonsense.txt
이 문서는 의도적으로 커널이 변하지 않는 API를 갖지 않도록 결정한
이유를 설명하며 다음과 같은 것들을 포함한다.
- 서브시스템 shim-layer(호환성을 위해?)
- 운영 체제들 간의 드라이버 이식성
- 커널 소스 트리내에 빠른 변화를 늦추는 것(또는 빠른 변화를 막는 것)
이 문서는 리눅스 개발 철학을 이해하는데 필수적이며 다른 운영체제에서
리눅스로 옮겨오는 사람들에게는 매우 중요하다.
Documentation/SecurityBugs
여러분들이 리눅스 커널의 보안 문제를 발견했다고 생각한다면 이 문서에
나온 단계에 따라서 커널 개발자들에게 알리고 그 문제를 해결할 수 있도록
도와 달라.
Documentation/ManagementStyle
이 문서는 리눅스 커널 메인트너들이 그들의 방법론의 정신을
어떻게 공유하고 운영하는지를 설명한다. 이것은 커널 개발에 입문하는
모든 사람들(또는 커널 개발에 작은 호기심이라도 있는 사람들)이
읽어야 할 중요한 문서이다. 왜냐하면 이 문서는 커널 메인트너들의
독특한 행동에 관하여 흔히 있는 오해들과 혼란들을 해소하고 있기
때문이다.
Documentation/stable_kernel_rules.txt
이 문서는 안정적인 커널 배포가 이루어지는 규칙을 설명하고 있으며
여러분들이 이러한 배포들 중 하나에 변경을 하길 원한다면
무엇을 해야 하는지를 설명한다.
Documentation/kernel-docs.txt
커널 개발에 관계된 외부 문서의 리스트이다. 커널 내의 포함된 문서들
중에 여러분이 찾고 싶은 문서를 발견하지 못할 경우 이 리스트를
살펴보라.
Documentation/applying-patches.txt
패치가 무엇이며 그것을 커널의 다른 개발 브랜치들에 어떻게
적용하는지에 관하여 자세히 설명 하고 있는 좋은 입문서이다.
커널은 소스 코드 그 자체에서 자동적으로 만들어질 수 있는 많은 문서들을
가지고 있다. 이것은 커널 내의 API에 대한 모든 설명, 그리고 락킹을
올바르게 처리하는 법에 관한 규칙을 포함하고 있다. 이 문서는
Documentation/DocBook/ 디렉토리 내에서 만들어지며 PDF, Postscript, HTML,
그리고 man 페이지들로 다음과 같이 실행하여 만들어 진다.
make pdfdocs
make psdocs
make htmldocs
make mandocs
각각의 명령을 메인 커널 소스 디렉토리로부터 실행한다.
커널 개발자가 되는 것
---------------------
여러분이 리눅스 커널 개발에 관하여 아무것도 모른다면 Linux KernelNewbies
프로젝트를 봐야 한다.
http://kernelnewbies.org
그곳은 거의 모든 종류의 기본적인 커널 개발 질문들(질문하기 전에 먼저
아카이브를 찾아봐라. 과거에 이미 답변되었을 수도 있다)을 할수있는 도움이
될만한 메일링 리스트가 있다. 또한 실시간으로 질문 할수 있는 IRC 채널도
가지고 있으며 리눅스 커널 개발을 배우는 데 유용한 문서들을 보유하고 있다.
웹사이트는 코드구성, 서브시스템들, 그리고 현재 프로젝트들
(트리 내, 외부에 존재하는)에 관한 기본적인 정보들을 가지고 있다. 또한
그곳은 커널 컴파일이나 패치를 하는 법과 같은 기본적인 것들을 설명한다.
여러분이 어디서 시작해야 할진 모르지만 커널 개발 커뮤니티에 참여할 수
있는 일들을 찾길 원한다면 리눅스 커널 Janitor 프로젝트를 살펴봐라.
http://janitor.kernelnewbies.org/
그곳은 시작하기에 아주 딱 좋은 곳이다. 그곳은 리눅스 커널 소스 트리내에
간단히 정리되고 수정될 수 있는 문제들에 관하여 설명한다. 여러분은 이
프로젝트를 대표하는 개발자들과 일하면서 자신의 패치를 리눅스 커널 트리에
반영하기 위한 기본적인 것들을 배우게 될것이며 여러분이 아직 아이디어를
가지고 있지 않다면 다음에 무엇을 해야할지에 관한 방향을 배울 수 있을
것이다.
여러분들이 이미 커널 트리에 반영하길 원하는 코드 묶음을 가지고 있지만
올바른 포맷으로 포장하는데 도움이 필요하다면 그러한 문제를 돕기 위해
만들어진 kernel-mentors 프로젝트가 있다. 그곳은 메일링 리스트이며
다음에서 참조할 수 있다.
http://selenic.com/mailman/listinfo/kernel-mentors
리눅스 커널 코드에 실제 변경을 하기 전에 반드시 그 코드가 어떻게
동작하는지 이해하고 있어야 한다. 코드를 분석하기 위하여 특정한 툴의
도움을 빌려서라도 코드를 직접 읽는 것보다 좋은 것은 없다(대부분의
자잘한 부분들은 잘 코멘트되어 있다). 그런 툴들 중에 특히 추천할만한
것은 Linux Cross-Reference project이며 그것은 자기 참조 방식이며
소스코드를 인덱스된 웹 페이지들의 형태로 보여준다. 최신의 멋진 커널
코드 저장소는 다음을 통하여 참조할 수 있다.
http://sosdg.org/~coywolf/lxr/
개발 프로세스
-------------
리눅스 커널 개발 프로세스는 현재 몇몇 다른 메인 커널 "브랜치들"과
서브시스템에 특화된 커널 브랜치들로 구성된다. 몇몇 다른 메인
브랜치들은 다음과 같다.
- main 2.6.x 커널 트리
- 2.6.x.y - 안정된 커널 트리
- 2.6.x -git 커널 패치들
- 2.6.x -mm 커널 패치들
- 서브시스템을 위한 커널 트리들과 패치들
2.6.x 커널 트리
---------------
2.6.x 커널들은 Linus Torvalds가 관리하며 kernel.org의 pub/linux/kernel/v2.6/
디렉토리에서 참조될 수 있다. 개발 프로세스는 다음과 같다.
- 새로운 커널이 배포되자마자 2주의 시간이 주어진다. 이 기간 동안
메인트너들은 큰 diff들을 Linus에게 제출할 수 있다. 대개 이 패치들은
몇 주 동안 -mm 커널내에 이미 있었던 것들이다. 큰 변경들을 제출하는 데
선호되는 방법은 git(커널의 소스 관리 툴, 더 많은 정보들은 http://git.or.cz/
에서 참조할 수 있다)를 사용하는 것이지만 순수한 패치파일의 형식으로 보내는
것도 무관하다.
- 2주 후에 -rc1 커널이 배포되며 지금부터는 전체 커널의 안정성에 영향을
미칠 수 있는 새로운 기능들을 포함하지 않는 패치들만이 추가될 수 있다.
완전히 새로운 드라이버(혹은 파일시스템)는 -rc1 이후에도 받아들여질 수 있다는
것을 기억해라. 왜냐하면 변경이 자체내에서만 발생하고 추가된 코드가
드라이버 외부의 다른 부분에는 영향을 주지 않으므로 그런 변경은
퇴보(regression)를 일으킬 만한 위험을 가지고 있지 않기 때문이다. -rc1이
배포된 이후에 git를 사용하여 패치들을 Linus에게 보낼수 있지만 패치들은
공식적인 메일링 리스트로 보내서 검토를 받을 필요가 있다.
- 새로운 -rc는 Linus가 현재 git tree가 테스트 하기에 충분히 안정된 상태에
있다고 판단할 때마다 배포된다. 목표는 새로운 -rc 커널을 매주 배포하는
것이다.
- 이러한 프로세스는 커널이 "준비"되었다고 여겨질때까지 계속된다.
프로세스는 대체로 6주간 지속된다.
- 각 -rc 배포에 있는 알려진 퇴보의 목록들은 다음 URI에 남겨진다.
http://kernelnewbies.org/known_regressions
커널 배포에 있어서 언급할만한 가치가 있는 리눅스 커널 메일링 리스트의
Andrew Morton의 글이 있다.
"커널이 언제 배포될지는 아무도 모른다. 왜냐하면 배포는 알려진
버그의 상황에 따라 배포되는 것이지 미리 정해 놓은 시간에 따라
배포되는 것은 아니기 때문이다."
2.6.x.y - 안정 커널 트리
------------------------
4 자리 숫자로 이루어진 버젼의 커널들은 -stable 커널들이다. 그것들은 2.6.x
커널에서 발견된 큰 퇴보들이나 보안 문제들 중 비교적 작고 중요한 수정들을
포함한다.
이것은 가장 최근의 안정적인 커널을 원하는 사용자에게 추천되는 브랜치이며,
개발/실험적 버젼을 테스트하는 것을 돕는데는 별로 관심이 없다.
어떤 2.6.x.y 커널도 사용가능하지 않다면 그때는 가장 높은 숫자의 2.6.x
커널이 현재의 안정 커널이다.
2.6.x.y는 "stable" 팀<stable@kernel.org>에 의해 관리되며 거의 매번 격주로
배포된다.
커널 트리 문서들 내에 Documentation/stable_kernel_rules.txt 파일은 어떤
종류의 변경들이 -stable 트리로 들어왔는지와 배포 프로세스가 어떻게
진행되는지를 설명한다.
2.6.x -git 패치들
------------------
git 저장소(그러므로 -git이라는 이름이 붙음)에는 날마다 관리되는 Linus의
커널 트리의 snapshot 들이 있다. 이 패치들은 일반적으로 날마다 배포되며
Linus의 트리의 현재 상태를 나타낸다. 이 패치들은 정상적인지 조금도
살펴보지 않고 자동적으로 생성된 것이므로 -rc 커널들 보다도 더 실험적이다.
2.6.x -mm 커널 패치들
---------------------
Andrew Morton에 의해 배포된 실험적인 커널 패치들이다. Andrew는 모든 다른
서브시스템 커널 트리와 패치들을 가져와서 리눅스 커널 메일링 리스트로
온 많은 패치들과 한데 묶는다. 이 트리는 새로운 기능들과 패치들을 위한
장소를 제공하는 역할을 한다. 하나의 패치가 -mm에 한동안 있으면서 그 가치가
증명되게 되면 Andrew나 서브시스템 메인트너는 그것을 메인라인에 포함시키기
위하여 Linus에게 보낸다.
커널 트리에 포함하고 싶은 모든 새로운 패치들은 Linus에게 보내지기 전에
-mm 트리에서 테스트를 하는 것을 적극 추천한다.
이 커널들은 안정되게 사용할 시스템에서에 실행하는 것은 적합하지 않으며
다른 브랜치들의 어떤 것들보다 위험하다.
여러분이 커널 개발 프로세스를 돕길 원한다면 이 커널 배포들을 사용하고
테스트한 후 어떤 문제를 발견하거나 또는 모든 것이 잘 동작한다면 리눅스
커널 메일링 리스트로 피드백을 해달라.
이 커널들은 일반적으로 모든 다른 실험적인 패치들과 배포될 당시의
사용가능한 메인라인 -git 커널들의 몇몇 변경을 포함한다.
-mm 커널들은 정해진 일정대로 배포되지 않는다. 하지만 대개 몇몇 -mm 커널들은
각 -rc 커널(1부터 3이 흔함) 사이에서 배포된다.
서브시스템 커널 트리들과 패치들
-------------------------------
많은 다른 커널 서브시스템 개발자들은 커널의 다른 부분들에서 무슨 일이
일어나고 있는지를 볼수 있도록 그들의 개발 트리를 공개한다. 이 트리들은
위에서 설명하였던 것 처럼 -mm 커널 배포들로 합쳐진다.
다음은 활용가능한 커널 트리들을 나열한다.
git trees:
- Kbuild development tree, Sam Ravnborg <sam@ravnborg.org>
git.kernel.org:/pub/scm/linux/kernel/git/sam/kbuild.git
- ACPI development tree, Len Brown <len.brown@intel.com>
git.kernel.org:/pub/scm/linux/kernel/git/lenb/linux-acpi-2.6.git
- Block development tree, Jens Axboe <axboe@suse.de>
git.kernel.org:/pub/scm/linux/kernel/git/axboe/linux-2.6-block.git
- DRM development tree, Dave Airlie <airlied@linux.ie>
git.kernel.org:/pub/scm/linux/kernel/git/airlied/drm-2.6.git
- ia64 development tree, Tony Luck <tony.luck@intel.com>
git.kernel.org:/pub/scm/linux/kernel/git/aegl/linux-2.6.git
- infiniband, Roland Dreier <rolandd@cisco.com>
git.kernel.org:/pub/scm/linux/kernel/git/roland/infiniband.git
- libata, Jeff Garzik <jgarzik@pobox.com>
git.kernel.org:/pub/scm/linux/kernel/git/jgarzik/libata-dev.git
- network drivers, Jeff Garzik <jgarzik@pobox.com>
git.kernel.org:/pub/scm/linux/kernel/git/jgarzik/netdev-2.6.git
- pcmcia, Dominik Brodowski <linux@dominikbrodowski.net>
git.kernel.org:/pub/scm/linux/kernel/git/brodo/pcmcia-2.6.git
- SCSI, James Bottomley <James.Bottomley@SteelEye.com>
git.kernel.org:/pub/scm/linux/kernel/git/jejb/scsi-misc-2.6.git
quilt trees:
- USB, PCI, Driver Core, and I2C, Greg Kroah-Hartman <gregkh@suse.de>
kernel.org/pub/linux/kernel/people/gregkh/gregkh-2.6/
- x86-64, partly i386, Andi Kleen <ak@suse.de>
ftp.firstfloor.org:/pub/ak/x86_64/quilt/
다른 커널 트리들은 http://kernel.org/git와 MAINTAINERS 파일에서 참조할 수
있다.
버그 보고
---------
bugzilla.kernel.org는 리눅스 커널 개발자들이 커널의 버그를 추적하는 곳이다.
사용자들은 발견한 모든 버그들을 보고하기 위하여 이 툴을 사용할 것을 권장한다.
kernel bugzilla를 사용하는 자세한 방법은 다음을 참조하라.
http://test.kernel.org/bugzilla/faq.html
메인 커널 소스 디렉토리에 있는 REPORTING-BUGS 파일은 커널 버그일 것 같은
것을 보고하는 법에 관한 좋은 템플릿이고 문제를 추적하기 위해서 커널
개발자들이 필요로 하는 정보가 무엇들인지를 상세히 설명하고 있다.
버그 리포트들의 관리
--------------------
여러분의 해킹 기술을 연습하는 가장 좋은 방법 중의 하나는 다른 사람들이
보고한 버그들을 수정하는 것이다. 여러분은 커널을 더욱 안정화시키는데
도움을 줄 뿐만이 아니라 실제있는 문제들을 수정하는 법을 배우게 되고
그와 함께 여러분들의 기술은 향상될 것이며 다른 개발자들이 여러분의
존재에 대해 알게 될 것이다. 버그를 수정하는 것은 개발자들 사이에서
점수를 얻을 수 있는 가장 좋은 방법중의 하나이다. 왜냐하면 많은 사람들은
다른 사람들의 버그들을 수정하기 위하여 시간을 낭비하지 않기 때문이다.
이미 보고된 버그 리포트들을 가지고 작업하기 위해서 http://bugzilla.kernel.org를
참조하라. 여러분이 앞으로 생겨날 버그 리포트들의 조언자가 되길 원한다면
bugme-new 메일링 리스트나(새로운 버그 리포트들만이 이곳에서 메일로 전해진다)
bugme-janitor 메일링 리스트(bugzilla에 모든 변화들이 여기서 메일로 전해진다)
에 등록하면 된다.
http://lists.osdl.org/mailman/listinfo/bugme-new
http://lists.osdl.org/mailman/listinfo/bugme-janitors
메일링 리스트들
---------------
위의 몇몇 문서들이 설명하였지만 핵심 커널 개발자들의 대다수는
리눅스 커널 메일링 리스트에 참여하고 있다. 리스트에 등록하고 해지하는
방법에 관한 자세한 사항은 다음에서 참조할 수 있다.
http://vger.kernel.org/vger-lists.html#linux-kernel
웹상의 많은 다른 곳에도 메일링 리스트의 아카이브들이 있다.
이러한 아카이브들을 찾으려면 검색 엔진을 사용하라. 예를 들어:
http://dir.gmane.org/gmane.linux.kernel
여러분이 새로운 문제에 관해 리스트에 올리기 전에 말하고 싶은 주제에 대한
것을 아카이브에서 먼저 찾기를 강력히 권장한다. 이미 상세하게 토론된 많은
것들이 메일링 리스트의 아카이브에 기록되어 있다.
각각의 커널 서브시스템들의 대부분은 자신들의 개발에 관한 노력들로 이루어진
분리된 메일링 리스트를 따로 가지고 있다. 다른 그룹들이 무슨 리스트를 가지고
있는지는 MAINTAINERS 파일을 참조하라.
많은 리스트들은 kernel.org에서 호스트되고 있다. 그 정보들은 다음에서 참조될 수 있다.
http://vger.kernel.org/vger-lists.html
리스트들을 사용할 때는 올바른 예절을 따를 것을 유념해라.
대단하진 않지만 다음 URL은 리스트(혹은 모든 리스트)와 대화하는 몇몇 간단한
가이드라인을 가지고 있다.
http://www.albion.com/netiquette/
여러 사람들이 여러분의 메일에 응답한다면 CC: 즉 수신 리스트는 꽤 커지게
될 것이다. 아무 이유없이 CC에서 어떤 사람도 제거하거나 리스트 주소로만
회신하지 마라. 메일을 보낸 사람으로서 하나를 받고 리스트로부터 또
하나를 받아 두번 받는 것에 익숙하여 있으니 mail-header를 조작하려고 하지
말아라. 사람들은 그런 것을 좋아하지 않을 것이다.
여러분의 회신의 문맥을 원래대로 유지해야 한다. 여러분들의 회신의 윗부분에
"John 커널해커는 작성했다...."를 유지하며 여러분들의 의견을 그 메일의 윗부분에
작성하지 말고 각 인용한 단락들 사이에 넣어라.
여러분들이 패치들을 메일에 넣는다면 그것들은 Documentation/SubmittingPatches에
나와있는데로 명백히(plain) 읽을 수 있는 텍스트여야 한다. 커널 개발자들은
첨부파일이나 압축된 패치들을 원하지 않는다. 그들은 여러분들의 패치의
각 라인 단위로 코멘트를 하길 원하며 압축하거나 첨부하지 않고 보내는 것이
그렇게 할 수 있는 유일한 방법이다. 여러분들이 사용하는 메일 프로그램이
스페이스나 탭 문자들을 조작하지 않는지 확인하라. 가장 좋은 첫 테스트는
메일을 자신에게 보내보고 스스로 그 패치를 적용해보라. 그것이 동작하지
않는다면 여러분의 메일 프로그램을 고치던가 제대로 동작하는 프로그램으로
바꾸어라.
무엇보다도 메일링 리스트의 다른 구독자들에게 보여주려 한다는 것을 기억하라.
커뮤니티와 일하는 법
--------------------
커널 커뮤니티의 목적은 가능한한 가장 좋은 커널을 제공하는 것이다. 여러분이
받아들여질 패치를 제출하게 되면 그 패치의 기술적인 이점으로 검토될 것이다.
그럼 여러분들은 무엇을 기대하고 있어야 하는가?
- 비판
- 의견
- 변경을 위한 요구
- 당위성을 위한 요구
- 고요
기억하라. 이것들은 여러분의 패치가 커널로 들어가기 위한 과정이다. 여러분의
패치들은 비판과 다른 의견을 받을 수 있고 그것들을 기술적인 레벨로 평가하고
재작업하거나 또는 왜 수정하면 안되는지에 관하여 명료하고 간결한 이유를
말할 수 있어야 한다. 여러분이 제출한 것에 어떤 응답도 있지 않다면 몇 일을
기다려보고 다시 시도해라. 때론 너무 많은 메일들 속에 묻혀버리기도 한다.
여러분은 무엇을 해서는 안되는가?
- 여러분의 패치가 아무 질문 없이 받아들여지기를 기대하는 것
- 방어적이 되는 것
- 의견을 무시하는 것
- 요청된 변경을 하지 않고 패치를 다시 제출하는 것
가능한한 가장 좋은 기술적인 해답을 찾고 있는 커뮤니티에서는 항상
어떤 패치가 얼마나 좋은지에 관하여 다른 의견들이 있을 수 있다. 여러분은
협조적이어야 하고 기꺼이 여러분의 생각을 커널 내에 맞추어야 한다. 아니면
적어도 여러분의 것이 가치있다는 것을 증명하여야 한다. 잘못된 것도 여러분이
올바른 방향의 해결책으로 이끌어갈 의지가 있다면 받아들여질 것이라는 점을
기억하라.
여러분의 첫 패치에 여러분이 수정해야하는 십여개 정도의 회신이 오는
경우도 흔하다. 이것은 여러분의 패치가 받아들여지지 않을 것이라는 것을
의미하는 것이 아니고 개인적으로 여러분에게 감정이 있어서 그러는 것도
아니다. 간단히 여러분의 패치에 제기된 문제들을 수정하고 그것을 다시
보내라.
커널 커뮤니티와 기업 조직간의 차이점
-----------------------------------------------------------------
커널 커뮤니티는 가장 전통적인 회사의 개발 환경과는 다르다. 여기에 여러분들의
문제를 피하기 위한 목록이 있다.
여러분들이 제안한 변경들에 관하여 말할 때 좋은 것들 :
- "이것은 여러 문제들을 해결합니다."
- "이것은 2000 라인의 코드를 제거합니다."
- "이것은 내가 말하려는 것에 관해 설명하는 패치입니다."
- "나는 5개의 다른 아키텍쳐에서 그것을 테스트했으므로..."
- "여기에 일련의 작은 패치들이 있으므로..."
- "이것은 일반적인 머신에서 성능을 향상시키므로..."
여러분들이 말할 때 피해야 할 좋지 않은 것들 :
- "우리는 그것을 AIX/ptx/Solaris에서 이러한 방법으로 했다. 그러므로 그것은 좋은 것임에 틀림없다..."
- "나는 20년동안 이것을 해왔다. 그러므로..."
- "이것은 돈을 벌기위해 나의 회사가 필요로 하는 것이다."
- "이것은 우리의 엔터프라이즈 상품 라인을 위한 것이다."
- "여기에 나의 생각을 말하고 있는 1000 페이지 설계 문서가 있다."
- "나는 6달동안 이것을 했으니..."
- "여기에 5000라인 짜리 패치가 있으니..."
- "나는 현재 뒤죽박죽인 것을 재작성했다. 그리고 여기에..."
- "나는 마감시한을 가지고 있으므로 이 패치는 지금 적용될 필요가 있다."
커널 커뮤니티가 전통적인 소프트웨어 엔지니어링 개발 환경들과
또 다른 점은 얼굴을 보지 않고 일한다는 점이다. 이메일과 irc를 대화의
주요수단으로 사용하는 것의 한가지 장점은 성별이나 인종의 차별이
없다는 것이다. 리눅스 커널의 작업 환경에서는 단지 이메일 주소만
알수 있기 때문에 여성과 소수 민족들도 모두 받아들여진다. 국제적으로
일하게 되는 측면은 사람의 이름에 근거하여 성별을 추측할 수 없게
하기때문에 차별을 없애는 데 도움을 준다. Andrea라는 이름을 가진 남자와
Pat이라는 이름을 가진 여자가 있을 수도 있는 것이다. 리눅스 커널에서
작업하며 생각을 표현해왔던 대부분의 여성들은 긍정적인 경험을 가지고
있다.
언어 장벽은 영어에 익숙하지 않은 몇몇 사람들에게 문제가 될 수도 있다.
언어의 훌륭한 구사는 메일링 리스트에서 올바르게 자신의 생각을
표현하기 위하여 필요하다. 그래서 여러분은 이메일을 보내기 전에
영어를 올바르게 사용하고 있는지를 체크하는 것이 바람직하다.
여러분의 변경을 나누어라
------------------------
리눅스 커널 커뮤니티는 한꺼번에 굉장히 큰 코드의 묶음을 쉽게
받아들이지 않는다. 변경은 적절하게 소개되고, 검토되고, 각각의
부분으로 작게 나누어져야 한다. 이것은 회사에서 하는 것과는 정확히
반대되는 것이다. 여러분들의 제안은 개발 초기에 일찍이 소개되야 한다.
그래서 여러분들은 자신이 하고 있는 것에 관하여 피드백을 받을 수 있게
된다. 커뮤니티가 여러분들이 커뮤니티와 함께 일하고 있다는 것을
느끼도록 만들고 커뮤니티가 여러분의 기능을 위한 쓰레기 장으로서
사용되지 않고 있다는 것을 느끼게 하자. 그러나 메일링 리스트에 한번에
50개의 이메일을 보내지는 말아라. 여러분들의 일련의 패치들은 항상
더 작아야 한다.
패치를 나누는 이유는 다음과 같다.
1) 작은 패치들은 여러분의 패치들이 적용될 수 있는 확률을 높여준다.
왜냐하면 다른 사람들은 정확성을 검증하기 위하여 많은 시간과 노력을
들이기를 원하지 않는다. 5줄의 패치는 메인트너가 거의 몇 초간 힐끗
보면 적용될 수 있다. 그러나 500 줄의 패치는 정확성을 검토하기 위하여
몇시간이 걸릴 수도 있다(걸리는 시간은 패치의 크기 혹은 다른 것에
비례하여 기하급수적으로 늘어난다).
패치를 작게 만드는 것은 무엇인가 잘못되었을 때 디버그하는 것을
쉽게 만든다. 즉, 그렇게 만드는 것은 매우 큰 패치를 적용한 후에
조사하는 것 보다 작은 패치를 적용한 후에 (그리고 몇몇의 것이
깨졌을 때) 하나씩 패치들을 제거해가며 디버그 하기 쉽도록 만들어 준다.
2) 작은 패치들을 보내는 것뿐만 아니라 패치들을 제출하기전에 재작성하고
간단하게(혹은 간단하게 재배치하여) 하는 것도 중요하다.
여기에 커널 개발자 Al Viro의 이야기가 있다.
"학생의 수학 숙제를 채점하는 선생님을 생각해보라. 선생님은 학생들이
답을 얻을때까지 겪은 시행착오를 보길 원하지 않는다. 선생님들은
간결하고 가장 뛰어난 답을 보길 원한다. 훌륭한 학생은 이것을 알고
마지막으로 답을 얻기 전 중간 과정들을 제출하진 않는다.
커널 개발도 마찬가지이다. 메인트너들과 검토하는 사람들은 문제를
풀어나가는 과정속에 숨겨진 과정을 보길 원하진 않는다. 그들은
간결하고 멋진 답을 보길 원한다."
커뮤니티와 함께 일하며 뛰어난 답을 찾고 여러분들의 완성되지 않은 일들
사이에 균형을 유지해야 하는 어려움이 있을 수 있다. 그러므로 프로세스의
초반에 여러분의 일을 향상시키기위한 피드백을 얻는 것 뿐만 아니라
여러분들의 변경들을 작은 묶음으로 유지해서 심지어는 여러분의 작업의
모든 부분이 지금은 포함될 준비가 되어있지 않지만 작은 부분은 이미
받아들여질 수 있도록 유지하는 것이 바람직하다.
또한 완성되지 않았고 "나중에 수정될 것이다." 와 같은 것들을 포함하는
패치들은 받아들여지지 않을 것이라는 점을 유념하라.
변경을 정당화해라
-----------------
여러분들의 나누어진 패치들을 리눅스 커뮤니티가 왜 반영해야 하는지를
알도록 하는 것은 매우 중요하다. 새로운 기능들이 필요하고 유용하다는
것은 반드시 그에 맞는 이유가 있어야 한다.
변경을 문서화해라
-----------------
여러분이 패치를 보내려 할때는 여러분이 무엇을 말하려고 하는지를 충분히
생각하여 이메일을 작성해야 한다. 이 정보는 패치를 위한 ChangeLog가 될
것이다. 그리고 항상 그 내용을 보길 원하는 모든 사람들을 위해 보존될
것이다. 패치는 완벽하게 다음과 같은 내용들을 포함하여 설명해야 한다.
- 변경이 왜 필요한지
- 패치에 관한 전체 설계 어프로치
- 구현 상세들
- 테스트 결과들
이것이 무엇인지 더 자세한 것을 알고 싶다면 다음 문서의 ChangeLog 항을 봐라.
"The Perfect Patch"
http://www.zip.com.au/~akpm/linux/patches/stuff/tpp.txt
이 모든 것을 하는 것은 매우 어려운 일이다. 완벽히 소화하는 데는 적어도 몇년이
걸릴 수도 있다. 많은 인내와 결의가 필요한 계속되는 개선의 과정이다. 그러나
가능한한 포기하지 말라. 많은 사람들은 이전부터 해왔던 것이고 그 사람들도
정확하게 여러분들이 지금 서 있는 그 곳부터 시작했었다.
----------
"개발 프로세스"(http://linux.tar.bz/articles/2.6-development_process) 섹션을
작성하는데 있어 참고할 문서를 사용하도록 허락해준 Paolo Ciarrocchi에게
감사한다. 여러분들이 말해야 할 것과 말해서는 안되는 것의 목록 중 일부를 제공해준
Randy Dunlap과 Gerrit Huizenga에게 감사한다. 또한 검토와 의견 그리고
공헌을 아끼지 않은 Pat Mochel, Hanna Linder, Randy Dunlap, Kay Sievers,
Vojtech Pavlik, Jan Kara, Josh Boyer, Kees Cook, Andrew Morton, Andi Kleen,
Vadim Lobanov, Jesper Juhl, Adrian Bunk, Keri Harris, Frans Pop,
David A. Wheeler, Junio Hamano, Michael Kerrisk, and Alex Shepard에게도 감사를 전한다.
그들의 도움이 없었다면 이 문서는 존재하지 않았을 것이다.
메인트너: Greg Kroah-Hartman <greg@kroah.com>

View File

@ -27,7 +27,6 @@ in detail, and briefly here:
- kobjects a simple object.
- kset a set of objects of a certain type.
- ktype a set of helpers for objects of a common type.
- subsystem a controlling object for a number of ksets.
The kobject infrastructure maintains a close relationship with the
@ -54,13 +53,14 @@ embedded in larger data structures and replace fields they duplicate.
1.2 Definition
struct kobject {
char name[KOBJ_NAME_LEN];
atomic_t refcount;
const char * k_name;
struct kref kref;
struct list_head entry;
struct kobject * parent;
struct kset * kset;
struct kobj_type * ktype;
struct dentry * dentry;
struct sysfs_dirent * sd;
wait_queue_head_t poll;
};
void kobject_init(struct kobject *);
@ -137,8 +137,7 @@ If a kobject does not have a parent when it is registered, its parent
becomes its dominant kset.
If a kobject does not have a parent nor a dominant kset, its directory
is created at the top-level of the sysfs partition. This should only
happen for kobjects that are embedded in a struct subsystem.
is created at the top-level of the sysfs partition.
@ -150,10 +149,10 @@ A kset is a set of kobjects that are embedded in the same type.
struct kset {
struct subsystem * subsys;
struct kobj_type * ktype;
struct list_head list;
struct kobject kobj;
struct kset_uevent_ops * uevent_ops;
};
@ -169,8 +168,7 @@ struct kobject * kset_find_obj(struct kset *, char *);
The type that the kobjects are embedded in is described by the ktype
pointer. The subsystem that the kobject belongs to is pointed to by the
subsys pointer.
pointer.
A kset contains a kobject itself, meaning that it may be registered in
the kobject hierarchy and exported via sysfs. More importantly, the
@ -209,6 +207,41 @@ the hierarchy.
kset_find_obj() may be used to locate a kobject with a particular
name. The kobject, if found, is returned.
There are also some helper functions whose names refer to the formerly
existing "struct subsystem", whose functions have been taken over by
ksets.
decl_subsys(name,type,uevent_ops)
Declares a kset named '<name>_subsys' of type <type> with
uevent_ops <uevent_ops>. For example,
decl_subsys(devices, &ktype_devices, &device_uevent_ops);
is equivalent to doing:
struct kset devices_subsys = {
.ktype = &ktype_devices,
.uevent_ops = &device_uevent_ops,
};
kobject_set_name(&devices_subsys.kobj, "devices");
The objects that are registered with a subsystem that use the
subsystem's default list must have their kset ptr set properly. These
objects may have embedded kobjects or ksets. The
following helper makes setting the kset easier:
kobj_set_kset_s(obj,subsys)
- Assumes that obj->kobj exists, and is a struct kobject.
- Sets the kset of that kobject to the kset <subsys>.
int subsystem_register(struct kset *s);
void subsystem_unregister(struct kset *s);
These are just wrappers around the respective kset_* functions.
2.3 sysfs
@ -254,114 +287,3 @@ Instances of struct kobj_type are not registered; only referenced by
the kset. A kobj_type may be referenced by an arbitrary number of
ksets, as there may be disparate sets of identical objects.
4. subsystems
4.1 Description
A subsystem represents a significant entity of code that maintains an
arbitrary number of sets of objects of various types. Since the number
of ksets and the type of objects they contain are variable, a
generic representation of a subsystem is minimal.
struct subsystem {
struct kset kset;
struct rw_semaphore rwsem;
};
int subsystem_register(struct subsystem *);
void subsystem_unregister(struct subsystem *);
struct subsystem * subsys_get(struct subsystem * s);
void subsys_put(struct subsystem * s);
A subsystem contains an embedded kset so:
- It can be represented in the object hierarchy via the kset's
embedded kobject.
- It can maintain a default list of objects of one type.
Additional ksets may attach to the subsystem simply by referencing the
subsystem before they are registered. (This one-way reference means
that there is no way to determine the ksets that are attached to the
subsystem.)
All ksets that are attached to a subsystem share the subsystem's R/W
semaphore.
4.2 subsystem Programming Interface.
The subsystem programming interface is simple and does not offer the
flexibility that the kset and kobject programming interfaces do. They
may be registered and unregistered, as well as reference counted. Each
call forwards the calls to their embedded ksets (which forward the
calls to their embedded kobjects).
4.3 Helpers
A number of macros are available to make dealing with subsystems and
their embedded objects easier.
decl_subsys(name,type)
Declares a subsystem named '<name>_subsys', with an embedded kset of
type <type>. For example,
decl_subsys(devices,&ktype_devices);
is equivalent to doing:
struct subsystem device_subsys = {
.kset = {
.kobj = {
.name = "devices",
},
.ktype = &ktype_devices,
}
};
The objects that are registered with a subsystem that use the
subsystem's default list must have their kset ptr set properly. These
objects may have embedded kobjects, ksets, or other subsystems. The
following helpers make setting the kset easier:
kobj_set_kset_s(obj,subsys)
- Assumes that obj->kobj exists, and is a struct kobject.
- Sets the kset of that kobject to the subsystem's embedded kset.
kset_set_kset_s(obj,subsys)
- Assumes that obj->kset exists, and is a struct kset.
- Sets the kset of the embedded kobject to the subsystem's
embedded kset.
subsys_set_kset(obj,subsys)
- Assumes obj->subsys exists, and is a struct subsystem.
- Sets obj->subsys.kset.kobj.kset to the subsystem's embedded kset.
4.4 sysfs
subsystems are represented in sysfs via their embedded kobjects. They
follow the same rules as previously mentioned with no exceptions. They
typically receive a top-level directory in sysfs, except when their
embedded kobject is part of another kset, or the parent of the
embedded kobject is explicitly set.
Note that the subsystem's embedded kset must be 'attached' to the
subsystem itself in order to use its rwsem. This is done after
kset_add() has been called. (Not before, because kset_add() uses its
subsystem for a default parent if it doesn't already have one).

View File

@ -247,12 +247,6 @@ control to Kprobes.) If the probed function is declared asmlinkage,
fastcall, or anything else that affects how args are passed, the
handler's declaration must match.
NOTE: A macro JPROBE_ENTRY is provided to handle architecture-specific
aliasing of jp->entry. In the interest of portability, it is advised
to use:
jp->entry = JPROBE_ENTRY(handler);
register_jprobe() returns 0 on success, or a negative errno otherwise.
4.3 register_kretprobe
@ -518,7 +512,7 @@ long jdo_fork(unsigned long clone_flags, unsigned long stack_start,
}
static struct jprobe my_jprobe = {
.entry = JPROBE_ENTRY(jdo_fork)
.entry = jdo_fork
};
static int __init jprobe_init(void)

View File

@ -0,0 +1,28 @@
# This creates the demonstration utility "lguest" which runs a Linux guest.
#
# Targets:
#   all         - (default) generate the linker script, then build "lguest"
#   lguest.lds  - linker script derived from the linker's built-in default
#   clean       - remove the generated linker script and the binary

# For those people that have a separate object dir, look there for .config
KBUILD_OUTPUT := ../..
ifdef O
  # Only honour O= when it was given on the command line.
  ifeq ("$(origin O)", "command line")
    KBUILD_OUTPUT := $(O)
  endif
endif
# We rely on CONFIG_PAGE_OFFSET to know where to put lguest binary.
include $(KBUILD_OUTPUT)/.config
LGUEST_GUEST_TOP := ($(CONFIG_PAGE_OFFSET) - 0x08000000)

CFLAGS:=-Wall -Wmissing-declarations -Wmissing-prototypes -O3 -Wl,-T,lguest.lds
LDLIBS:=-lz
# Removing this works for some versions of ld.so (eg. Ubuntu Feisty) and
# not others (eg. FC7).
LDFLAGS+=-static

# "all" and "clean" name actions, not files: declare them phony so that a
# stray file called "all" or "clean" in this directory cannot disable them.
.PHONY: all clean

all: lguest.lds lguest

# The linker script on x86 is so complex the only way of creating one
# which will link our binary in the right place is to mangle the
# default one.
#
# The awk program prints every line of "$(LD) --verbose" that follows the
# first "==========" marker line, and on lines mentioning SIZEOF_HEADERS
# replaces each hex constant with the $(LGUEST_GUEST_TOP) expression.
# ("$$0" is awk's "$0", escaped for make.)
lguest.lds:
	$(LD) --verbose | awk '/^==========/ { PRINT=1; next; } /SIZEOF_HEADERS/ { gsub(/0x[0-9A-F]*/, "$(LGUEST_GUEST_TOP)") } { if (PRINT) print $$0; }' > $@

clean:
	rm -f lguest.lds lguest

View File

@ -0,0 +1,58 @@
#! /bin/sh
#
# Usage: extract PREFIX FILE...
#
# Collects the sections marked "PREFIX:<number>" in the given files and
# prints them on stdout, one after another, in the order the shell glob
# returns the per-section files.  A "[ filename ]" header is printed
# whenever the originating file changes.
#
# While scanning a file, file descriptor 3 is the "current section":
#   - a line containing "PREFIX:<number>" opens section <number>
#     (fd 3 is pointed at $TMPDIR/<number>, the marker is stripped);
#   - a line containing ":*" closes it (":*" becomes "*", any "/**/" is
#     dropped, a blank line is appended, fd 3 goes back to /dev/null);
#   - every other line is copied to fd 3, i.e. kept if a section is
#     open and discarded otherwise.
#
# Exits with status 1 if the same section number is opened twice.
set -e

PREFIX=$1
shift

# Create the scratch directory first and only then install the cleanup
# trap, so the trap never runs "rm -r" without an operand.
TMPDIR=`mktemp -d`
trap 'rm -r "$TMPDIR"' 0

# open_section FILE LINE
# Start the section whose number is embedded in LINE: refuse duplicates,
# point fd 3 at $TMPDIR/<number> and remember FILE (with every "../"
# removed) in $TMPDIR/.<number> for the headers printed at the end.
open_section()
{
    NUM=`echo "$2" | sed "s/.*$PREFIX:\([0-9]*\).*/\1/"`
    if [ -f "$TMPDIR/$NUM" ]; then
	echo "$TMPDIR/$NUM already exists prior to $1" >&2
	exit 1
    fi
    exec 3>>"$TMPDIR/$NUM"
    echo "$1" | sed 's,\.\./,,g' > "$TMPDIR/.$NUM"
}

exec 3>/dev/null
for f; do
    # IFS is a lone newline so that read keeps leading/trailing blanks.
    while IFS="
" read -r LINE; do
	case "$LINE" in
	    # Section marker on a line that also carries ":*".
	    *$PREFIX:[0-9]*:\**)
		open_section "$f" "$LINE"
		/bin/echo "$LINE" | sed -e "s/$PREFIX:[0-9]*//" -e "s/:\*/*/" >&3
		;;
	    # Plain section marker.
	    *$PREFIX:[0-9]*)
		open_section "$f" "$LINE"
		/bin/echo "$LINE" | sed "s/$PREFIX:[0-9]*//" >&3
		;;
	    # End of the current section.
	    *:\**)
		/bin/echo "$LINE" | sed -e "s/:\*/*/" -e "s,/\*\*/,," >&3
		echo >&3
		exec 3>/dev/null
		;;
	    *)
		/bin/echo "$LINE" >&3
		;;
	esac
    done < "$f"
    # A section never runs past the end of its file.
    echo >&3
    exec 3>/dev/null
done

LASTFILE=""
for f in "$TMPDIR"/*; do
    # If no section was found the glob stays unexpanded; skip it.
    [ -f "$f" ] || continue
    THISFILE=$(cat "$TMPDIR/.$(basename "$f")")
    if [ "$LASTFILE" != "$THISFILE" ]; then
	LASTFILE=$THISFILE
	echo "[ $LASTFILE ]"
    fi
    cat "$f"
done

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,129 @@
Rusty's Remarkably Unreliable Guide to Lguest
- or, A Young Coder's Illustrated Hypervisor
http://lguest.ozlabs.org
Lguest is designed to be a minimal hypervisor for the Linux kernel, for
Linux developers and users to experiment with virtualization with the
minimum of complexity. Nonetheless, it should have sufficient
features to make it useful for specific tasks, and, of course, you are
encouraged to fork and enhance it.
Features:
- Kernel module which runs in a normal kernel.
- Simple I/O model for communication.
- Simple program to create new guests.
- Logo contains cute puppies: http://lguest.ozlabs.org
Developer features:
- Fun to hack on.
- No ABI: being tied to a specific kernel anyway, you can change anything.
- Many opportunities for improvement or feature implementation.
Running Lguest:
- Lguest runs the same kernel as guest and host. You can configure
them differently, but usually it's easiest not to.
You will need to configure your kernel with the following options:
CONFIG_HIGHMEM64G=n ("High Memory Support" "64GB")[1]
CONFIG_TUN=y/m ("Universal TUN/TAP device driver support")
CONFIG_EXPERIMENTAL=y ("Prompt for development and/or incomplete code/drivers")
CONFIG_PARAVIRT=y ("Paravirtualization support (EXPERIMENTAL)")
CONFIG_LGUEST=y/m ("Linux hypervisor example code")
and I recommend:
CONFIG_HZ=100 ("Timer frequency")[2]
- A tool called "lguest" is available in this directory: type "make"
to build it. If you didn't build your kernel in-tree, use "make
O=<builddir>".
- Create or find a root disk image. There are several useful ones
around, such as the xm-test tiny root image at
http://xm-test.xensource.com/ramdisks/initrd-1.1-i386.img
For more serious work, I usually use a distribution ISO image and
install it under qemu, then make multiple copies:
dd if=/dev/zero of=rootfile bs=1M count=2048
qemu -cdrom image.iso -hda rootfile -net user -net nic -boot d
- "modprobe lg" if you built it as a module.
- Run an lguest as root:
Documentation/lguest/lguest 64m vmlinux --tunnet=192.168.19.1 --block=rootfile root=/dev/lgba
Explanation:
64m: the amount of memory to use.
vmlinux: the kernel image found in the top of your build directory. You
can also use a standard bzImage.
--tunnet=192.168.19.1: configures a "tap" device for networking with this
IP address.
--block=rootfile: a file or block device which becomes /dev/lgba
inside the guest.
root=/dev/lgba: this (and anything else on the command line) are
kernel boot parameters.
- Configuring networking. I usually have the host masquerade, using
"iptables -t nat -A POSTROUTING -o eth0 -j MASQUERADE" and "echo 1 >
/proc/sys/net/ipv4/ip_forward". In this example, I would configure
eth0 inside the guest at 192.168.19.2.
Another method is to bridge the tap device to an external interface
using --tunnet=bridge:<bridgename>, and perhaps run dhcp on the guest
to obtain an IP address. The bridge needs to be configured first:
this option simply adds the tap interface to it.
A simple example on my system:
ifconfig eth0 0.0.0.0
brctl addbr lg0
ifconfig lg0 up
brctl addif lg0 eth0
dhclient lg0
Then use --tunnet=bridge:lg0 when launching the guest.
See http://linux-net.osdl.org/index.php/Bridge for general information
on how to get bridging working.
- You can also create an inter-guest network using
"--sharenet=<filename>": any two guests using the same file are on
the same network. This file is created if it does not exist.
Lguest I/O model:
Lguest uses a simplified DMA model plus shared memory for I/O. Guests
can communicate with each other if they share underlying memory
(usually by the lguest program mmaping the same file), but they can
use any non-shared memory to communicate with the lguest process.
Guests can register DMA buffers at any key (must be a valid physical
address) using the LHCALL_BIND_DMA(key, dmabufs, num<<8|irq)
hypercall. "dmabufs" is the physical address of an array of "num"
"struct lguest_dma": each contains a used_len, and an array of
physical addresses and lengths. When a transfer occurs, the
"used_len" field of one of the buffers which has used_len 0 will be
set to the length transferred and the irq will fire.
Using an irq value of 0 unbinds the dma buffers.
To send DMA, the LHCALL_SEND_DMA(key, dma_physaddr) hypercall is used,
and the number of bytes used is written to the used_len field. This can
be 0 if no one else has bound a DMA buffer to that key, or if some other
error occurred.
DMA buffers bound by the same guest are ignored.
Cheers!
Rusty Russell rusty@rustcorp.com.au.
[1] These are on various places on the TODO list, waiting for you to
get annoyed enough at the limitation to fix it.
[2] Lguest is not yet tickless when idle. See [1].

120
Documentation/lockstat.txt Normal file
View File

@ -0,0 +1,120 @@
LOCK STATISTICS
- WHAT
As the name suggests, it provides statistics on locks.
- WHY
Because things like lock contention can severely impact performance.
- HOW
Lockdep already has hooks in the lock functions and maps lock instances to
lock classes. We build on that. The graph below shows the relation between
the lock functions and the various hooks therein.
__acquire
|
lock _____
| \
| __contended
| |
| <wait>
| _______/
|/
|
__acquired
|
.
<hold>
.
|
__release
|
unlock
lock, unlock - the regular lock functions
__* - the hooks
<> - states
With these hooks we provide the following statistics:
con-bounces - number of lock contentions that involved x-cpu data
contentions - number of lock acquisitions that had to wait
wait time min - shortest (non-0) time we ever had to wait for a lock
max - longest time we ever had to wait for a lock
total - total time we spend waiting on this lock
acq-bounces - number of lock acquisitions that involved x-cpu data
acquisitions - number of times we took the lock
hold time min - shortest (non-0) time we ever held the lock
max - longest time we ever held the lock
total - total time this lock was held
From these numbers various other statistics can be derived, such as:
hold time average = hold time total / acquisitions
These numbers are gathered per lock class, per read/write state (when
applicable).
It also tracks 4 contention points per class. A contention point is a call site
that had to wait on lock acquisition.
- USAGE
Look at the current lock statistics:
( line numbers not part of actual output, done for clarity in the explanation
below )
# less /proc/lock_stat
01 lock_stat version 0.2
02 -----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
03 class name con-bounces contentions waittime-min waittime-max waittime-total acq-bounces acquisitions holdtime-min holdtime-max holdtime-total
04 -----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
05
06 &inode->i_data.tree_lock-W: 15 21657 0.18 1093295.30 11547131054.85 58 10415 0.16 87.51 6387.60
07 &inode->i_data.tree_lock-R: 0 0 0.00 0.00 0.00 23302 231198 0.25 8.45 98023.38
08 --------------------------
09 &inode->i_data.tree_lock 0 [<ffffffff8027c08f>] add_to_page_cache+0x5f/0x190
10
11 ...............................................................................................................................................................................................
12
13 dcache_lock: 1037 1161 0.38 45.32 774.51 6611 243371 0.15 306.48 77387.24
14 -----------
15 dcache_lock 180 [<ffffffff802c0d7e>] sys_getcwd+0x11e/0x230
16 dcache_lock 165 [<ffffffff802c002a>] d_alloc+0x15a/0x210
17 dcache_lock 33 [<ffffffff8035818d>] _atomic_dec_and_lock+0x4d/0x70
18 dcache_lock 1 [<ffffffff802beef8>] shrink_dcache_parent+0x18/0x130
This excerpt shows the first two lock class statistics. Line 01 shows the
output version - each time the format changes this will be updated. Line 02-04
show the header with column descriptions. Lines 05-10 and 13-18 show the actual
statistics. These statistics come in two parts; the actual stats separated by a
short separator (line 08, 14) from the contention points.
The first lock (05-10) is a read/write lock, and shows two lines above the
short separator. The contention points don't match the column descriptors,
they have two: contentions and [<IP>] symbol.
View the top contending locks:
# grep : /proc/lock_stat | head
&inode->i_data.tree_lock-W: 15 21657 0.18 1093295.30 11547131054.85 58 10415 0.16 87.51 6387.60
&inode->i_data.tree_lock-R: 0 0 0.00 0.00 0.00 23302 231198 0.25 8.45 98023.38
dcache_lock: 1037 1161 0.38 45.32 774.51 6611 243371 0.15 306.48 77387.24
&inode->i_mutex: 161 286 18446744073709 62882.54 1244614.55 3653 20598 18446744073709 62318.60 1693822.74
&zone->lru_lock: 94 94 0.53 7.33 92.10 4366 32690 0.29 59.81 16350.06
&inode->i_data.i_mmap_lock: 79 79 0.40 3.77 53.03 11779 87755 0.28 116.93 29898.44
&q->__queue_lock: 48 50 0.52 31.62 86.31 774 13131 0.17 113.08 12277.52
&rq->rq_lock_key: 43 47 0.74 68.50 170.63 3706 33929 0.22 107.99 17460.62
&rq->rq_lock_key#2: 39 46 0.75 6.68 49.03 2979 32292 0.17 125.17 17137.63
tasklist_lock-W: 15 15 1.45 10.87 32.70 1201 7390 0.58 62.55 13648.47
Clear the statistics:
# echo 0 > /proc/lock_stat

View File

@ -0,0 +1,322 @@
==============
Memory Hotplug
==============
Last Updated: Jul 28 2007
This document is about memory hotplug including how-to-use and current status.
Because Memory Hotplug is still under development, contents of this text will
be changed often.
1. Introduction
1.1 purpose of memory hotplug
1.2. Phases of memory hotplug
1.3. Unit of Memory online/offline operation
2. Kernel Configuration
3. sysfs files for memory hotplug
4. Physical memory hot-add phase
4.1 Hardware(Firmware) Support
4.2 Notify memory hot-add event by hand
5. Logical Memory hot-add phase
5.1. State of memory
5.2. How to online memory
6. Logical memory remove
6.1 Memory offline and ZONE_MOVABLE
6.2. How to offline memory
7. Physical memory remove
8. Future Work List
Note(1): x86_64 has a special implementation for memory hotplug.
This text does not describe it.
Note(2): This text assumes that sysfs is mounted at /sys.
---------------
1. Introduction
---------------
1.1 purpose of memory hotplug
------------
Memory Hotplug allows users to increase/decrease the amount of memory.
Generally, there are two purposes.
(A) For changing the amount of memory.
This is to allow a feature like capacity on demand.
(B) For installing/removing DIMMs or NUMA-nodes physically.
This is to exchange DIMMs/NUMA-nodes, reduce power consumption, etc.
(A) is required by highly virtualized environments and (B) is required by
hardware which supports memory power management.
Linux memory hotplug is designed for both purposes.
1.2. Phases of memory hotplug
---------------
There are 2 phases in Memory Hotplug.
1) Physical Memory Hotplug phase
2) Logical Memory Hotplug phase.
The first phase is to communicate with hardware/firmware and make/erase
the environment for hotplugged memory. Basically, this phase is necessary
for purpose (B), but it is also a good phase for communication between
highly virtualized environments.
When memory is hotplugged, the kernel recognizes new memory, makes new memory
management tables, and makes sysfs files for new memory's operation.
If firmware supports notification of connection of new memory to OS,
this phase is triggered automatically. ACPI can notify this event. If not,
"probe" operation by system administration is used instead.
(see Section 4.).
Logical Memory Hotplug phase is to change memory state into
available/unavailable for users. The amount of memory from the user's view
is changed by this phase. The kernel turns all memory in a range into free
pages when that memory range is made available.
In this document, this phase is described as online/offline.
Logical Memory Hotplug phase is triggered by a write to a sysfs file by the
system administrator. For the hot-add case, it must be executed by hand after
the Physical Hotplug phase.
(However, if you write udev hotplug scripts for memory hotplug, these
phases can be executed in a seamless way.)
1.3. Unit of Memory online/offline operation
------------
Memory hotplug uses SPARSEMEM memory model. SPARSEMEM divides the whole memory
into chunks of the same size. The chunk is called a "section". The size of
a section is architecture dependent. For example, power uses 16MiB, ia64 uses
1GiB. The unit of online/offline operation is "one section". (see Section 3.)
To determine the size of sections, please read this file:
/sys/devices/system/memory/block_size_bytes
This file shows the size of sections in bytes.
-----------------------
2. Kernel Configuration
-----------------------
To use memory hotplug feature, kernel must be compiled with following
config options.
- For all memory hotplug
Memory model -> Sparse Memory (CONFIG_SPARSEMEM)
Allow for memory hot-add (CONFIG_MEMORY_HOTPLUG)
- To enable memory removal, the following are also necessary
Allow for memory hot remove (CONFIG_MEMORY_HOTREMOVE)
Page Migration (CONFIG_MIGRATION)
- For ACPI memory hotplug, the following are also necessary
Memory hotplug (under ACPI Support menu) (CONFIG_ACPI_HOTPLUG_MEMORY)
This option can be kernel module.
- As a related configuration, if your box has a feature of NUMA-node hotplug
via ACPI, then this option is necessary too.
ACPI0004,PNP0A05 and PNP0A06 Container Driver (under ACPI Support menu)
(CONFIG_ACPI_CONTAINER).
This option can be kernel module too.
--------------------------------
3 sysfs files for memory hotplug
--------------------------------
All sections have their device information under /sys/devices/system/memory as
/sys/devices/system/memory/memoryXXX
(XXX is section id.)
Now, XXX is defined as start_address_of_section / section_size.
For example, assume 1GiB section size. A device for a memory starting at
0x100000000 is /sys/devices/system/memory/memory4
(0x100000000 / 1GiB = 4)
This device covers address range [0x100000000 ... 0x140000000)
Under each section, you can see 3 files.
/sys/devices/system/memory/memoryXXX/phys_index
/sys/devices/system/memory/memoryXXX/phys_device
/sys/devices/system/memory/memoryXXX/state
'phys_index' : read-only and contains section id, same as XXX.
'state' : read-write
at read: contains online/offline state of memory.
at write: user can specify "online", "offline" command
'phys_device': read-only: designed to show the name of physical memory device.
This is not well implemented now.
NOTE:
These directories/files appear after physical memory hotplug phase.
--------------------------------
4. Physical memory hot-add phase
--------------------------------
4.1 Hardware(Firmware) Support
------------
On x86_64/ia64 platform, memory hotplug by ACPI is supported.
In general, the firmware (ACPI) which supports memory hotplug defines
memory class object of _HID "PNP0C80". When a notify is asserted to PNP0C80,
Linux's ACPI handler does hot-add memory to the system and calls a hotplug udev
script. This will be done automatically.
But scripts for memory hotplug are not contained in generic udev package(now).
You may have to write it by yourself or online/offline memory by hand.
Please see "How to online memory", "How to offline memory" in this text.
If firmware supports NUMA-node hotplug, and defines an object _HID "ACPI0004",
"PNP0A05", or "PNP0A06", notification is asserted to it, and ACPI handler
calls hotplug code for all of objects which are defined in it.
If memory device is found, memory hotplug code will be called.
4.2 Notify memory hot-add event by hand
------------
In some environments, especially virtualized environment, firmware will not
notify memory hotplug event to the kernel. For such environment, "probe"
interface is supported. This interface depends on CONFIG_ARCH_MEMORY_PROBE.
Now, CONFIG_ARCH_MEMORY_PROBE is supported only by powerpc but it does not
contain highly architecture-specific code. Please add the config option if
you need the "probe" interface.
Probe interface is located at
/sys/devices/system/memory/probe
You can tell the physical address of new memory to the kernel by
% echo start_address_of_new_memory > /sys/devices/system/memory/probe
Then, [start_address_of_new_memory, start_address_of_new_memory + section_size)
memory range is hot-added. In this case, hotplug script is not called (in
current implementation). You'll have to online memory by yourself.
Please see "How to online memory" in this text.
------------------------------
5. Logical Memory hot-add phase
------------------------------
5.1. State of memory
------------
To see (online/offline) state of memory section, read 'state' file.
% cat /sys/devices/system/memory/memoryXXX/state
If the memory section is online, you'll read "online".
If the memory section is offline, you'll read "offline".
5.2. How to online memory
------------
Even if the memory is hot-added, it is not at ready-to-use state.
For using newly added memory, you have to "online" the memory section.
For onlining, you have to write "online" to the section's state file as:
% echo online > /sys/devices/system/memory/memoryXXX/state
After this, section memoryXXX's state will be 'online' and the amount of
available memory will be increased.
Currently, newly added memory is added as ZONE_NORMAL (for powerpc, ZONE_DMA).
This may be changed in future.
------------------------
6. Logical memory remove
------------------------
6.1 Memory offline and ZONE_MOVABLE
------------
Memory offlining is more complicated than memory online. Because memory offline
has to make the whole memory section be unused, memory offline can fail if
the section includes memory which cannot be freed.
In general, memory offline can use 2 techniques.
(1) reclaim and free all memory in the section.
(2) migrate all pages in the section.
In the current implementation, Linux's memory offline uses method (2), freeing
all pages in the section by page migration. But not all pages are
migratable. Under current Linux, migratable pages are anonymous pages and
page caches. For offlining a section by migration, the kernel has to guarantee
that the section contains only migratable pages.
Now, a boot option for making a section which consists of migratable pages is
supported. By specifying "kernelcore=" or "movablecore=" boot option, you can
create ZONE_MOVABLE...a zone which is just used for movable pages.
(See also Documentation/kernel-parameters.txt)
Assume the system has "TOTAL" amount of memory at boot time, this boot option
creates ZONE_MOVABLE as following.
1) When kernelcore=YYYY boot option is used,
Size of memory not for movable pages (not for offline) is YYYY.
Size of memory for movable pages (for offline) is TOTAL-YYYY.
2) When movablecore=ZZZZ boot option is used,
Size of memory not for movable pages (not for offline) is TOTAL - ZZZZ.
Size of memory for movable pages (for offline) is ZZZZ.
Note) Unfortunately, there is no information to show which section belongs
to ZONE_MOVABLE. This is TBD.
6.2. How to offline memory
------------
You can offline a section by using the same sysfs interface that was used in
memory onlining.
% echo offline > /sys/devices/system/memory/memoryXXX/state
If offline succeeds, the state of the memory section is changed to be "offline".
If it fails, some error code (like -EBUSY) will be returned by the kernel.
Even if a section does not belong to ZONE_MOVABLE, you can try to offline it.
If it doesn't contain 'unmovable' memory, you'll get success.
A section under ZONE_MOVABLE is considered to be able to be offlined easily.
But under some busy state, it may return -EBUSY. Even if a memory section
cannot be offlined due to -EBUSY, you can retry offlining it and may be able to
offline it (or not).
(For example, a page is referred to by some kernel internal call and released
soon.)
Consideration:
Memory hotplug's design direction is to make the possibility of memory offlining
higher and to guarantee unplugging memory under any situation. But it needs
more work. Returning -EBUSY under some situation may be good because the user
can decide to retry more or not by himself. Currently, memory offlining code
does some amount of retry with 120 seconds timeout.
-------------------------
7. Physical memory remove
-------------------------
Need more implementation yet....
- Notification completion of remove works by OS to firmware.
- Guard from remove if not yet.
--------------
8. Future Work
--------------
- allowing memory hot-add to ZONE_MOVABLE. maybe we need some switch like
sysctl or new control file.
- showing memory section and physical device relationship.
- showing memory section and node relationship (maybe good for NUMA)
- showing memory section is under ZONE_MOVABLE or not
- test and make it better memory offlining.
- support HugeTLB page migration and offlining.
- memmap removing at memory offline.
- physical remove memory.

View File

@ -96,6 +96,9 @@ routing.txt
- the new routing mechanism
shaper.txt
- info on the module that can shape/limit transmitted traffic.
sk98lin.txt
- Marvell Yukon Chipset / SysKonnect SK-98xx compliant Gigabit
Ethernet Adapter family driver info
skfp.txt
- SysKonnect FDDI (SK-5xxx, Compaq Netelligent) driver info.
smc9.txt

View File

@ -1,766 +0,0 @@
HISTORY:
February 16/2002 -- revision 0.2.1:
COR typo corrected
February 10/2002 -- revision 0.2:
some spell checking ;->
January 12/2002 -- revision 0.1
This is still work in progress so may change.
To keep up to date please watch this space.
Introduction to NAPI
====================
NAPI is a proven (www.cyberus.ca/~hadi/usenix-paper.tgz) technique
to improve network performance on Linux. For more details please
read that paper.
NAPI provides a "inherent mitigation" which is bound by system capacity
as can be seen from the following data collected by Robert on Gigabit
ethernet (e1000):
Psize Ipps Tput Rxint Txint Done Ndone
---------------------------------------------------------------
60 890000 409362 17 27622 7 6823
128 758150 464364 21 9301 10 7738
256 445632 774646 42 15507 21 12906
512 232666 994445 241292 19147 241192 1062
1024 119061 1000003 872519 19258 872511 0
1440 85193 1000003 946576 19505 946569 0
Legend:
"Ipps" stands for input packets per second.
"Tput" == packets out of total 1M that made it out.
"txint" == transmit completion interrupts seen
"Done" == The number of times that the poll() managed to pull all
packets out of the rx ring. Note from this that the lower the
load the more we could clean up the rxring
"Ndone" == is the converse of "Done". Note again, that the higher
the load the more times we couldn't clean up the rxring.
Observe that:
when the NIC receives 890Kpackets/sec only 17 rx interrupts are generated.
The system can't handle the processing at 1 interrupt/packet at that load level.
At lower rates on the other hand, rx interrupts go up and therefore the
interrupt/packet ratio goes up (as observable from that table). So there is
a possibility that under low enough input, you get one poll call for each
input packet caused by a single interrupt each time. And if the system
can't handle an interrupt per packet ratio of 1, then it will just have to
chug along ....
0) Prerequisites:
==================
A driver MAY continue using the old 2.4 technique for interfacing
to the network stack and not benefit from the NAPI changes.
NAPI additions to the kernel do not break backward compatibility.
NAPI, however, requires the following features to be available:
A) DMA ring or enough RAM to store packets in software devices.
B) Ability to turn off interrupts or maybe events that send packets up
the stack.
NAPI processes packet events in what is known as dev->poll() method.
Typically, only packet receive events are processed in dev->poll().
The rest of the events MAY be processed by the regular interrupt handler
to reduce processing latency (justified also because there are not that
many of them).
Note, however, NAPI does not enforce that dev->poll() only processes
receive events.
Tests with the tulip driver indicated slightly increased latency if
all of the interrupt handler is moved to dev->poll(). Also MII handling
gets a little trickier.
The example used in this document is to move the receive processing only
to dev->poll(); this is shown with the patch for the tulip driver.
For an example of code that moves all the interrupt driver to
dev->poll() look at the ported e1000 code.
There are caveats that might force you to go with moving everything to
dev->poll(). Different NICs work differently depending on their status/event
acknowledgement setup.
There are two types of event register ACK mechanisms.
I) what is known as Clear-on-read (COR).
when you read the status/event register, it clears everything!
The natsemi and sunbmac NICs are known to do this.
In this case your only choice is to move all to dev->poll()
II) Clear-on-write (COW)
i) you clear the status by writing a 1 in the bit-location you want.
These are the majority of the NICs and work the best with NAPI.
Put only receive events in dev->poll(); leave the rest in
the old interrupt handler.
ii) whatever you write in the status register clears every thing ;->
Cant seem to find any supported by Linux which do this. If
someone knows such a chip email us please.
Move all to dev->poll()
C) Ability to detect new work correctly.
NAPI works by shutting down event interrupts when there's work and
turning them on when there's none.
New packets might show up in the small window while interrupts were being
re-enabled (refer to appendix 2). A packet might sneak in during the period
we are enabling interrupts. We only get to know about such a packet when the
next new packet arrives and generates an interrupt.
Essentially, there is a small window of opportunity for a race condition
which for clarity we'll refer to as the "rotting packet".
This is a very important topic and appendix 2 is dedicated for more
discussion.
Locking rules and environmental guarantees
==========================================
-Guarantee: Only one CPU at any time can call dev->poll(); this is because
only one CPU can pick the initial interrupt and hence the initial
netif_rx_schedule(dev);
- The core layer invokes devices to send packets in a round robin format.
This implies receive is totally lockless because of the guarantee that only
one CPU is executing it.
- contention can only be the result of some other CPU accessing the rx
ring. This happens only in close() and suspend() (when these methods
try to clean the rx ring);
****guarantee: driver authors need not worry about this; synchronization
is taken care for them by the top net layer.
-local interrupts are enabled (if you dont move all to dev->poll()). For
example link/MII and txcomplete continue functioning just same old way.
This improves the latency of processing these events. It is also assumed that
the receive interrupt is the largest cause of noise. Note this might not
always be true.
[according to Manfred Spraul, the winbond insists on sending one
txmitcomplete interrupt for each packet (although this can be mitigated)].
For these broken drivers, move all to dev->poll().
For the rest of this text, we'll assume that dev->poll() only
processes receive events.
new methods introduced by NAPI
==============================
a) netif_rx_schedule(dev)
Called by an IRQ handler to schedule a poll for device
b) netif_rx_schedule_prep(dev)
puts the device in a state which allows for it to be added to the
CPU polling list if it is up and running. You can look at this as
the first half of netif_rx_schedule(dev) above; the second half
being c) below.
c) __netif_rx_schedule(dev)
Add device to the poll list for this CPU; assuming that _prep above
has already been called and returned 1.
d) netif_rx_reschedule(dev, undo)
Called to reschedule polling for device specifically for some
deficient hardware. Read Appendix 2 for more details.
e) netif_rx_complete(dev)
Remove interface from the CPU poll list: it must be in the poll list
on current cpu. This primitive is called by dev->poll(), when
it completes its work. The device cannot be out of poll list at this
call, if it is then clearly it is a BUG(). You'll know ;->
All of the above methods are used below, so keep reading for clarity.
Device driver changes to be made when porting NAPI
==================================================
Below we describe what kind of changes are required for NAPI to work.
1) introduction of dev->poll() method
=====================================
This is the method that is invoked by the network core when it requests
for new packets from the driver. A driver is allowed to send up to
dev->quota packets by the current CPU before yielding to the network
subsystem (so other devices can also get opportunity to send to the stack).
dev->poll() prototype looks as follows:
int my_poll(struct net_device *dev, int *budget)
budget is the remaining number of packets the network subsystem on the
current CPU can send up the stack before yielding to other system tasks.
*Each driver is responsible for decrementing budget by the total number of
packets sent.
Total number of packets cannot exceed dev->quota.
dev->poll() method is invoked by the top layer, the driver just sends if it
can to the stack the packet quantity requested.
more on dev->poll() below after the interrupt changes are explained.
2) registering dev->poll() method
===================================
dev->poll should be set in the dev->probe() method.
e.g:
dev->open = my_open;
.
.
/* two new additions */
/* first register my poll method */
dev->poll = my_poll;
/* next register my weight/quanta; can be overridden in /proc */
dev->weight = 16;
.
.
dev->stop = my_close;
3) scheduling dev->poll()
=============================
This involves modifying the interrupt handler and the code
path which takes the packet off the NIC and sends them to the
stack.
it's important at this point to introduce the classical D Becker
interrupt processor:
------------------
static irqreturn_t
netdevice_interrupt(int irq, void *dev_id, struct pt_regs *regs)
{
struct net_device *dev = (struct net_device *)dev_id;
struct my_private *tp = (struct my_private *)dev->priv;
int work_count = my_work_count;
status = read_interrupt_status_reg();
if (status == 0)
return IRQ_NONE; /* Shared IRQ: not us */
if (status == 0xffff)
return IRQ_HANDLED; /* Hot unplug */
if (status & error)
do_some_error_handling();
do {
acknowledge_ints_ASAP();
if (status & link_interrupt) {
spin_lock(&tp->link_lock);
do_some_link_stat_stuff();
spin_unlock(&tp->link_lock);
}
if (status & rx_interrupt) {
receive_packets(dev);
}
if (status & rx_nobufs) {
make_rx_buffs_avail();
}
if (status & tx_related) {
spin_lock(&tp->lock);
tx_ring_free(dev);
if (tx_died)
restart_tx();
spin_unlock(&tp->lock);
}
status = read_interrupt_status_reg();
} while (!(status & error) || more_work_to_be_done);
return IRQ_HANDLED;
}
----------------------------------------------------------------------
We now change this to what is shown below to NAPI-enable it:
----------------------------------------------------------------------
static irqreturn_t
netdevice_interrupt(int irq, void *dev_id, struct pt_regs *regs)
{
struct net_device *dev = (struct net_device *)dev_id;
struct my_private *tp = (struct my_private *)dev->priv;
status = read_interrupt_status_reg();
if (status == 0)
return IRQ_NONE; /* Shared IRQ: not us */
if (status == 0xffff)
return IRQ_HANDLED; /* Hot unplug */
if (status & error)
do_some_error_handling();
do {
/************************ start note *********************************/
acknowledge_ints_ASAP(); // dont ack rx and rxnobuff here
/************************ end note *********************************/
if (status & link_interrupt) {
spin_lock(&tp->link_lock);
do_some_link_stat_stuff();
spin_unlock(&tp->link_lock);
}
/************************ start note *********************************/
if (status & rx_interrupt || (status & rx_nobuffs)) {
if (netif_rx_schedule_prep(dev)) {
/* disable interrupts caused
* by arriving packets */
disable_rx_and_rxnobuff_ints();
/* tell system we have work to be done. */
__netif_rx_schedule(dev);
} else {
printk("driver bug! interrupt while in poll\n");
/* FIX by disabling interrupts */
disable_rx_and_rxnobuff_ints();
}
}
/************************ end note note *********************************/
if (status & tx_related) {
spin_lock(&tp->lock);
tx_ring_free(dev);
if (tx_died)
restart_tx();
spin_unlock(&tp->lock);
}
status = read_interrupt_status_reg();
/************************ start note *********************************/
} while (!(status & error) || more_work_to_be_done(status));
/************************ end note note *********************************/
return IRQ_HANDLED;
}
---------------------------------------------------------------------
We note several things from above:
I) Any interrupt source which is caused by arriving packets is now
turned off when it occurs. Depending on the hardware, there could be
several reasons that arriving packets would cause interrupts; these are the
interrupt sources we wish to avoid. The two common ones are a) a packet
arriving (rxint) b) a packet arriving and finding no DMA buffers available
(rxnobuff) .
This means also acknowledge_ints_ASAP() will not clear the status
register for those two items above; clearing is done in the place where
proper work is done within NAPI; at the poll() and refill_rx_ring()
discussed further below.
netif_rx_schedule_prep() returns 1 if device is in running state and
gets successfully added to the core poll list. If we get a zero value
we can _almost_ assume we are already added to the list (instead of not running.
Logic based on the fact that you shouldn't get interrupt if not running)
We rectify this by disabling rx and rxnobuf interrupts.
II) that receive_packets(dev) and make_rx_buffs_avail() may have disappeared.
These functionalities are still around actually......
in fact, receive_packets(dev) is very close to my_poll() and
make_rx_buffs_avail() is invoked from my_poll()
4) converting receive_packets() to dev->poll()
===============================================
We need to convert the classical D Becker receive_packets(dev) to my_poll()
First the typical receive_packets() below:
-------------------------------------------------------------------
/* this is called by interrupt handler */
static void receive_packets (struct net_device *dev)
{
struct my_private *tp = (struct my_private *)dev->priv;
rx_ring = tp->rx_ring;
cur_rx = tp->cur_rx;
int entry = cur_rx % RX_RING_SIZE;
int received = 0;
int rx_work_limit = tp->dirty_rx + RX_RING_SIZE - tp->cur_rx;
while (rx_ring_not_empty) {
u32 rx_status;
unsigned int rx_size;
unsigned int pkt_size;
struct sk_buff *skb;
/* read size+status of next frame from DMA ring buffer */
/* the number 16 and 4 are just examples */
rx_status = le32_to_cpu (*(u32 *) (rx_ring + ring_offset));
rx_size = rx_status >> 16;
pkt_size = rx_size - 4;
/* process errors */
if ((rx_size > (MAX_ETH_FRAME_SIZE+4)) ||
(!(rx_status & RxStatusOK))) {
netdrv_rx_err (rx_status, dev, tp, ioaddr);
return;
}
if (--rx_work_limit < 0)
break;
/* grab a skb */
skb = dev_alloc_skb (pkt_size + 2);
if (skb) {
.
.
netif_rx (skb);
.
.
} else { /* OOM */
/*seems very driver specific ... some just pass
whatever is on the ring already. */
}
/* move to the next skb on the ring */
entry = (++tp->cur_rx) % RX_RING_SIZE;
received++ ;
}
/* store current ring pointer state */
tp->cur_rx = cur_rx;
/* Refill the Rx ring buffers if they are needed */
refill_rx_ring();
.
.
}
-------------------------------------------------------------------
We change it to a new one below; note the additional parameter in
the call.
-------------------------------------------------------------------
/* this is called by the network core */
static int my_poll (struct net_device *dev, int *budget)
{
struct my_private *tp = (struct my_private *)dev->priv;
rx_ring = tp->rx_ring;
cur_rx = tp->cur_rx;
int entry = cur_rx % RX_BUF_LEN;
/* maximum packets to send to the stack */
/************************ note note *********************************/
int rx_work_limit = dev->quota;
/************************ end note note *********************************/
do { // outer beginning loop starts here
clear_rx_status_register_bit();
while (rx_ring_not_empty) {
u32 rx_status;
unsigned int rx_size;
unsigned int pkt_size;
struct sk_buff *skb;
/* read size+status of next frame from DMA ring buffer */
/* the number 16 and 4 are just examples */
rx_status = le32_to_cpu (*(u32 *) (rx_ring + ring_offset));
rx_size = rx_status >> 16;
pkt_size = rx_size - 4;
/* process errors */
if ((rx_size > (MAX_ETH_FRAME_SIZE+4)) ||
(!(rx_status & RxStatusOK))) {
netdrv_rx_err (rx_status, dev, tp, ioaddr);
return 1;
}
/************************ note note *********************************/
if (--rx_work_limit < 0) { /* we got packets, but no quota */
/* store current ring pointer state */
tp->cur_rx = cur_rx;
/* Refill the Rx ring buffers if they are needed */
refill_rx_ring(dev);
goto not_done;
}
/********************** end note **********************************/
/* grab a skb */
skb = dev_alloc_skb (pkt_size + 2);
if (skb) {
.
.
/************************ note note *********************************/
netif_receive_skb (skb);
/********************** end note **********************************/
.
.
} else { /* OOM */
/*seems very driver specific ... common is just pass
whatever is on the ring already. */
}
/* move to the next skb on the ring */
entry = (++tp->cur_rx) % RX_RING_SIZE;
received++ ;
}
/* store current ring pointer state */
tp->cur_rx = cur_rx;
/* Refill the Rx ring buffers if they are needed */
refill_rx_ring(dev);
/* no packets on ring; but new ones can arrive since we last
checked */
status = read_interrupt_status_reg();
if (rx status is not set) {
/* If something arrives in this narrow window,
an interrupt will be generated */
goto done;
}
/* done! at least that's what it looks like ;->
if new packets came in after our last check on status bits
they'll be caught by the while check and we go back and clear them
since we haven't exceeded our quota */
} while (rx_status_is_set);
done:
/************************ note note *********************************/
dev->quota -= received;
*budget -= received;
/* If RX ring is not full we are out of memory. */
if (tp->rx_buffers[tp->dirty_rx % RX_RING_SIZE].skb == NULL)
goto oom;
/* we are happy/done, no more packets on ring; put us back
to where we can start processing interrupts again */
netif_rx_complete(dev);
enable_rx_and_rxnobuf_ints();
/* The last op happens after poll completion. Which means the following:
* 1. it can race with disabling irqs in irq handler (which are done to
* schedule polls)
* 2. it can race with dis/enabling irqs in other poll threads
* 3. if an irq raised after the beginning of the outer beginning
* loop (marked in the code above), it will be immediately
* triggered here.
*
* Summarizing: the logic may result in some redundant irqs both
* due to races in masking and due to too late acking of already
* processed irqs. The good news: no events are ever lost.
*/
return 0; /* done */
not_done:
if (tp->cur_rx - tp->dirty_rx > RX_RING_SIZE/2 ||
tp->rx_buffers[tp->dirty_rx % RX_RING_SIZE].skb == NULL)
refill_rx_ring(dev);
if (!received) {
printk("received==0\n");
received = 1;
}
dev->quota -= received;
*budget -= received;
return 1; /* not_done */
oom:
/* Start timer, stop polling, but do not enable rx interrupts. */
start_poll_timer(dev);
return 0; /* we'll take it from here so tell core "done"*/
/************************ End note note *********************************/
}
-------------------------------------------------------------------
From above we note that:
0) rx_work_limit = dev->quota
1) refill_rx_ring() is in charge of clearing the bit for rxnobuff when
it does the work.
2) We have a done and not_done state.
3) instead of netif_rx() we call netif_receive_skb() to pass the skb.
4) we have a new way of handling oom condition
5) A new outer do { } while loop has been added. This serves the purpose of
ensuring that if a new packet has come in, after we are all set and done,
and we have not exceeded our quota that we continue sending packets up.
-----------------------------------------------------------
Poll timer code will need to do the following:
a)
if (tp->cur_rx - tp->dirty_rx > RX_RING_SIZE/2 ||
tp->rx_buffers[tp->dirty_rx % RX_RING_SIZE].skb == NULL)
refill_rx_ring(dev);
/* If RX ring is not full we are still out of memory.
Restart the timer again. Else we re-add ourselves
to the master poll list.
*/
if (tp->rx_buffers[tp->dirty_rx % RX_RING_SIZE].skb == NULL)
restart_timer();
else netif_rx_schedule(dev); /* we are back on the poll list */
5) dev->close() and dev->suspend() issues
==========================================
The driver writer needn't worry about this; the top net layer takes
care of it.
6) Adding new Stats to /proc
=============================
In order to debug some of the new features, we introduce new stats
that need to be collected.
TODO: Fill this later.
APPENDIX 1: discussion on using ethernet HW FC
==============================================
Most chips with FC only send a pause packet when they run out of Rx buffers.
Since packets are pulled off the DMA ring by a softirq in NAPI,
if the system is slow in grabbing them and we have a high input
rate (faster than the system's capacity to remove packets), then theoretically
there will only be one rx interrupt for all packets during a given packetstorm.
Under low load, we might have a single interrupt per packet.
FC should be programmed to apply in the case when the system can't pull out
packets fast enough, i.e. send a pause only when you run out of rx buffers.
Note FC in itself is a good solution but we have found it to not be
much of a commodity feature (both in NICs and switches) and hence falls
under the same category as using NIC based mitigation. Also, experiments
indicate that it's much harder to resolve the resource allocation
issue (aka lazy receiving that NAPI offers) and hence quantifying its usefulness
proved harder. In any case, FC works even better with NAPI but is not
necessary.
APPENDIX 2: the "rotting packet" race-window avoidance scheme
=============================================================
There are two types of associations seen here
1) status/int which honors level triggered IRQ
If a status bit for receive or rxnobuff is set and the corresponding
interrupt-enable bit is not on, then no interrupts will be generated. However,
as soon as the "interrupt-enable" bit is unmasked, an immediate interrupt is
generated. [assuming the status bit was not turned off].
Generally the concept of level triggered IRQs in association with a status and
interrupt-enable CSR register set is used to avoid the race.
If we take the example of the tulip:
"pending work" is indicated by the status bit(CSR5 in tulip).
the corresponding interrupt bit (CSR7 in tulip) might be turned off (but
the CSR5 will continue to be turned on with new packet arrivals even if
we clear it the first time)
Very important is the fact that if we turn the interrupt bit on when
status is set, an immediate irq is triggered.
If we cleared the rx ring and proclaimed there was "no more work
to be done" and then went on to do a few other things; then when we enable
interrupts, there is a possibility that a new packet might sneak in during
this phase. It helps to look at the pseudo code for the tulip poll
routine:
--------------------------
do {
ACK;
while (ring_is_not_empty()) {
work-work-work
if quota is exceeded: exit, no touching irq status/mask
}
/* No packets, but new can arrive while we are doing this*/
CSR5 := read
if (CSR5 is not set) {
/* If something arrives in this narrow window here,
* where the comments are ;-> irq will be generated */
unmask irqs;
exit poll;
}
} while (rx_status_is_set);
------------------------
CSR5 bit of interest is only the rx status.
If you look at the last if statement:
you just finished grabbing all the packets from the rx ring .. you check if
status bit says there are more packets just in ... it says none; you then
enable rx interrupts again; if a new packet just came in during this check,
we are counting that CSR5 will be set in that small window of opportunity
and that by re-enabling interrupts, we would actually trigger an interrupt
to register the new packet for processing.
[The above description may be very verbose, if you have better wording
that will make this more understandable, please suggest it.]
2) non-capable hardware
These do not generally respect level triggered IRQs. Normally,
irqs may be lost while being masked and the only way to leave poll is to do
a double check for new input after netif_rx_complete() is invoked
and re-enable polling (after seeing this new input).
Sample code:
---------
.
.
restart_poll:
while (ring_is_not_empty()) {
work-work-work
if quota is exceeded: exit, not touching irq status/mask
}
.
.
.
enable_rx_interrupts()
netif_rx_complete(dev);
if (ring_has_new_packet() && netif_rx_reschedule(dev, received)) {
disable_rx_and_rxnobufs()
goto restart_poll
}
---------
Basically netif_rx_complete() removes us from the poll list, but because a
new packet which will never be caught due to the possibility of a race
might come in, we attempt to re-add ourselves to the poll list.
APPENDIX 3: Scheduling issues.
==============================
As seen, NAPI moves processing to softirq level. Linux uses ksoftirqd as the
general solution to schedule softirqs to run before the next interrupt, by putting
them under scheduler control. This also prevents consecutive softirqs from
monopolizing the CPU. It also has the effect that the priority of ksoftirqd needs
to be considered when running very CPU-intensive applications and networking, to
get the proper softirq/user balance. Increasing ksoftirqd priority to 0
(eventually more) is reported to cure problems with low network performance at high
CPU load.
Most used processes in a GIGE router:
USER PID %CPU %MEM SIZE RSS TTY STAT START TIME COMMAND
root 3 0.2 0.0 0 0 ? RWN Aug 15 602:00 (ksoftirqd_CPU0)
root 232 0.0 7.9 41400 40884 ? S Aug 15 74:12 gated
--------------------------------------------------------------------
relevant sites:
==================
ftp://robur.slu.se/pub/Linux/net-development/NAPI/
--------------------------------------------------------------------
TODO: Write net-skeleton.c driver.
-------------------------------------------------------------
Authors:
========
Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
Jamal Hadi Salim <hadi@cyberus.ca>
Robert Olsson <Robert.Olsson@data.slu.se>
Acknowledgements:
================
People who made this document better:
Lennert Buytenhek <buytenh@gnu.org>
Andrew Morton <akpm@zip.com.au>
Manfred Spraul <manfred@colorfullife.com>
Donald Becker <becker@scyld.com>
Jeff Garzik <jgarzik@pobox.com>

View File

@ -38,8 +38,13 @@ Socket options
DCCP_SOCKOPT_SERVICE sets the service. The specification mandates use of
service codes (RFC 4340, sec. 8.1.2); if this socket option is not set,
the socket will fall back to 0 (which means that no meaningful service code
is present). Connecting sockets set at most one service option; for
listening sockets, multiple service codes can be specified.
is present). On active sockets this is set before connect(); specifying more
than one code has no effect (all subsequent service codes are ignored). The
case is different for passive sockets, where multiple service codes (up to 32)
can be set before calling bind().
DCCP_SOCKOPT_GET_CUR_MPS is read-only and retrieves the current maximum packet
size (application payload size) in bytes, see RFC 4340, section 14.
DCCP_SOCKOPT_SEND_CSCOV and DCCP_SOCKOPT_RECV_CSCOV are used for setting the
partial checksum coverage (RFC 4340, sec. 9.2). The default is that checksums
@ -50,12 +55,13 @@ be enabled at the receiver, too with suitable choice of CsCov.
DCCP_SOCKOPT_SEND_CSCOV sets the sender checksum coverage. Values in the
range 0..15 are acceptable. The default setting is 0 (full coverage),
values between 1..15 indicate partial coverage.
DCCP_SOCKOPT_SEND_CSCOV is for the receiver and has a different meaning: it
DCCP_SOCKOPT_RECV_CSCOV is for the receiver and has a different meaning: it
sets a threshold, where again values 0..15 are acceptable. The default
of 0 means that all packets with a partial coverage will be discarded.
Values in the range 1..15 indicate that packets with minimally such a
coverage value are also acceptable. The higher the number, the more
restrictive this setting (see [RFC 4340, sec. 9.2.1]).
restrictive this setting (see [RFC 4340, sec. 9.2.1]). Partial coverage
settings are inherited by the child socket after accept().
The following two options apply to CCID 3 exclusively and are getsockopt()-only.
In either case, a TFRC info struct (defined in <linux/tfrc.h>) is returned.
@ -112,9 +118,14 @@ tx_qlen = 5
The size of the transmit buffer in packets. A value of 0 corresponds
to an unbounded transmit buffer.
sync_ratelimit = 125 ms
The timeout between subsequent DCCP-Sync packets sent in response to
sequence-invalid packets on the same socket (RFC 4340, 7.5.4). The unit
of this parameter is milliseconds; a value of 0 disables rate-limiting.
Notes
=====
DCCP does not travel through NAT successfully at present on many boxes. This is
because the checksum covers the psuedo-header as per TCP and UDP. Linux NAT
because the checksum covers the pseudo-header as per TCP and UDP. Linux NAT
support for DCCP has been added.

View File

@ -1,52 +0,0 @@
The Digi International RightSwitch SE-X (dgrs) Device Driver
This is a Linux driver for the Digi International RightSwitch SE-X
EISA and PCI boards. These are 4 (EISA) or 6 (PCI) port Ethernet
switches and a NIC combined into a single board. This driver can
be compiled into the kernel statically or as a loadable module.
There is also a companion management tool, called "xrightswitch".
The management tool lets you watch the performance graphically,
as well as set the SNMP agent IP and IPX addresses, IEEE Spanning
Tree, and Aging time. These can also be set from the command line
when the driver is loaded. The driver command line options are:
debug=NNN Debug printing level
dma=0/1 Disable/Enable DMA on PCI card
spantree=0/1 Disable/Enable IEEE spanning tree
hashexpire=NNN Change address aging time (default 300 seconds)
ipaddr=A,B,C,D Set SNMP agent IP address i.e. 199,86,8,221
iptrap=A,B,C,D Set SNMP agent IP trap address i.e. 199,86,8,221
ipxnet=NNN Set SNMP agent IPX network number
nicmode=0/1 Disable/Enable multiple NIC mode
There is also a tool for setting up input and output packet filters
on each port, called "dgrsfilt".
Both the management tool and the filtering tool are available
separately from the following FTP site:
ftp://ftp.dgii.com/drivers/rightswitch/linux/
When nicmode=1, the board and driver operate as 4 or 6 individual
NIC ports (eth0...eth5) instead of as a switch. All switching
functions are disabled. In the future, the board firmware may include
a routing cache when in this mode.
Copyright 1995-1996 Digi International Inc.
This software may be used and distributed according to the terms
of the GNU General Public License, incorporated herein by reference.
For information on purchasing a RightSwitch SE-4 or SE-6
board, please contact Digi's sales department at 1-612-912-3444
or 1-800-DIGIBRD. Outside the U.S., please check our Web page at:
http://www.dgii.com
for sales offices worldwide. Tech support is also available through
the channels listed on the Web site, although as long as I am
employed on networking products at Digi I will be happy to provide
any bug fixes that may be needed.
-Rick Richardson, rick@dgii.com

View File

@ -180,13 +180,20 @@ tcp_fin_timeout - INTEGER
to live longer. Cf. tcp_max_orphans.
tcp_frto - INTEGER
Enables F-RTO, an enhanced recovery algorithm for TCP retransmission
Enables Forward RTO-Recovery (F-RTO) defined in RFC4138.
F-RTO is an enhanced recovery algorithm for TCP retransmission
timeouts. It is particularly beneficial in wireless environments
where packet loss is typically due to random radio interference
rather than intermediate router congestion. If set to 1, basic
version is enabled. 2 enables SACK enhanced F-RTO, which is
EXPERIMENTAL. The basic version can be used also when SACK is
enabled for a flow through tcp_sack sysctl.
rather than intermediate router congestion. F-RTO is a sender-side
only modification. Therefore it does not require any support from
the peer. However, in the typical case where the wireless link is
the local access link and most of the data flows downlink, the
faraway servers should have F-RTO enabled to take advantage of it.
If set to 1, the basic version is enabled. 2 enables SACK-enhanced
F-RTO if the flow uses SACK. The basic version can also be used when
SACK is in use, though scenarios exist where F-RTO
interacts badly with the packet counting of the SACK-enabled TCP
flow.
tcp_frto_response - INTEGER
When F-RTO has detected that a TCP retransmission timeout was

View File

@ -13,15 +13,35 @@ The radiotap format is discussed in
./Documentation/networking/radiotap-headers.txt.
Although 13 radiotap argument types are currently defined, most only make sense
to appear on received packets. Currently three kinds of argument are used by
the injection code, although it knows to skip any other arguments that are
present (facilitating replay of captured radiotap headers directly):
to appear on received packets. The following information is parsed from the
radiotap headers and used to control injection:
- IEEE80211_RADIOTAP_RATE - u8 arg in 500kbps units (0x02 --> 1Mbps)
* IEEE80211_RADIOTAP_RATE
- IEEE80211_RADIOTAP_ANTENNA - u8 arg, 0x00 = ant1, 0x01 = ant2
rate in 500kbps units, automatic if invalid or not present
- IEEE80211_RADIOTAP_DBM_TX_POWER - u8 arg, dBm
* IEEE80211_RADIOTAP_ANTENNA
antenna to use, automatic if not present
* IEEE80211_RADIOTAP_DBM_TX_POWER
transmit power in dBm, automatic if not present
* IEEE80211_RADIOTAP_FLAGS
IEEE80211_RADIOTAP_F_FCS: FCS will be removed and recalculated
IEEE80211_RADIOTAP_F_WEP: frame will be encrypted if key available
IEEE80211_RADIOTAP_F_FRAG: frame will be fragmented if longer than the
current fragmentation threshold. Note that
this flag is only reliable when software
fragmentation is enabled.
The injection code can also skip all other currently defined radiotap fields
facilitating replay of captured radiotap headers directly.
Here is an example valid radiotap header defining these three parameters

View File

@ -58,9 +58,13 @@ software, so it's a straight round-robin qdisc. It uses the same syntax and
classification priomap that sch_prio uses, so it should be intuitive to
configure for people who've used sch_prio.
The PRIO qdisc naturally plugs into a multiqueue device. If PRIO has been
built with NET_SCH_PRIO_MQ, then upon load, it will make sure the number of
bands requested is equal to the number of queues on the hardware. If they
In order to utilize the multiqueue features of the qdiscs, the network
device layer needs to enable multiple queue support. This can be done by
selecting NETDEVICES_MULTIQUEUE under Drivers.
The PRIO qdisc naturally plugs into a multiqueue device. If
NETDEVICES_MULTIQUEUE is selected, then on qdisc load, the number of
bands requested is compared to the number of queues on the hardware. If they
are equal, it sets a one-to-one mapping up between the queues and bands. If
they're not equal, it will not load the qdisc. This is the same behavior
for RR. Once the association is made, any skb that is classified will have

View File

@ -3,6 +3,10 @@ started by Ingo Molnar <mingo@redhat.com>, 2001.09.17
2.6 port and netpoll api by Matt Mackall <mpm@selenic.com>, Sep 9 2003
Please send bug reports to Matt Mackall <mpm@selenic.com>
and Satyam Sharma <satyam.sharma@gmail.com>
Introduction:
=============
This module logs kernel printk messages over UDP allowing debugging of
problems where disk logging fails and serial consoles are impractical.
@ -13,6 +17,9 @@ the specified interface as soon as possible. While this doesn't allow
capture of early kernel panics, it does capture most of the boot
process.
Sender and receiver configuration:
==================================
It takes a string configuration parameter "netconsole" in the
following format:
@ -34,21 +41,113 @@ Examples:
insmod netconsole netconsole=@/,@10.0.0.2/
It also supports logging to multiple remote agents by specifying
parameters for the multiple agents separated by semicolons and the
complete string enclosed in "quotes", thusly:
modprobe netconsole netconsole="@/,@10.0.0.2/;@/eth1,6892@10.0.0.3/"
Built-in netconsole starts immediately after the TCP stack is
initialized and attempts to bring up the supplied dev at the supplied
address.
The remote host can run either 'netcat -u -l -p <port>' or syslogd.
Dynamic reconfiguration:
========================
Dynamic reconfigurability is a useful addition to netconsole that enables
remote logging targets to be dynamically added, removed, or have their
parameters reconfigured at runtime from a configfs-based userspace interface.
[ Note that the parameters of netconsole targets that were specified/created
from the boot/module option are not exposed via this interface, and hence
cannot be modified dynamically. ]
To include this feature, select CONFIG_NETCONSOLE_DYNAMIC when building the
netconsole module (or kernel, if netconsole is built-in).
Some examples follow (where configfs is mounted at the /sys/kernel/config
mountpoint).
To add a remote logging target (target names can be arbitrary):
cd /sys/kernel/config/netconsole/
mkdir target1
Note that newly created targets have default parameter values (as mentioned
above) and are disabled by default -- they must first be enabled by writing
"1" to the "enabled" attribute (usually after setting parameters accordingly)
as described below.
To remove a target:
rmdir /sys/kernel/config/netconsole/othertarget/
The interface exposes these parameters of a netconsole target to userspace:
enabled Is this target currently enabled? (read-write)
dev_name Local network interface name (read-write)
local_port Source UDP port to use (read-write)
remote_port Remote agent's UDP port (read-write)
local_ip Source IP address to use (read-write)
remote_ip Remote agent's IP address (read-write)
local_mac Local interface's MAC address (read-only)
remote_mac Remote agent's MAC address (read-write)
The "enabled" attribute is also used to control whether the parameters of
a target can be updated or not -- you can modify the parameters of only
disabled targets (i.e. if "enabled" is 0).
To update a target's parameters:
cat enabled # check if enabled is 1
echo 0 > enabled # disable the target (if required)
echo eth2 > dev_name # set local interface
echo 10.0.0.4 > remote_ip # update some parameter
echo cb:a9:87:65:43:21 > remote_mac # update more parameters
echo 1 > enabled # enable target again
You can also update the local interface dynamically. This is especially
useful if you want to use interfaces that have newly come up (and may not
have existed when netconsole was loaded / initialized).
Miscellaneous notes:
====================
WARNING: the default target ethernet setting uses the broadcast
ethernet address to send packets, which can cause increased load on
other systems on the same ethernet segment.
TIP: some LAN switches may be configured to suppress ethernet broadcasts
so it is advised to explicitly specify the remote agents' MAC addresses
from the config parameters passed to netconsole.
TIP: to find out the MAC address of, say, 10.0.0.2, you may try using:
ping -c 1 10.0.0.2 ; /sbin/arp -n | grep 10.0.0.2
TIP: in case the remote logging agent is on a different LAN subnet than
the sender, it is suggested to try specifying the MAC address of the
default gateway (you may use /sbin/route -n to find it out) as the
remote MAC address instead.
NOTE: the network device (eth1 in the above case) can run any kind
of other network traffic, netconsole is not intrusive. Netconsole
might cause slight delays in other traffic if the volume of kernel
messages is high, but should have no other impact.
NOTE: if you find that the remote logging agent is not receiving or
printing all messages from the sender, it is likely that you have set
the "console_loglevel" parameter (on the sender) to only send high
priority messages to the console. You can change this at runtime using:
dmesg -n 8
or by specifying "debug" on the kernel command line at boot, to send
all kernel messages to the console. A specific value for this parameter
can also be set using the "loglevel" kernel boot option. See the
dmesg(8) man page and Documentation/kernel-parameters.txt for details.
Netconsole was designed to be as instantaneous as possible, to
enable the logging of even the most critical kernel bugs. It works
from IRQ contexts as well, and does not enable interrupts while

View File

@ -73,7 +73,8 @@ dev->hard_start_xmit:
has to lock by itself when needed. It is recommended to use a try lock
for this and return NETDEV_TX_LOCKED when the spin lock fails.
The locking there should also properly protect against
set_multicast_list.
set_multicast_list. Note that the use of NETIF_F_LLTX is deprecated.
Don't use it for new drivers.
Context: Process with BHs disabled or BH (timer),
will be called with interrupts disabled by netconsole.
@ -95,9 +96,13 @@ dev->set_multicast_list:
Synchronization: netif_tx_lock spinlock.
Context: BHs disabled
dev->poll:
Synchronization: __LINK_STATE_RX_SCHED bit in dev->state. See
dev_close code and comments in net/core/dev.c for more info.
struct napi_struct synchronization rules
========================================
napi->poll:
Synchronization: NAPI_STATE_SCHED bit in napi->state. Device
driver's dev->close method will invoke napi_disable() on
all NAPI instances which will do a sleeping poll on the
NAPI_STATE_SCHED napi->state bit, waiting for all pending
NAPI activity to cease.
Context: softirq
will be called with interrupts disabled by netconsole.

View File

@ -0,0 +1,568 @@
(C)Copyright 1999-2004 Marvell(R).
All rights reserved
===========================================================================
sk98lin.txt created 13-Feb-2004
Readme File for sk98lin v6.23
Marvell Yukon/SysKonnect SK-98xx Gigabit Ethernet Adapter family driver for LINUX
This file contains
1 Overview
2 Required Files
3 Installation
3.1 Driver Installation
3.2 Inclusion of adapter at system start
4 Driver Parameters
4.1 Per-Port Parameters
4.2 Adapter Parameters
5 Large Frame Support
6 VLAN and Link Aggregation Support (IEEE 802.1, 802.1q, 802.3ad)
7 Troubleshooting
===========================================================================
1 Overview
===========
The sk98lin driver supports the Marvell Yukon and SysKonnect
SK-98xx/SK-95xx compliant Gigabit Ethernet Adapter on Linux. It has
been tested with Linux on Intel/x86 machines.
***
2 Required Files
=================
The linux kernel source.
No additional files required.
***
3 Installation
===============
It is recommended to download the latest version of the driver from the
SysKonnect web site www.syskonnect.com. If you have downloaded the latest
driver, the Linux kernel has to be patched before the driver can be
installed. For details on how to patch a Linux kernel, refer to the
patch.txt file.
3.1 Driver Installation
------------------------
The following steps describe the actions that are required to install
the driver and to start it manually. These steps should be carried
out for the initial driver setup. Once confirmed to be ok, they can
be included in the system start.
NOTE 1: To perform the following tasks you need 'root' access.
NOTE 2: In case of problems, please read the section "Troubleshooting"
below.
The driver can either be integrated into the kernel or it can be compiled
as a module. Select the appropriate option during the kernel
configuration.
Compile/use the driver as a module
----------------------------------
To compile the driver, go to the directory /usr/src/linux and
execute the command "make menuconfig" or "make xconfig" and proceed as
follows:
To integrate the driver permanently into the kernel, proceed as follows:
1. Select the menu "Network device support" and then "Ethernet(1000Mbit)"
2. Mark "Marvell Yukon Chipset / SysKonnect SK-98xx family support"
with (*)
3. Build a new kernel when the configuration of the above options is
finished.
4. Install the new kernel.
5. Reboot your system.
To use the driver as a module, proceed as follows:
1. Enable 'loadable module support' in the kernel.
2. For automatic driver start, enable the 'Kernel module loader'.
3. Select the menu "Network device support" and then "Ethernet(1000Mbit)"
4. Mark "Marvell Yukon Chipset / SysKonnect SK-98xx family support"
with (M)
5. Execute the command "make modules".
6. Execute the command "make modules_install".
The appropriate modules will be installed.
7. Reboot your system.
Load the module manually
------------------------
To load the module manually, proceed as follows:
1. Enter "modprobe sk98lin".
2. If a Marvell Yukon or SysKonnect SK-98xx adapter is installed in
your computer and you have a /proc file system, execute the command:
"ls /proc/net/sk98lin/"
This should produce an output containing a line with the following
format:
eth0 eth1 ...
which indicates that your adapter has been found and initialized.
NOTE 1: If you have more than one Marvell Yukon or SysKonnect SK-98xx
adapter installed, the adapters will be listed as 'eth0',
'eth1', 'eth2', etc.
For each adapter, repeat steps 3 and 4 below.
NOTE 2: If you have other Ethernet adapters installed, your Marvell
Yukon or SysKonnect SK-98xx adapter will be mapped to the
next available number, e.g. 'eth1'. The mapping is executed
automatically.
The module installation message (displayed either in a system
log file or on the console) prints a line for each adapter
found containing the corresponding 'ethX'.
3. Select an IP address and assign it to the respective adapter by
entering:
ifconfig eth0 <ip-address>
With this command, the adapter is connected to the Ethernet.
SK-98xx Gigabit Ethernet Server Adapters: The yellow LED on the adapter
is now active, the link status LED of the primary port is active and
the link status LED of the secondary port (on dual port adapters) is
blinking (if the ports are connected to a switch or hub).
SK-98xx V2.0 Gigabit Ethernet Adapters: The link status LED is active.
In addition, you will receive a status message on the console stating
"ethX: network connection up using port Y" and showing the selected
connection parameters (X stands for the ethernet device number
(0,1,2, etc), Y stands for the port name (A or B)).
NOTE: If you are in doubt about IP addresses, ask your network
administrator for assistance.
4. Your adapter should now be fully operational.
Use 'ping <otherstation>' to verify the connection to other computers
on your network.
5. To check the adapter configuration view /proc/net/sk98lin/[devicename].
For example by executing:
"cat /proc/net/sk98lin/eth0"
Unload the module
-----------------
To stop and unload the driver modules, proceed as follows:
1. Execute the command "ifconfig eth0 down".
2. Execute the command "rmmod sk98lin".
3.2 Inclusion of adapter at system start
-----------------------------------------
Since a large number of different Linux distributions are
available, we are unable to describe a general installation procedure
for the driver module.
Because the driver is now integrated in the kernel, installation should
be easy, using the standard mechanism of your distribution.
Refer to the distribution's manual for installation of ethernet adapters.
***
4 Driver Parameters
====================
Parameters can be set on the command line when the module is
loaded with the command 'modprobe'.
In some distributions, the configuration tools are able to pass parameters
to the driver module.
If you use the kernel module loader, you can set driver parameters
in the file /etc/modprobe.conf (or /etc/modules.conf in 2.4 or earlier).
To set the driver parameters in this file, proceed as follows:
1. Insert a line of the form :
options sk98lin ...
For "...", the same syntax is required as described for the command
line parameters of modprobe below.
2. To activate the new parameters, either reboot your computer
or
unload and reload the driver.
The syntax of the driver parameters is:
modprobe sk98lin parameter=value1[,value2[,value3...]]
where value1 refers to the first adapter, value2 to the second etc.
NOTE: All parameters are case sensitive. Write them exactly as shown
below.
Example:
Suppose you have two adapters. You want to set auto-negotiation
on the first adapter to ON and on the second adapter to OFF.
You also want to set DuplexCapabilities on the first adapter
to FULL, and on the second adapter to HALF.
Then, you must enter:
modprobe sk98lin AutoNeg_A=On,Off DupCap_A=Full,Half
NOTE: The number of adapters that can be configured this way is
limited in the driver (file skge.c, constant SK_MAX_CARD_PARAM).
The current limit is 16. If you happen to install
more adapters, adjust this and recompile.
4.1 Per-Port Parameters
------------------------
These settings are available for each port on the adapter.
In the following description, '?' stands for the port for
which you set the parameter (A or B).
Speed
-----
Parameter: Speed_?
Values: 10, 100, 1000, Auto
Default: Auto
This parameter is used to set the speed capabilities. It is only valid
for the SK-98xx V2.0 copper adapters.
Usually, the speed is negotiated between the two ports during link
establishment. If this fails, a port can be forced to a specific setting
with this parameter.
Auto-Negotiation
----------------
Parameter: AutoNeg_?
Values: On, Off, Sense
Default: On
The "Sense"-mode automatically detects whether the link partner supports
auto-negotiation or not.
Duplex Capabilities
-------------------
Parameter: DupCap_?
Values: Half, Full, Both
Default: Both
This parameter is only relevant if auto-negotiation for this port is
not set to "Sense". If auto-negotiation is set to "On", all three values
are possible. If it is set to "Off", only "Full" and "Half" are allowed.
This parameter is useful if your link partner does not support all
possible combinations.
Flow Control
------------
Parameter: FlowCtrl_?
Values: Sym, SymOrRem, LocSend, None
Default: SymOrRem
This parameter can be used to set the flow control capabilities the
port reports during auto-negotiation. It can be set for each port
individually.
Possible modes:
-- Sym = Symmetric: both link partners are allowed to send
PAUSE frames
-- SymOrRem = SymmetricOrRemote: both or only remote partner
are allowed to send PAUSE frames
-- LocSend = LocalSend: only local link partner is allowed
to send PAUSE frames
-- None = no link partner is allowed to send PAUSE frames
NOTE: This parameter is ignored if auto-negotiation is set to "Off".
Role in Master-Slave-Negotiation (1000Base-T only)
--------------------------------------------------
Parameter: Role_?
Values: Auto, Master, Slave
Default: Auto
This parameter is only valid for the SK-9821 and SK-9822 adapters.
For two 1000Base-T ports to communicate, one must take the role of the
master (providing timing information), while the other must be the
slave. Usually, this is negotiated between the two ports during link
establishment. If this fails, a port can be forced to a specific setting
with this parameter.
4.2 Adapter Parameters
-----------------------
Connection Type (SK-98xx V2.0 copper adapters only)
---------------
Parameter: ConType
Values: Auto, 100FD, 100HD, 10FD, 10HD
Default: Auto
The parameter 'ConType' is a combination of all five per-port parameters
within one single parameter. This simplifies the configuration of both ports
of an adapter card! The different values of this variable reflect the most
meaningful combinations of port parameters.
The following table shows the values of 'ConType' and the corresponding
combinations of the per-port parameters:
ConType | DupCap AutoNeg FlowCtrl Role Speed
----------+------------------------------------------------------
Auto | Both On SymOrRem Auto Auto
100FD | Full Off None Auto (ignored) 100
100HD | Half Off None Auto (ignored) 100
10FD | Full Off None Auto (ignored) 10
10HD | Half Off None Auto (ignored) 10
Stating any other port parameter together with this 'ConType' variable
will result in a merged configuration of those settings. This is due to
the fact that the per-port parameters (e.g. Speed_?) have a higher
priority than the combined variable 'ConType'.
NOTE: This parameter is always used on both ports of the adapter card.
Interrupt Moderation
--------------------
Parameter: Moderation
Values: None, Static, Dynamic
Default: None
Interrupt moderation is employed to limit the maximum number of interrupts
the driver has to serve. That is, one or more interrupts (which indicate any
transmit or receive packet to be processed) are queued until the driver
processes them. The point at which queued interrupts are served is determined by the
'IntsPerSec' parameter, which is explained below.
Possible modes:
-- None - No interrupt moderation is applied on the adapter card.
Therefore, each transmit or receive interrupt is served immediately
as soon as it appears on the interrupt line of the adapter card.
-- Static - Interrupt moderation is applied on the adapter card.
All transmit and receive interrupts are queued until a complete
moderation interval ends. If such a moderation interval ends, all
queued interrupts are processed in one big bunch without any delay.
The term 'static' reflects the fact that interrupt moderation is
always enabled, regardless of how much network load is currently
passing via a particular interface. In addition, the duration of
the moderation interval has a fixed length that never changes while
the driver is operational.
-- Dynamic - Interrupt moderation might be applied on the adapter card,
depending on the load of the system. If the driver detects that the
system load is too high, the driver tries to shield the system against
too much network load by enabling interrupt moderation. If - at a later
time - the CPU utilization decreases again (or if the network load is
negligible) the interrupt moderation will automatically be disabled.
Interrupt moderation should be used when the driver has to handle one or more
interfaces with a high network load, which - as a consequence - leads also to a
high CPU utilization. When moderation is applied in such high network load
situations, CPU load might be reduced by 20-30%.
NOTE: The drawback of using interrupt moderation is an increase of the round-
trip-time (RTT), due to the queueing and serving of interrupts at dedicated
moderation times.
Interrupts per second
---------------------
Parameter: IntsPerSec
Values: 30...40000 (interrupts per second)
Default: 2000
This parameter is only used if either static or dynamic interrupt moderation
is used on a network adapter card. Using this parameter if no moderation is
applied will lead to no action performed.
This parameter determines the length of any interrupt moderation interval.
Assuming that static interrupt moderation is to be used, an 'IntsPerSec'
parameter value of 2000 will lead to an interrupt moderation interval of
500 microseconds.
NOTE: The duration of the moderation interval is to be chosen with care.
At first glance, selecting a very long duration (e.g. only 100 interrupts per
second) seems to be meaningful, but the increase of packet-processing delay
is tremendous. On the other hand, selecting a very short moderation time might
negate the benefit of any moderation being applied.
Preferred Port
--------------
Parameter: PrefPort
Values: A, B
Default: A
This is used to force the preferred port to A or B (on dual-port network
adapters). The preferred port is the one that is used if both are detected
as fully functional.
RLMT Mode (Redundant Link Management Technology)
------------------------------------------------
Parameter: RlmtMode
Values: CheckLinkState, CheckLocalPort, CheckSeg, DualNet
Default: CheckLinkState
RLMT monitors the status of the port. If the link of the active port
fails, RLMT switches immediately to the standby link. The virtual link is
maintained as long as at least one 'physical' link is up.
Possible modes:
-- CheckLinkState - Check link state only: RLMT uses the link state
reported by the adapter hardware for each individual port to
determine whether a port can be used for all network traffic or
not.
-- CheckLocalPort - In this mode, RLMT monitors the network path
between the two ports of an adapter by regularly exchanging packets
between them. This mode requires a network configuration in which
the two ports are able to "see" each other (i.e. there must not be
any router between the ports).
-- CheckSeg - Check local port and segmentation: This mode supports the
same functions as the CheckLocalPort mode and additionally checks
network segmentation between the ports. Therefore, this mode is only
to be used if Gigabit Ethernet switches are installed on the network
that have been configured to use the Spanning Tree protocol.
-- DualNet - In this mode, ports A and B are used as separate devices.
If you have a dual port adapter, port A will be configured as eth0
and port B as eth1. Both ports can be used independently with
distinct IP addresses. The preferred port setting is not used.
RLMT is turned off.
NOTE: RLMT modes CheckLocalPort (CLP) and CheckSeg (CLPSS) are designed to
operate in configurations where a network path between the ports on one
adapter exists.
Moreover, they are not designed to work where adapters are connected
back-to-back.
***
5 Large Frame Support
======================
The driver supports large frames (also called jumbo frames). Using large
frames can result in an improved throughput if transferring large amounts
of data.
To enable large frames, set the MTU (maximum transmission unit) of the
interface to the desired value (up to 9000) by executing the following
command:
ifconfig eth0 mtu 9000
This will only work if you have two adapters connected back-to-back
or if you use a switch that supports large frames. When using a switch,
it should be configured to allow large frames and auto-negotiation should
be set to OFF. The setting must be configured on all adapters that can be
reached by the large frames. If one adapter is not set to receive large
frames, it will simply drop them.
You can switch back to the standard ethernet frame size by executing the
following command:
ifconfig eth0 mtu 1500
To permanently configure this setting, add a script with the 'ifconfig'
line to the system startup sequence (named something like "S99sk98lin"
in /etc/rc.d/rc2.d).
***
6 VLAN and Link Aggregation Support (IEEE 802.1, 802.1q, 802.3ad)
==================================================================
The Marvell Yukon/SysKonnect Linux drivers are able to support VLAN and
Link Aggregation according to IEEE standards 802.1, 802.1q, and 802.3ad.
These features are only available after installation of open source
modules available on the Internet:
For VLAN go to: http://www.candelatech.com/~greear/vlan.html
For Link Aggregation go to: http://www.st.rim.or.jp/~yumo
NOTE: SysKonnect GmbH does not offer any support for these open source
modules and does not take the responsibility for any kind of
failures or problems arising in connection with these modules.
NOTE: Configuring Link Aggregation on a SysKonnect dual link adapter may
cause problems when unloading the driver.
7 Troubleshooting
==================
If any problems occur during the installation process, check the
following list:
Problem: The SK-98xx adapter cannot be found by the driver.
Solution: In /proc/pci search for the following entry:
'Ethernet controller: SysKonnect SK-98xx ...'
If this entry exists, the SK-98xx or SK-98xx V2.0 adapter has
been found by the system and should be operational.
If this entry does not exist or if the file '/proc/pci' is not
found, there may be a hardware problem or the PCI support may
not be enabled in your kernel.
The adapter can be checked using the diagnostics program which
is available on the SysKonnect web site:
www.syskonnect.com
Some COMPAQ machines have problems dealing with PCI under Linux.
This problem is described in the 'PCI howto' document
(included in some distributions or available from the
web, e.g. at 'www.linux.org').
Problem: Programs such as 'ifconfig' or 'route' cannot be found or the
error message 'Operation not permitted' is displayed.
Reason: You are not logged in as user 'root'.
Solution: Logout and login as 'root' or change to 'root' via 'su'.
Problem: Upon use of the command 'ping <address>' the message
"ping: sendto: Network is unreachable" is displayed.
Reason: Your route is not set correctly.
Solution: If you are using RedHat, you probably forgot to set up the
route in the 'network configuration'.
Check the existing routes with the 'route' command and check
if an entry for 'eth0' exists, and if so, if it is set correctly.
Problem: The driver can be started, the adapter is connected to the
network, but you cannot receive or transmit any packets;
e.g. 'ping' does not work.
Reason: There is an incorrect route in your routing table.
Solution: Check the routing table with the command 'route' and read the
manual help pages dealing with routes (enter 'man route').
NOTE: Although the 2.2.x kernel versions generate the routing entry
automatically, problems of this kind may occur here as well. We've
come across a situation in which the driver started correctly at
system start, but after the driver has been removed and reloaded,
the route of the adapter's network pointed to the 'dummy0' device
and had to be corrected manually.
Problem: Your computer should act as a router between multiple
IP subnetworks (using multiple adapters), but computers in
other subnetworks cannot be reached.
Reason: Either the router's kernel is not configured for IP forwarding
or the routing table and gateway configuration of at least one
computer is not working.
Problem: Upon driver start, the following error message is displayed:
"eth0: -- ERROR --
Class: internal Software error
Nr: 0xcc
Msg: SkGeInitPort() cannot init running ports"
Reason: You are using a driver compiled for single processor machines
on a multiprocessor machine with SMP (Symmetric MultiProcessor)
kernel.
Solution: Configure your kernel appropriately and recompile the kernel or
the modules.
If your problem is not listed here, please contact SysKonnect's technical
support for help (linux@syskonnect.de).
When contacting our technical support, please ensure that the following
information is available:
- System Manufacturer and HW Information (CPU, Memory... )
- PCI-Boards in your system
- Distribution
- Kernel version
- Driver version
***
***End of Readme File***

View File

@ -155,6 +155,8 @@ Suppose, however, that the firmware file is located on a filesystem accessible
only through another device that hasn't been resumed yet. In that case,
request_firmware() will fail regardless of whether or not the freezing of tasks
is used. Consequently, the problem is not really related to the freezing of
tasks, since it generally exists anyway. [The solution to this particular
problem is to keep the firmware in memory after it's loaded for the first time
and upload if from memory to the device whenever necessary.]
tasks, since it generally exists anyway.
A driver must have all firmwares it may need in RAM before suspend() is called.
If keeping them is not practical, for example due to their size, they must be
requested early enough using the suspend notifier API described in notifiers.txt.

View File

@ -0,0 +1,50 @@
Suspend notifiers
(C) 2007 Rafael J. Wysocki <rjw@sisk.pl>, GPL
There are some operations that device drivers may want to carry out in their
.suspend() routines, but shouldn't, because they can cause the hibernation or
suspend to fail. For example, a driver may want to allocate a substantial amount
of memory (like 50 MB) in .suspend(), but that shouldn't be done after the
swsusp's memory shrinker has run.
Also, there may be some operations that subsystems want to carry out before a
hibernation/suspend or after a restore/resume, requiring the system to be fully
functional, so the drivers' .suspend() and .resume() routines are not suitable
for this purpose. For example, device drivers may want to upload firmware to
their devices after a restore from a hibernation image, but they cannot do it by
calling request_firmware() from their .resume() routines (user land processes
are frozen at this point). The solution may be to load the firmware into
memory before processes are frozen and upload it from there in the .resume()
routine. Of course, a hibernation notifier may be used for this purpose.
The subsystems that have such needs can register suspend notifiers that will be
called upon the following events by the suspend core:
PM_HIBERNATION_PREPARE The system is going to hibernate or suspend, tasks will
be frozen immediately.
PM_POST_HIBERNATION The system memory state has been restored from a
hibernation image or an error occurred during the
hibernation. Device drivers' .resume() callbacks have
been executed and tasks have been thawed.
PM_SUSPEND_PREPARE The system is preparing for a suspend.
PM_POST_SUSPEND The system has just resumed or an error occurred during
the suspend. Device drivers' .resume() callbacks have
been executed and tasks have been thawed.
It is generally assumed that whatever the notifiers do for
PM_HIBERNATION_PREPARE, should be undone for PM_POST_HIBERNATION. Analogously,
operations performed for PM_SUSPEND_PREPARE should be reversed for
PM_POST_SUSPEND. Additionally, all of the notifiers are called for
PM_POST_HIBERNATION if one of them fails for PM_HIBERNATION_PREPARE, and
all of the notifiers are called for PM_POST_SUSPEND if one of them fails for
PM_SUSPEND_PREPARE.
The hibernation and suspend notifiers are called with pm_mutex held. They are
defined in the usual way, but their last argument is meaningless (it is always
NULL). To register and/or unregister a suspend notifier use the functions
register_pm_notifier() and unregister_pm_notifier(), respectively, defined in
include/linux/suspend.h . If you don't need to unregister the notifier, you can
also use the pm_notifier() macro defined in include/linux/suspend.h .

View File

@ -50,7 +50,7 @@ Table of Contents
g) Freescale SOC SEC Security Engines
h) Board Control and Status (BCSR)
i) Freescale QUICC Engine module (QE)
j) Flash chip nodes
j) CFI or JEDEC memory-mapped NOR flash
k) Global Utilities Block
VII - Specifying interrupt information for devices
@ -1250,6 +1250,12 @@ platforms are moved over to use the flattened-device-tree model.
network device. This is used by the bootwrapper to interpret
MAC addresses passed by the firmware when no information other
than indices is available to associate an address with a device.
- phy-connection-type : a string naming the controller/PHY interface type,
i.e., "mii" (default), "rmii", "gmii", "rgmii", "rgmii-id", "sgmii",
"tbi", or "rtbi". This property is only really needed if the connection
is of type "rgmii-id", as all other connection types are detected by
hardware.
Example:
@ -1504,7 +1510,10 @@ platforms are moved over to use the flattened-device-tree model.
i) Freescale QUICC Engine module (QE)
This represents qe module that is installed on PowerQUICC II Pro.
Hopefully it will merge backward compatibility with CPM/CPM2.
NOTE: This is an interim binding; it should be updated to fit
in with the CPM binding later in this document.
Basically, it is a bus of devices, that could act more or less
as a complete entity (UCC, USB etc ). All of them should be siblings on
the "root" qe node, using the common properties from there.
@ -1542,7 +1551,7 @@ platforms are moved over to use the flattened-device-tree model.
Required properties:
- device_type : should be "spi".
- compatible : should be "fsl_spi".
- mode : the SPI operation mode, it can be "cpu" or "qe".
- mode : the SPI operation mode, it can be "cpu" or "cpu-qe".
- reg : Offset and length of the register set for the device
- interrupts : <a b> where a is the interrupt number and b is a
field that represents an encoding of the sense and level
@ -1751,45 +1760,69 @@ platforms are moved over to use the flattened-device-tree model.
};
};
j) Flash chip nodes
j) CFI or JEDEC memory-mapped NOR flash
Flash chips (Memory Technology Devices) are often used for solid state
file systems on embedded devices.
Required properties:
- compatible : should contain the specific model of flash chip(s)
used, if known, followed by either "cfi-flash" or "jedec-flash"
- reg : Address range of the flash chip
- bank-width : Width (in bytes) of the flash bank. Equal to the
device width times the number of interleaved chips.
- device-width : (optional) Width of a single flash chip. If
omitted, assumed to be equal to 'bank-width'.
- #address-cells, #size-cells : Must be present if the flash has
sub-nodes representing partitions (see below). In this case
both #address-cells and #size-cells must be equal to 1.
- device_type : has to be "rom"
- compatible : Should specify what this flash device is compatible with.
Currently, this is most likely to be "direct-mapped" (which
corresponds to the MTD physmap mapping driver).
- reg : Offset and length of the register set (or memory mapping) for
the device.
- bank-width : Width of the flash data bus in bytes. Required
for the NOR flashes (compatible == "direct-mapped" and others) ONLY.
For JEDEC compatible devices, the following additional properties
are defined:
Recommended properties :
- vendor-id : Contains the flash chip's vendor id (1 byte).
- device-id : Contains the flash chip's device id (1 byte).
- partitions : Several pairs of 32-bit values where the first value is
partition's offset from the start of the device and the second one is
partition size in bytes with LSB used to signify a read only
partition (so, the partition size should always be an even number).
- partition-names : The list of concatenated zero terminated strings
representing the partition names.
- probe-type : The type of probe which should be done for the chip
(JEDEC vs CFI actually). Valid ONLY for NOR flashes.
In addition to the information on the flash bank itself, the
device tree may optionally contain additional information
describing partitions of the flash address space. This can be
used on platforms which have strong conventions about which
portions of the flash are used for what purposes, but which don't
use an on-flash partition table such as RedBoot.
Example:
Each partition is represented as a sub-node of the flash device.
Each node's name represents the name of the corresponding
partition of the flash device.
flash@ff000000 {
device_type = "rom";
compatible = "direct-mapped";
probe-type = "CFI";
reg = <ff000000 01000000>;
bank-width = <4>;
partitions = <00000000 00f80000
00f80000 00080001>;
partition-names = "fs\0firmware";
};
Flash partitions
- reg : The partition's offset and size within the flash bank.
- label : (optional) The label / name for this flash partition.
If omitted, the label is taken from the node name (excluding
the unit address).
- read-only : (optional) This parameter, if present, is a hint to
Linux that this flash partition should only be mounted
read-only. This is usually used for flash partitions
containing early-boot firmware images or data which should not
be clobbered.
Example:
flash@ff000000 {
compatible = "amd,am29lv128ml", "cfi-flash";
reg = <ff000000 01000000>;
bank-width = <4>;
device-width = <1>;
#address-cells = <1>;
#size-cells = <1>;
fs@0 {
label = "fs";
reg = <0 f80000>;
};
firmware@f80000 {
label ="firmware";
reg = <f80000 80000>;
read-only;
};
};
k) Global Utilities Block
@ -1818,6 +1851,397 @@ platforms are moved over to use the flattened-device-tree model.
fsl,has-rstcr;
};
l) Freescale Communications Processor Module
NOTE: This is an interim binding, and will likely change slightly,
as more devices are supported. The QE bindings especially are
incomplete.
i) Root CPM node
Properties:
- compatible : "fsl,cpm1", "fsl,cpm2", or "fsl,qe".
- reg : A 48-byte region beginning with CPCR.
Example:
cpm@119c0 {
#address-cells = <1>;
#size-cells = <1>;
#interrupt-cells = <2>;
compatible = "fsl,mpc8272-cpm", "fsl,cpm2";
reg = <119c0 30>;
}
ii) Properties common to multiple CPM/QE devices
- fsl,cpm-command : This value is ORed with the opcode and command flag
to specify the device on which a CPM command operates.
- fsl,cpm-brg : Indicates which baud rate generator the device
is associated with. If absent, an unused BRG
should be dynamically allocated. If zero, the
device uses an external clock rather than a BRG.
- reg : Unless otherwise specified, the first resource represents the
scc/fcc/ucc registers, and the second represents the device's
parameter RAM region (if it has one).
iii) Serial
Currently defined compatibles:
- fsl,cpm1-smc-uart
- fsl,cpm2-smc-uart
- fsl,cpm1-scc-uart
- fsl,cpm2-scc-uart
- fsl,qe-uart
Example:
serial@11a00 {
device_type = "serial";
compatible = "fsl,mpc8272-scc-uart",
"fsl,cpm2-scc-uart";
reg = <11a00 20 8000 100>;
interrupts = <28 8>;
interrupt-parent = <&PIC>;
fsl,cpm-brg = <1>;
fsl,cpm-command = <00800000>;
};
iii) Network
Currently defined compatibles:
- fsl,cpm1-scc-enet
- fsl,cpm2-scc-enet
- fsl,cpm1-fec-enet
- fsl,cpm2-fcc-enet (third resource is GFEMR)
- fsl,qe-enet
Example:
ethernet@11300 {
device_type = "network";
compatible = "fsl,mpc8272-fcc-enet",
"fsl,cpm2-fcc-enet";
reg = <11300 20 8400 100 11390 1>;
local-mac-address = [ 00 00 00 00 00 00 ];
interrupts = <20 8>;
interrupt-parent = <&PIC>;
phy-handle = <&PHY0>;
linux,network-index = <0>;
fsl,cpm-command = <12000300>;
};
iv) MDIO
Currently defined compatibles:
fsl,pq1-fec-mdio (reg is same as first resource of FEC device)
fsl,cpm2-mdio-bitbang (reg is port C registers)
Properties for fsl,cpm2-mdio-bitbang:
fsl,mdio-pin : pin of port C controlling mdio data
fsl,mdc-pin : pin of port C controlling mdio clock
Example:
mdio@10d40 {
device_type = "mdio";
compatible = "fsl,mpc8272ads-mdio-bitbang",
"fsl,mpc8272-mdio-bitbang",
"fsl,cpm2-mdio-bitbang";
reg = <10d40 14>;
#address-cells = <1>;
#size-cells = <0>;
fsl,mdio-pin = <12>;
fsl,mdc-pin = <13>;
};
v) Baud Rate Generators
Currently defined compatibles:
fsl,cpm-brg
fsl,cpm1-brg
fsl,cpm2-brg
Properties:
- reg : There may be an arbitrary number of reg resources; BRG
numbers are assigned to these in order.
- clock-frequency : Specifies the base frequency driving
the BRG.
Example:
brg@119f0 {
compatible = "fsl,mpc8272-brg",
"fsl,cpm2-brg",
"fsl,cpm-brg";
reg = <119f0 10 115f0 10>;
clock-frequency = <d#25000000>;
};
vi) Interrupt Controllers
Currently defined compatibles:
- fsl,cpm1-pic
- only one interrupt cell
- fsl,pq1-pic
- fsl,cpm2-pic
- second interrupt cell is level/sense:
- 2 is falling edge
- 8 is active low
Example:
interrupt-controller@10c00 {
#interrupt-cells = <2>;
interrupt-controller;
reg = <10c00 80>;
compatible = "mpc8272-pic", "fsl,cpm2-pic";
};
vii) USB (Universal Serial Bus Controller)
Properties:
- compatible : "fsl,cpm1-usb", "fsl,cpm2-usb", "fsl,qe-usb"
Example:
usb@11bc0 {
#address-cells = <1>;
#size-cells = <0>;
compatible = "fsl,cpm2-usb";
reg = <11b60 18 8b00 100>;
interrupts = <b 8>;
interrupt-parent = <&PIC>;
fsl,cpm-command = <2e600000>;
};
viii) Multi-User RAM (MURAM)
The multi-user/dual-ported RAM is expressed as a bus under the CPM node.
Ranges must be set up subject to the following restrictions:
- Children's reg nodes must be offsets from the start of all muram, even
if the user-data area does not begin at zero.
- If multiple range entries are used, the difference between the parent
address and the child address must be the same in all, so that a single
mapping can cover them all while maintaining the ability to determine
CPM-side offsets with pointer subtraction. It is recommended that
multiple range entries not be used.
- A child address of zero must be translatable, even if no reg resources
contain it.
A child "data" node must exist, compatible with "fsl,cpm-muram-data", to
indicate the portion of muram that is usable by the OS for arbitrary
purposes. The data node may have an arbitrary number of reg resources,
all of which contribute to the allocatable muram pool.
Example, based on mpc8272:
muram@0 {
#address-cells = <1>;
#size-cells = <1>;
ranges = <0 0 10000>;
data@0 {
compatible = "fsl,cpm-muram-data";
reg = <0 2000 9800 800>;
};
};
m) Chipselect/Local Bus
Properties:
- name : Should be localbus
- #address-cells : Should be either two or three. The first cell is the
chipselect number, and the remaining cells are the
offset into the chipselect.
- #size-cells : Either one or two, depending on how large each chipselect
can be.
- ranges : Each range corresponds to a single chipselect, and cover
the entire access window as configured.
Example:
localbus@f0010100 {
compatible = "fsl,mpc8272ads-localbus",
"fsl,mpc8272-localbus",
"fsl,pq2-localbus";
#address-cells = <2>;
#size-cells = <1>;
reg = <f0010100 40>;
ranges = <0 0 fe000000 02000000
1 0 f4500000 00008000>;
flash@0,0 {
compatible = "jedec-flash";
reg = <0 0 2000000>;
bank-width = <4>;
device-width = <1>;
};
board-control@1,0 {
reg = <1 0 20>;
compatible = "fsl,mpc8272ads-bcsr";
};
};
n) 4xx/Axon EMAC ethernet nodes
The EMAC ethernet controller in IBM and AMCC 4xx chips, and also
the Axon bridge. To operate this needs to interact with a
special McMAL DMA controller, and sometimes an RGMII or ZMII
interface. In addition to the nodes and properties described
below, the node for the OPB bus on which the EMAC sits must have a
correct clock-frequency property.
i) The EMAC node itself
Required properties:
- device_type : "network"
- compatible : compatible list, contains 2 entries, first is
"ibm,emac-CHIP" where CHIP is the host ASIC (440gx,
405gp, Axon) and second is either "ibm,emac" or
"ibm,emac4". For Axon, thus, we have: "ibm,emac-axon",
"ibm,emac4"
- interrupts : <interrupt mapping for EMAC IRQ and WOL IRQ>
- interrupt-parent : optional, if needed for interrupt mapping
- reg : <registers mapping>
- local-mac-address : 6 bytes, MAC address
- mal-device : phandle of the associated McMAL node
- mal-tx-channel : 1 cell, index of the tx channel on McMAL associated
with this EMAC
- mal-rx-channel : 1 cell, index of the rx channel on McMAL associated
with this EMAC
- cell-index : 1 cell, hardware index of the EMAC cell on a given
ASIC (typically 0x0 and 0x1 for EMAC0 and EMAC1 on
each Axon chip)
- max-frame-size : 1 cell, maximum frame size supported in bytes
- rx-fifo-size : 1 cell, Rx fifo size in bytes for 10 and 100 Mb/sec
operations.
For Axon, 2048
- tx-fifo-size : 1 cell, Tx fifo size in bytes for 10 and 100 Mb/sec
operations.
For Axon, 2048.
- fifo-entry-size : 1 cell, size of a fifo entry (used to calculate
thresholds).
For Axon, 0x00000010
- mal-burst-size : 1 cell, MAL burst size (used to calculate thresholds)
in bytes.
For Axon, 0x00000100 (I think ...)
- phy-mode : string, mode of operations of the PHY interface.
Supported values are: "mii", "rmii", "smii", "rgmii",
"tbi", "gmii", "rtbi", "sgmii".
For Axon on CAB, it is "rgmii"
- mdio-device : 1 cell, required iff using shared MDIO registers
(440EP). phandle of the EMAC to use to drive the
MDIO lines for the PHY used by this EMAC.
- zmii-device : 1 cell, required iff connected to a ZMII. phandle of
the ZMII device node
- zmii-channel : 1 cell, required iff connected to a ZMII. Which ZMII
channel or 0xffffffff if ZMII is only used for MDIO.
- rgmii-device : 1 cell, required iff connected to an RGMII. phandle
of the RGMII device node.
For Axon: phandle of plb5/plb4/opb/rgmii
- rgmii-channel : 1 cell, required iff connected to an RGMII. Which
RGMII channel is used by this EMAC.
For Axon: present, whatever value is appropriate for each
EMAC, that is the content of the current (bogus) "phy-port"
property.
Recommended properties:
- linux,network-index : This is the intended "index" of this
network device. This is used by the bootwrapper to interpret
MAC addresses passed by the firmware when no information other
than indices is available to associate an address with a device.
Optional properties:
- phy-address : 1 cell, optional, MDIO address of the PHY. If absent,
a search is performed.
- phy-map : 1 cell, optional, bitmap of addresses to probe the PHY
for, used if phy-address is absent. bit 0x00000001 is
MDIO address 0.
For Axon it can be absent, though my current driver
doesn't handle phy-address yet so for now, keep
0x00ffffff in it.
- rx-fifo-size-gige : 1 cell, Rx fifo size in bytes for 1000 Mb/sec
operations (if absent the value is the same as
rx-fifo-size). For Axon, either absent or 2048.
- tx-fifo-size-gige : 1 cell, Tx fifo size in bytes for 1000 Mb/sec
operations (if absent the value is the same as
tx-fifo-size). For Axon, either absent or 2048.
- tah-device : 1 cell, optional. If connected to a TAH engine for
offload, phandle of the TAH device node.
- tah-channel : 1 cell, optional. If appropriate, channel used on the
TAH engine.
Example:
EMAC0: ethernet@40000800 {
linux,network-index = <0>;
device_type = "network";
compatible = "ibm,emac-440gp", "ibm,emac";
interrupt-parent = <&UIC1>;
interrupts = <1c 4 1d 4>;
reg = <40000800 70>;
local-mac-address = [00 04 AC E3 1B 1E];
mal-device = <&MAL0>;
mal-tx-channel = <0 1>;
mal-rx-channel = <0>;
cell-index = <0>;
max-frame-size = <5dc>;
rx-fifo-size = <1000>;
tx-fifo-size = <800>;
phy-mode = "rmii";
phy-map = <00000001>;
zmii-device = <&ZMII0>;
zmii-channel = <0>;
};
ii) McMAL node
Required properties:
- device_type : "dma-controller"
- compatible : compatible list, containing 2 entries, first is
"ibm,mcmal-CHIP" where CHIP is the host ASIC (like
emac) and the second is either "ibm,mcmal" or
"ibm,mcmal2".
For Axon, "ibm,mcmal-axon","ibm,mcmal2"
- interrupts : <interrupt mapping for the MAL interrupts sources:
5 sources: tx_eob, rx_eob, serr, txde, rxde>.
For Axon: This is _different_ from the current
firmware. We use the "delayed" interrupts for txeob
and rxeob. Thus we end up with mapping those 5 MPIC
interrupts, all level positive sensitive: 10, 11, 32,
33, 34 (in decimal)
- dcr-reg : < DCR registers range >
- dcr-parent : if needed for dcr-reg
- num-tx-chans : 1 cell, number of Tx channels
- num-rx-chans : 1 cell, number of Rx channels
iii) ZMII node
Required properties:
- compatible : compatible list, containing 2 entries, first is
"ibm,zmii-CHIP" where CHIP is the host ASIC (like
EMAC) and the second is "ibm,zmii".
For Axon, there is no ZMII node.
- reg : <registers mapping>
iv) RGMII node
Required properties:
- compatible : compatible list, containing 2 entries, first is
"ibm,rgmii-CHIP" where CHIP is the host ASIC (like
EMAC) and the second is "ibm,rgmii".
For Axon, "ibm,rgmii-axon","ibm,rgmii"
- reg : <registers mapping>
- revision : as provided by the RGMII new version register if
available.
For Axon: 0x0000012a
More devices will be defined as this spec matures.
VII - Specifying interrupt information for devices

89
Documentation/rfkill.txt Normal file
View File

@ -0,0 +1,89 @@
rfkill - RF switch subsystem support
====================================
1 Implementation details
2 Driver support
3 Userspace support
===============================================================================
1: Implementation details
The rfkill switch subsystem offers support for keys often found on laptops
to enable wireless devices like WiFi and Bluetooth.
This is done by providing the user 3 possibilities:
1 - The rfkill system handles all events; userspace is not aware of events.
2 - The rfkill system handles all events; userspace is informed about the events.
3 - The rfkill system does not handle events; userspace handles all events.
The buttons to enable and disable the wireless radios are important in
situations where the user is for example using his laptop on a location where
wireless radios _must_ be disabled (e.g. airplanes).
Because of this requirement, userspace support for the keys should not be
made mandatory. Because userspace might want to perform some additional smarter
tasks when the key is pressed, rfkill still provides userspace the possibility
to take over the task to handle the key events.
The system inside the kernel has been split into 2 separate sections:
1 - RFKILL
2 - RFKILL_INPUT
The first option enables rfkill support and will make sure userspace will
be notified of any events through the input device. It also creates several
sysfs entries which can be used by userspace. See section "Userspace support".
The second option provides an rfkill input handler. This handler will
listen to all rfkill key events and will toggle the radio accordingly.
With this option enabled userspace could either do nothing or simply
perform monitoring tasks.
====================================
2: Driver support
To build a driver with rfkill subsystem support, the driver should
depend on the Kconfig symbol RFKILL; it should _not_ depend on
RFKILL_INPUT.
Unless key events trigger an interrupt to which the driver listens, polling
will be required to determine the key state changes. For this the input
layer provides the input-polldev handler.
A driver should implement a few steps to correctly make use of the
rfkill subsystem. First for non-polling drivers:
- rfkill_allocate()
- input_allocate_device()
- rfkill_register()
- input_register_device()
For polling drivers:
- rfkill_allocate()
- input_allocate_polled_device()
- rfkill_register()
- input_register_polled_device()
When a key event has been detected, the correct event should be
sent over the input device which has been registered by the driver.
====================================
3: Userspace support
For each key an input device will be created which will send out the correct
key event when the rfkill key has been pressed.
The following sysfs entries will be created:
name: Name assigned by driver to this key (interface or driver name).
type: Name of the key type ("wlan", "bluetooth", etc).
state: Current state of the key. 1: On, 0: Off.
claim: 1: Userspace handles events, 0: Kernel handles events
Both the "state" and "claim" entries are also writable. For the "state" entry
this means that when 1 or 0 is written all radios, not yet in the requested
state, will be toggled accordingly.
For the "claim" entry writing 1 to it means that the kernel no longer handles
key events even though RFKILL_INPUT input was enabled. When "claim" has been
set to 1, userspace should make sure that it listens for the input events or
check the sysfs "state" entry regularly to correctly perform the required
tasks when the rfkill key is pressed.

View File

@ -0,0 +1,26 @@
00-INDEX
- this file.
3270.ChangeLog
- ChangeLog for the UTS Global 3270-support patch (outdated).
3270.txt
- how to use the IBM 3270 display system support.
cds.txt
- s390 common device support (common I/O layer).
CommonIO
- common I/O layer command line parameters, procfs and debugfs entries
config3270.sh
- example configuration for 3270 devices.
DASD
- information on the DASD disk device driver.
Debugging390.txt
- hints for debugging on s390 systems.
driver-model.txt
- information on s390 devices and the driver model.
monreader.txt
- information on accessing the z/VM monitor stream from Linux.
s390dbf.txt
- information on using the s390 debug feature.
TAPE
- information on the driver for channel-attached tapes.
zfcpdump
- information on the s390 SCSI dump tool.

View File

@ -1,5 +1,5 @@
S/390 common I/O-Layer - command line parameters and /proc entries
==================================================================
S/390 common I/O-Layer - command line parameters, procfs and debugfs entries
============================================================================
Command line parameters
-----------------------
@ -7,9 +7,9 @@ Command line parameters
* cio_msg = yes | no
Determines whether information on found devices and sensed device
characteristics should be shown during startup, i. e. messages of the types
"Detected device 0.0.4711 on subchannel 0.0.0042" and "SenseID: Device
0.0.4711 reports: ...".
characteristics should be shown during startup or when new devices are
found, i. e. messages of the types "Detected device 0.0.4711 on subchannel
0.0.0042" and "SenseID: Device 0.0.4711 reports: ...".
Default is off.
@ -26,8 +26,10 @@ Command line parameters
An ignored device can be un-ignored later; see the "/proc entries"-section for
details.
The devices must be given either as bus ids (0.0.abcd) or as hexadecimal
device numbers (0xabcd or abcd, for 2.4 backward compatibility).
The devices must be given either as bus ids (0.x.abcd) or as hexadecimal
device numbers (0xabcd or abcd, for 2.4 backward compatibility). If you
give a device number 0xabcd, it will be interpreted as 0.0.abcd.
You can use the 'all' keyword to ignore all devices.
The '!' operator will cause the I/O-layer to _not_ ignore a device.
The command line is parsed from left to right.
@ -81,31 +83,36 @@ Command line parameters
will add 0.0.a000-0.0.accc and 0.0.af00-0.0.afff to the list of ignored
devices.
The devices can be specified either by bus id (0.0.abcd) or, for 2.4 backward
compatibility, by the device number in hexadecimal (0xabcd or abcd).
The devices can be specified either by bus id (0.x.abcd) or, for 2.4 backward
compatibility, by the device number in hexadecimal (0xabcd or abcd). Device
numbers given as 0xabcd will be interpreted as 0.0.abcd.
* For some of the information present in the /proc filesystem in 2.4 (namely,
/proc/subchannels and /proc/chpids), see driver-model.txt.
Information formerly in /proc/irq_count is now in /proc/interrupts.
* /proc/s390dbf/cio_*/ (S/390 debug feature)
debugfs entries
---------------
* /sys/kernel/debug/s390dbf/cio_*/ (S/390 debug feature)
Some views generated by the debug feature to hold various debug outputs.
- /proc/s390dbf/cio_crw/sprintf
- /sys/kernel/debug/s390dbf/cio_crw/sprintf
Messages from the processing of pending channel report words (machine check
handling), which will also show when CONFIG_DEBUG_CRW is defined.
handling).
- /proc/s390dbf/cio_msg/sprintf
Various debug messages from the common I/O-layer; generally, messages which
will also show when CONFIG_DEBUG_IO is defined.
- /sys/kernel/debug/s390dbf/cio_msg/sprintf
Various debug messages from the common I/O-layer, including messages
printed when cio_msg=yes.
- /proc/s390dbf/cio_trace/hex_ascii
- /sys/kernel/debug/s390dbf/cio_trace/hex_ascii
Logs the calling of functions in the common I/O-layer and, if applicable,
which subchannel they were called for, as well as dumps of some data
structures (like irb in an error case).
The level of logging can be changed to be more or less verbose by piping to
/proc/s390dbf/cio_*/level a number between 0 and 6; see the documentation on
the S/390 debug feature (Documentation/s390/s390dbf.txt) for details.
* For some of the information present in the /proc filesystem in 2.4 (namely,
/proc/subchannels and /proc/chpids), see driver-model.txt.
Information formerly in /proc/irq_count is now in /proc/interrupts.
/sys/kernel/debug/s390dbf/cio_*/level a number between 0 and 6; see the
documentation on the S/390 debug feature (Documentation/s390/s390dbf.txt)
for details.

View File

@ -286,10 +286,10 @@ first:
timeout value
-EIO: the common I/O layer terminated the request due to an error state
If the concurrent sense flag in the extended status word in the irb is set, the
field irb->scsw.count describes the number of device specific sense bytes
available in the extended control word irb->scsw.ecw[0]. No device sensing by
the device driver itself is required.
If the concurrent sense flag in the extended status word (esw) in the irb is
set, the field erw.scnt in the esw describes the number of device specific
sense bytes available in the extended control word irb->scsw.ecw[]. No device
sensing by the device driver itself is required.
The device interrupt handler can use the following definitions to investigate
the primary unit check source coded in sense byte 0 :

View File

@ -83,7 +83,7 @@ Some implementation details:
CFS uses nanosecond granularity accounting and does not rely on any
jiffies or other HZ detail. Thus the CFS scheduler has no notion of
'timeslices' and has no heuristics whatsoever. There is only one
central tunable:
central tunable (you have to switch on CONFIG_SCHED_DEBUG):
/proc/sys/kernel/sched_granularity_ns

View File

@ -0,0 +1,108 @@
This document explains the thinking about the revamped and streamlined
nice-levels implementation in the new Linux scheduler.
Nice levels were always pretty weak under Linux and people continuously
pestered us to make nice +19 tasks use up much less CPU time.
Unfortunately that was not that easy to implement under the old
scheduler, (otherwise we'd have done it long ago) because nice level
support was historically coupled to timeslice length, and timeslice
units were driven by the HZ tick, so the smallest timeslice was 1/HZ.
In the O(1) scheduler (in 2003) we changed negative nice levels to be
much stronger than they were before in 2.4 (and people were happy about
that change), and we also intentionally calibrated the linear timeslice
rule so that nice +19 level would be _exactly_ 1 jiffy. To better
understand it, the timeslice graph went like this (cheesy ASCII art
alert!):
A
\ | [timeslice length]
\ |
\ |
\ |
\ |
\|___100msecs
|^ . _
| ^ . _
| ^ . _
-*----------------------------------*-----> [nice level]
-20 | +19
|
|
So that if someone wanted to really renice tasks, +19 would give a much
bigger hit than the normal linear rule would do. (The solution of
changing the ABI to extend priorities was discarded early on.)
This approach worked to some degree for some time, but later on with
HZ=1000 it caused 1 jiffy to be 1 msec, which meant 0.1% CPU usage which
we felt to be a bit excessive. Excessive _not_ because it's too small of
a CPU utilization, but because it causes too frequent (once per
millisec) rescheduling. (and would thus trash the cache, etc. Remember,
this was long ago when hardware was weaker and caches were smaller, and
people were running number crunching apps at nice +19.)
So for HZ=1000 we changed nice +19 to 5msecs, because that felt like the
right minimal granularity - and this translates to 5% CPU utilization.
But the fundamental HZ-sensitive property for nice+19 still remained,
and we never got a single complaint about nice +19 being too _weak_ in
terms of CPU utilization, we only got complaints about it (still) being
too _strong_ :-)
To sum it up: we always wanted to make nice levels more consistent, but
within the constraints of HZ and jiffies and their nasty design level
coupling to timeslices and granularity it was not really viable.
The second (less frequent but still periodically occurring) complaint
about Linux's nice level support was its asymmetry around the origo
(which you can see demonstrated in the picture above), or more
accurately: the fact that nice level behavior depended on the _absolute_
nice level as well, while the nice API itself is fundamentally
"relative":
int nice(int inc);
asmlinkage long sys_nice(int increment)
(the first one is the glibc API, the second one is the syscall API.)
Note that the 'inc' is relative to the current nice level. Tools like
bash's "nice" command mirror this relative API.
With the old scheduler, if you for example started a niced task with +1
and another task with +2, the CPU split between the two tasks would
depend on the nice level of the parent shell - if it was at nice -10 the
CPU split was different than if it was at +5 or +10.
A third complaint against Linux's nice level support was that negative
nice levels were not 'punchy enough', so lots of people had to resort to
run audio (and other multimedia) apps under RT priorities such as
SCHED_FIFO. But this caused other problems: SCHED_FIFO is not starvation
proof, and a buggy SCHED_FIFO app can also lock up the system for good.
The new scheduler in v2.6.23 addresses all three types of complaints:
To address the first complaint (of nice levels being not "punchy"
enough), the scheduler was decoupled from 'time slice' and HZ concepts
(and granularity was made a separate concept from nice levels) and thus
it was possible to implement better and more consistent nice +19
support: with the new scheduler nice +19 tasks get a HZ-independent
1.5%, instead of the variable 3%-5%-9% range they got in the old
scheduler.
To address the second complaint (of nice levels not being consistent),
the new scheduler makes nice(1) have the same CPU utilization effect on
tasks, regardless of their absolute nice levels. So on the new
scheduler, running a nice +10 and a nice +11 task has the same CPU
utilization "split" between them as running a nice -5 and a nice -4
task. (one will get 55% of the CPU, the other 45%.) That is why nice
levels were changed to be "multiplicative" (or exponential) - that way
it does not matter which nice level you start out from, the 'relative
result' will always be the same.
The third complaint (of negative nice levels not being "punchy" enough
and forcing audio apps to run under the more dangerous SCHED_FIFO
scheduling policy) is addressed by the new scheduler almost
automatically: stronger negative nice levels are an automatic
side-effect of the recalibrated dynamic range of nice levels.

View File

@ -1,10 +1,11 @@
Version 10 of schedstats includes support for sched_domains, which
hit the mainline kernel in 2.6.7. Some counters make more sense to be
per-runqueue; other to be per-domain. Note that domains (and their associated
information) will only be pertinent and available on machines utilizing
CONFIG_SMP.
Version 14 of schedstats includes support for sched_domains, which hit the
mainline kernel in 2.6.20 although it is identical to the stats from version
12 which was in the kernel from 2.6.13-2.6.19 (version 13 never saw a kernel
release). Some counters make more sense to be per-runqueue; other to be
per-domain. Note that domains (and their associated information) will only
be pertinent and available on machines utilizing CONFIG_SMP.
In version 10 of schedstat, there is at least one level of domain
In version 14 of schedstat, there is at least one level of domain
statistics for each cpu listed, and there may well be more than one
domain. Domains have no particular names in this implementation, but
the highest numbered one typically arbitrates balancing across all the
@ -27,7 +28,7 @@ to write their own scripts, the fields are described here.
CPU statistics
--------------
cpu<N> 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28
cpu<N> 1 2 3 4 5 6 7 8 9 10 11 12
NOTE: In the sched_yield() statistics, the active queue is considered empty
if it has only one process in it, since obviously the process calling
@ -39,48 +40,20 @@ First four fields are sched_yield() statistics:
3) # of times just the expired queue was empty
4) # of times sched_yield() was called
Next four are schedule() statistics:
5) # of times the active queue had at least one other process on it
6) # of times we switched to the expired queue and reused it
7) # of times schedule() was called
8) # of times schedule() left the processor idle
Next three are schedule() statistics:
5) # of times we switched to the expired queue and reused it
6) # of times schedule() was called
7) # of times schedule() left the processor idle
Next four are active_load_balance() statistics:
9) # of times active_load_balance() was called
10) # of times active_load_balance() caused this cpu to gain a task
11) # of times active_load_balance() caused this cpu to lose a task
12) # of times active_load_balance() tried to move a task and failed
Next three are try_to_wake_up() statistics:
13) # of times try_to_wake_up() was called
14) # of times try_to_wake_up() successfully moved the awakening task
15) # of times try_to_wake_up() attempted to move the awakening task
Next two are wake_up_new_task() statistics:
16) # of times wake_up_new_task() was called
17) # of times wake_up_new_task() successfully moved the new task
Next one is a sched_migrate_task() statistic:
18) # of times sched_migrate_task() was called
Next one is a sched_balance_exec() statistic:
19) # of times sched_balance_exec() was called
Next two are try_to_wake_up() statistics:
8) # of times try_to_wake_up() was called
9) # of times try_to_wake_up() was called to wake up the local cpu
Next three are statistics describing scheduling latency:
20) sum of all time spent running by tasks on this processor (in ms)
21) sum of all time spent waiting to run by tasks on this processor (in ms)
22) # of tasks (not necessarily unique) given to the processor
The last six are statistics dealing with pull_task():
23) # of times pull_task() moved a task to this cpu when newly idle
24) # of times pull_task() stole a task from this cpu when another cpu
was newly idle
25) # of times pull_task() moved a task to this cpu when idle
26) # of times pull_task() stole a task from this cpu when another cpu
was idle
27) # of times pull_task() moved a task to this cpu when busy
28) # of times pull_task() stole a task from this cpu when another cpu
was busy
10) sum of all time spent running by tasks on this processor (in jiffies)
11) sum of all time spent waiting to run by tasks on this processor (in
jiffies)
12) # of timeslices run on this cpu
Domain statistics
@ -89,65 +62,95 @@ One of these is produced per domain for each cpu described. (Note that if
CONFIG_SMP is not defined, *no* domains are utilized and these lines
will not appear in the output.)
domain<N> 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
domain<N> <cpumask> 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36
The first field is a bit mask indicating what cpus this domain operates over.
The next fifteen are a variety of load_balance() statistics:
The next 24 are a variety of load_balance() statistics grouped into types
of idleness (idle, busy, and newly idle):
1) # of times in this domain load_balance() was called when the cpu
was idle
2) # of times in this domain load_balance() was called when the cpu
was busy
3) # of times in this domain load_balance() was called when the cpu
was just becoming idle
4) # of times in this domain load_balance() tried to move one or more
tasks and failed, when the cpu was idle
5) # of times in this domain load_balance() tried to move one or more
tasks and failed, when the cpu was busy
6) # of times in this domain load_balance() tried to move one or more
tasks and failed, when the cpu was just becoming idle
7) sum of imbalances discovered (if any) with each call to
load_balance() in this domain when the cpu was idle
8) sum of imbalances discovered (if any) with each call to
load_balance() in this domain when the cpu was busy
9) sum of imbalances discovered (if any) with each call to
load_balance() in this domain when the cpu was just becoming idle
10) # of times in this domain load_balance() was called but did not find
a busier queue while the cpu was idle
11) # of times in this domain load_balance() was called but did not find
a busier queue while the cpu was busy
12) # of times in this domain load_balance() was called but did not find
a busier queue while the cpu was just becoming idle
13) # of times in this domain a busier queue was found while the cpu was
idle but no busier group was found
14) # of times in this domain a busier queue was found while the cpu was
busy but no busier group was found
15) # of times in this domain a busier queue was found while the cpu was
just becoming idle but no busier group was found
1) # of times in this domain load_balance() was called when the
cpu was idle
2) # of times in this domain load_balance() checked but found
the load did not require balancing when the cpu was idle
3) # of times in this domain load_balance() tried to move one or
more tasks and failed, when the cpu was idle
4) sum of imbalances discovered (if any) with each call to
load_balance() in this domain when the cpu was idle
5) # of times in this domain pull_task() was called when the cpu
was idle
6) # of times in this domain pull_task() was called even though
the target task was cache-hot when idle
7) # of times in this domain load_balance() was called but did
not find a busier queue while the cpu was idle
8) # of times in this domain a busier queue was found while the
cpu was idle but no busier group was found
Next two are sched_balance_exec() statistics:
17) # of times in this domain sched_balance_exec() successfully pushed
a task to a new cpu
18) # of times in this domain sched_balance_exec() tried but failed to
push a task to a new cpu
9) # of times in this domain load_balance() was called when the
cpu was busy
10) # of times in this domain load_balance() checked but found the
load did not require balancing when busy
11) # of times in this domain load_balance() tried to move one or
more tasks and failed, when the cpu was busy
12) sum of imbalances discovered (if any) with each call to
load_balance() in this domain when the cpu was busy
13) # of times in this domain pull_task() was called when busy
14) # of times in this domain pull_task() was called even though the
target task was cache-hot when busy
15) # of times in this domain load_balance() was called but did not
find a busier queue while the cpu was busy
16) # of times in this domain a busier queue was found while the cpu
was busy but no busier group was found
Next two are try_to_wake_up() statistics:
19) # of times in this domain try_to_wake_up() tried to move a task based
on affinity and cache warmth
20) # of times in this domain try_to_wake_up() tried to move a task based
on load balancing
17) # of times in this domain load_balance() was called when the
cpu was just becoming idle
18) # of times in this domain load_balance() checked but found the
load did not require balancing when the cpu was just becoming idle
19) # of times in this domain load_balance() tried to move one or more
tasks and failed, when the cpu was just becoming idle
20) sum of imbalances discovered (if any) with each call to
load_balance() in this domain when the cpu was just becoming idle
21) # of times in this domain pull_task() was called when newly idle
22) # of times in this domain pull_task() was called even though the
target task was cache-hot when just becoming idle
23) # of times in this domain load_balance() was called but did not
find a busier queue while the cpu was just becoming idle
24) # of times in this domain a busier queue was found while the cpu
was just becoming idle but no busier group was found
Next three are active_load_balance() statistics:
25) # of times active_load_balance() was called
26) # of times active_load_balance() tried to move a task and failed
27) # of times active_load_balance() successfully moved a task
Next three are sched_balance_exec() statistics:
28) sbe_cnt is not used
29) sbe_balanced is not used
30) sbe_pushed is not used
Next three are sched_balance_fork() statistics:
31) sbf_cnt is not used
32) sbf_balanced is not used
33) sbf_pushed is not used
Next three are try_to_wake_up() statistics:
34) # of times in this domain try_to_wake_up() awoke a task that
last ran on a different cpu in this domain
35) # of times in this domain try_to_wake_up() moved a task to the
waking cpu because it was cache-cold on its own cpu anyway
36) # of times in this domain try_to_wake_up() started passive balancing
/proc/<pid>/schedstat
----------------
schedstats also adds a new /proc/<pid>/schedstat file to include some of
the same information on a per-process level. There are three fields in
this file correlating to fields 20, 21, and 22 in the CPU fields, but
they only apply for that process.
this file correlating for that process to:
1) time spent on the cpu
2) time spent waiting on a runqueue
3) # of timeslices run on this cpu
A program could be easily written to make use of these extra fields to
report on how well a particular process or set of processes is faring
under the scheduler's policies. A simple version of such a program is
available at
http://eaglet.rain.com/rick/linux/schedstat/v10/latency.c
http://eaglet.rain.com/rick/linux/schedstat/v12/latency.c

View File

@ -467,7 +467,12 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed.
above explicitly.
The power-management is supported.
Module snd-cs5530
_________________
Module for Cyrix/NatSemi Geode 5530 chip.
Module snd-cs5535audio
----------------------
@ -759,6 +764,7 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed.
model - force the model name
position_fix - Fix DMA pointer (0 = auto, 1 = none, 2 = POSBUF, 3 = FIFO size)
probe_mask - Bitmask to probe codecs (default = -1, meaning all slots)
single_cmd - Use single immediate commands to communicate with
codecs (for debugging only)
enable_msi - Enable Message Signaled Interrupt (MSI) (default = off)
@ -803,6 +809,8 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed.
hp-3013 HP machines (3013-variant)
fujitsu Fujitsu S7020
acer Acer TravelMate
will Will laptops (PB V7900)
replacer Replacer 672V
basic fixed pin assignment (old default model)
auto auto-config reading BIOS (default)
@ -811,16 +819,31 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed.
hp-bpc HP xw4400/6400/8400/9400 laptops
hp-bpc-d7000 HP BPC D7000
benq Benq ED8
benq-t31 Benq T31
hippo Hippo (ATI) with jack detection, Sony UX-90s
hippo_1 Hippo (Benq) with jack detection
sony-assamd Sony ASSAMD
basic fixed pin assignment w/o SPDIF
auto auto-config reading BIOS (default)
ALC268
3stack 3-stack model
auto auto-config reading BIOS (default)
ALC662
3stack-dig 3-stack (2-channel) with SPDIF
3stack-6ch 3-stack (6-channel)
3stack-6ch-dig 3-stack (6-channel) with SPDIF
6stack-dig 6-stack with SPDIF
lenovo-101e Lenovo laptop
auto auto-config reading BIOS (default)
ALC882/885
3stack-dig 3-jack with SPDIF I/O
6stack-dig 6-jack digital with SPDIF I/O
arima Arima W820Di1
macpro MacPro support
imac24 iMac 24'' with jack detection
w2jc ASUS W2JC
auto auto-config reading BIOS (default)
@ -832,9 +855,15 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed.
6stack-dig-demo 6-jack digital for Intel demo board
acer Acer laptops (Travelmate 3012WTMi, Aspire 5600, etc)
medion Medion Laptops
medion-md2 Medion MD2
targa-dig Targa/MSI
targa-2ch-dig Targa/MSI with 2-channel
laptop-eapd 3-jack with SPDIF I/O and EAPD (Clevo M540JE, M550JE)
lenovo-101e Lenovo 101E
lenovo-nb0763 Lenovo NB0763
lenovo-ms7195-dig Lenovo MS7195
6stack-hp HP machines with 6stack (Nettle boards)
3stack-hp HP machines with 3stack (Lucknow, Samba boards)
auto auto-config reading BIOS (default)
ALC861/660
@ -853,7 +882,9 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed.
3stack-dig 3-jack with SPDIF OUT
6stack-dig 6-jack with SPDIF OUT
3stack-660 3-jack (for ALC660VD)
3stack-660-digout 3-jack with SPDIF OUT (for ALC660VD)
lenovo Lenovo 3000 C200
dallas Dallas laptops
auto auto-config reading BIOS (default)
CMI9880
@ -864,12 +895,26 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed.
allout 5-jack in back, 2-jack in front, SPDIF out
auto auto-config reading BIOS (default)
AD1882
3stack 3-stack mode (default)
6stack 6-stack mode
AD1884
N/A
AD1981
basic 3-jack (default)
hp HP nx6320
thinkpad Lenovo Thinkpad T60/X60/Z60
toshiba Toshiba U205
AD1983
N/A
AD1984
basic default configuration
thinkpad Lenovo Thinkpad T61/X61
AD1986A
6stack 6-jack, separate surrounds (default)
3stack 3-stack, shared surrounds
@ -907,11 +952,18 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed.
ref Reference board
3stack D945 3stack
5stack D945 5stack + SPDIF
macmini Intel Mac Mini
macbook Intel Mac Book
macbook-pro-v1 Intel Mac Book Pro 1st generation
macbook-pro Intel Mac Book Pro 2nd generation
imac-intel Intel iMac
dell Dell XPS M1210
intel-mac-v1 Intel Mac Type 1
intel-mac-v2 Intel Mac Type 2
intel-mac-v3 Intel Mac Type 3
intel-mac-v4 Intel Mac Type 4
intel-mac-v5 Intel Mac Type 5
macmini Intel Mac Mini (equivalent with type 3)
macbook Intel Mac Book (eq. type 5)
macbook-pro-v1 Intel Mac Book Pro 1st generation (eq. type 3)
macbook-pro Intel Mac Book Pro 2nd generation (eq. type 3)
imac-intel Intel iMac (eq. type 2)
imac-intel-20 Intel iMac (newer version) (eq. type 3)
STAC9202/9250/9251
ref Reference board, base config
@ -956,6 +1008,17 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed.
from the irq. Remember this is a last resort, and should be
avoided as much as possible...
MORE NOTES ON "azx_get_response timeout" PROBLEMS:
On some hardware, you may need to add a proper probe_mask option
to avoid the "azx_get_response timeout" problem above, instead.
This occurs when the access to non-existing or non-working codec slot
(likely a modem one) causes a stall of the communication via HD-audio
bus. You can see which codec slots are probed by enabling
CONFIG_SND_DEBUG_DETECT, or simply from the file name of the codec
proc files. Then limit the slots to probe by probe_mask option.
For example, probe_mask=1 means to probe only the first slot, and
probe_mask=4 means only the third slot.
The power-management is supported.
Module snd-hdsp

View File

@ -1,4 +1,4 @@
Guide to using M-Audio Audiophile USB with ALSA and Jack v1.3
Guide to using M-Audio Audiophile USB with ALSA and Jack v1.5
========================================================
Thibault Le Meur <Thibault.LeMeur@supelec.fr>
@ -6,8 +6,19 @@
This document is a guide to using the M-Audio Audiophile USB (tm) device with
ALSA and JACK.
History
=======
* v1.4 - Thibault Le Meur (2007-07-11)
- Added Little Endianness nature of 16bits-modes
found by Hakan Lennestal <Hakan.Lennestal@brfsodrahamn.se>
- Modifying document structure
* v1.5 - Thibault Le Meur (2007-07-12)
- Added AC3/DTS passthru info
1 - Audiophile USB Specs and correct usage
==========================================
This part is a reminder of important facts about the functions and limitations
of the device.
@ -25,18 +36,18 @@ The device has 4 audio interfaces, and 2 MIDI ports:
The internal DAC/ADC has the following characteristics:
* sample depth of 16 or 24 bits
* sample rate from 8kHz to 96kHz
* Two ports can't use different sample depths at the same time. Moreover, the
Audiophile USB documentation gives the following Warning: "Please exit any
audio application running before switching between bit depths"
* Two interfaces can't use different sample depths at the same time.
Moreover, the Audiophile USB documentation gives the following Warning:
"Please exit any audio application running before switching between bit depths"
Due to the USB 1.1 bandwidth limitation, a limited number of interfaces can be
activated at the same time depending on the audio mode selected:
* 16-bit/48kHz ==> 4 channels in/4 channels out
* 16-bit/48kHz ==> 4 channels in + 4 channels out
- Ai+Ao+Di+Do
* 24-bit/48kHz ==> 4 channels in/2 channels out,
or 2 channels in/4 channels out
* 24-bit/48kHz ==> 4 channels in + 2 channels out,
or 2 channels in + 4 channels out
- Ai+Ao+Do or Ai+Di+Ao or Ai+Di+Do or Di+Ao+Do
* 24-bit/96kHz ==> 2 channels in, or 2 channels out (half duplex only)
* 24-bit/96kHz ==> 2 channels in _or_ 2 channels out (half duplex only)
- Ai or Ao or Di or Do
Important facts about the Digital interface:
@ -52,44 +63,56 @@ source is connected
synchronization error (for instance sound played at an odd sample rate)
2 - Audiophile USB support in ALSA
==================================
2 - Audiophile USB MIDI support in ALSA
=======================================
2.1 - MIDI ports
----------------
The Audiophile USB MIDI ports will be automatically supported once the
The Audiophile USB MIDI ports will be automatically supported once the
following modules have been loaded:
* snd-usb-audio
* snd-seq-midi
No additional setting is required.
2.2 - Audio ports
-----------------
3 - Audiophile USB Audio support in ALSA
========================================
Audio functions of the Audiophile USB device are handled by the snd-usb-audio
module. This module can work in a default mode (without any device-specific
parameter), or in an "advanced" mode with the device-specific parameter called
"device_setup".
2.2.1 - Default Alsa driver mode
3.1 - Default Alsa driver mode
------------------------------
The default behavior of the snd-usb-audio driver is to parse the device
capabilities at startup and enable all functions inside the device (including
all ports at any supported sample rates and sample depths). This approach
has the advantage to let the driver easily switch from sample rates/depths
automatically according to the need of the application claiming the device.
The default behavior of the snd-usb-audio driver is to list the device
capabilities at startup and activate the required mode when required
by the applications: for instance if the user is recording in a
24bit-depth-mode and immediately after wants to switch to a 16bit-depth mode,
the snd-usb-audio module will reconfigure the device on the fly.
In this case the Audiophile ports are mapped to alsa pcm devices in the
following way (I suppose the device's index is 1):
This approach has the advantage of letting the driver switch sample
rates/depths automatically according to the user's needs. However, those who
are using the device under windows know that this is not how the device is meant to
work: under windows applications must be closed before using the m-audio control
panel to switch the device working mode. Thus as we'll see in next section, this
Default Alsa driver mode can lead to device misconfigurations.
Let's get back to the Default Alsa driver mode for now. In this case the
Audiophile interfaces are mapped to alsa pcm devices in the following
way (I suppose the device's index is 1):
* hw:1,0 is Ao in playback and Di in capture
* hw:1,1 is Do in playback and Ai in capture
* hw:1,2 is Do in AC3/DTS passthrough mode
You must note as well that the device uses Big Endian byte encoding so that
supported audio format are S16_BE for 16-bit depth modes and S24_3BE for
24-bits depth mode. One exception is the hw:1,2 port which is Little Endian
compliant and thus uses S16_LE.
In this mode, the device uses Big Endian byte-encoding so that
supported audio format are S16_BE for 16-bit depth modes and S24_3BE for
24-bits depth mode.
One exception is the hw:1,2 port which was reported to be Little Endian
compliant (supposedly supporting S16_LE) but processes in fact only S16_BE streams.
This has been fixed in kernel 2.6.23 and above and now the hw:1,2 interface
is reported to be big endian in this default driver mode.
Examples:
* playing a S24_3BE encoded raw file to the Ao port
@ -98,22 +121,26 @@ Examples:
% arecord -D hw:1,1 -c2 -t raw -r48000 -fS24_3BE test.raw
* playing a S16_BE encoded raw file to the Do port
% aplay -D hw:1,1 -c2 -t raw -r48000 -fS16_BE test.raw
* playing an ac3 sample file to the Do port
% aplay -D hw:1,2 --channels=6 ac3_S16_BE_encoded_file.raw
If you're happy with the default Alsa driver setup and don't experience any
If you're happy with the default Alsa driver mode and don't experience any
issue with this mode, then you can skip the following chapter.
2.2.2 - Advanced module setup
3.2 - Advanced module setup
---------------------------
Due to the hardware constraints described above, the device initialization made
by the Alsa driver in default mode may result in a corrupted state of the
device. For instance, a particularly annoying issue is that the sound captured
from the Ai port sounds distorted (as if boosted with an excessive high volume
gain).
from the Ai interface sounds distorted (as if boosted with an excessive high
volume gain).
For people having this problem, the snd-usb-audio module has a new module
parameter called "device_setup".
parameter called "device_setup" (this parameter was introduced in kernel
release 2.6.17)
2.2.2.1 - Initializing the working mode of the Audiophile USB
3.2.1 - Initializing the working mode of the Audiophile USB
As far as the Audiophile USB device is concerned, this value lets the user
specify:
@ -121,33 +148,57 @@ specify:
* the sample rate
* whether the Di port is used or not
Here is a list of supported device_setup values for this device:
* device_setup=0x00 (or omitted)
- Alsa driver default mode
- maintains backward compatibility with setups that do not use this
parameter by not introducing any change
- results sometimes in corrupted sound as described earlier
When initialized with "device_setup=0x00", the snd-usb-audio module has
the same behaviour as when the parameter is omitted (see paragraph "Default
Alsa driver mode" above)
Other modes are described in the following subsections.
3.2.1.1 - 16-bit modes
The two supported modes are:
* device_setup=0x01
- 16bits 48kHz mode with Di disabled
- Ai,Ao,Do can be used at the same time
- hw:1,0 is not available in capture mode
- hw:1,2 is not available
* device_setup=0x11
- 16bits 48kHz mode with Di enabled
- Ai,Ao,Di,Do can be used at the same time
- hw:1,0 is available in capture mode
- hw:1,2 is not available
In these modes the device operates only in 16-bit modes. Before kernel 2.6.23,
the devices were reported to be Big-Endian when in fact they were Little-Endian
so that playing a file was a matter of using:
% aplay -D hw:1,1 -c2 -t raw -r48000 -fS16_BE test_S16_LE.raw
where "test_S16_LE.raw" was in fact a little-endian sample file.
Thanks to Hakan Lennestal (who discovered the Little-Endianness of the device in
these modes) a fix has been committed (expected in kernel 2.6.23) and
Alsa now reports Little-Endian interfaces. Thus playing a file now is as simple as
using:
% aplay -D hw:1,1 -c2 -t raw -r48000 -fS16_LE test_S16_LE.raw
3.2.1.2 - 24-bit modes
The three supported modes are:
* device_setup=0x09
- 24bits 48kHz mode with Di disabled
- Ai,Ao,Do can be used at the same time
- hw:1,0 is not available in capture mode
- hw:1,2 is not available
* device_setup=0x19
- 24bits 48kHz mode with Di enabled
- 3 ports from {Ai,Ao,Di,Do} can be used at the same time
- hw:1,0 is available in capture mode and an active digital source must be
connected to Di
- hw:1,2 is not available
* device_setup=0x0D or 0x10
- 24bits 96kHz mode
- Di is enabled by default for this mode but does not need to be connected
@ -155,34 +206,64 @@ Here is a list of supported device_setup values for this device:
- Only 1 port from {Ai,Ao,Di,Do} can be used at the same time
- hw:1,0 is available in capture mode
- hw:1,2 is not available
In these modes the device is only Big-Endian compliant (see "Default Alsa driver
mode" above for an aplay command example)
3.2.1.3 - AC3 w/ DTS passthru mode
Thanks to Hakan Lennestal, I now have a report saying that this mode works.
* device_setup=0x03
- 16bits 48kHz mode with only the Do port enabled
- AC3 with DTS passthru (not tested)
- AC3 with DTS passthru
- Caution with this setup the Do port is mapped to the pcm device hw:1,0
2.2.2.2 - Setting and switching configurations with the device_setup parameter
The command line used to playback the AC3/DTS encoded .wav-files in this mode:
% aplay -D hw:1,0 --channels=6 ac3_S16_LE_encoded_file.raw
3.2.2 - How to use the device_setup parameter
----------------------------------------------
The parameter can be given:
* By manually probing the device (as root):
# modprobe -r snd-usb-audio
# modprobe snd-usb-audio index=1 device_setup=0x09
* Or while configuring the modules options in your modules configuration file
- For Fedora distributions, edit the /etc/modprobe.conf file:
alias snd-card-1 snd-usb-audio
options snd-usb-audio index=1 device_setup=0x09
IMPORTANT NOTE WHEN SWITCHING CONFIGURATION:
-------------------------------------------
* You may need to _first_ initialize the module with the correct device_setup
parameter and _only_after_ turn on the Audiophile USB device
* This is especially true when switching the sample depth:
CAUTION when initializing the device
------------------------------------
* Correct initialization on the device requires that device_setup is given to
the module BEFORE the device is turned on. So, if you use the "manual probing"
method described above, take care to power-on the device AFTER this initialization.
* Failing to respect this will lead to a misconfiguration of the device. In this case
turn off the device, unprobe the snd-usb-audio module, then probe it again with
correct device_setup parameter and then (and only then) turn on the device again.
* If you've correctly initialized the device in a valid mode and then want to switch
to another mode (possibly with another sample-depth), please use also the following
procedure:
- first turn off the device
- de-register the snd-usb-audio module (modprobe -r)
- change the device_setup parameter by changing the device_setup
option in /etc/modprobe.conf
- turn on the device
* A workaround for this last issue has been applied to kernel 2.6.23, but it may not
be enough to ensure the 'stability' of the device initialization.
2.2.2.3 - Audiophile USB's device_setup structure
3.2.3 - Technical details for hackers
-------------------------------------
This section is for hackers, wanting to understand details about the device
internals and how Alsa supports it.
3.2.3.1 - Audiophile USB's device_setup structure
If you want to understand the device_setup magic numbers for the Audiophile
USB, you need some very basic understanding of binary computation. However,
@ -228,12 +309,12 @@ Caution:
- choosing b2 will prepare all interfaces for 24bits/96kHz but you'll
only be able to use one at the same time
2.2.3 - USB implementation details for this device
3.2.3.2 - USB implementation details for this device
You may safely skip this section if you're not interested in driver
development.
hacking.
This section describes some internal aspects of the device and summarize the
This section describes some internal aspects of the device and summarizes the
data I got by usb-snooping the windows and Linux drivers.
The M-Audio Audiophile USB has 7 USB Interfaces:
@ -293,43 +374,45 @@ parse_audio_endpoints function uses a quirk called
"audiophile_skip_setting_quirk" in order to prevent AltSettings not
corresponding to device_setup from being registered in the driver.
3 - Audiophile USB and Jack support
4 - Audiophile USB and Jack support
===================================
This section deals with support of the Audiophile USB device in Jack.
The main issue regarding this support is that the device is Big Endian
compliant.
3.1 - Using the plug alsa plugin
--------------------------------
There are 2 main potential issues when using Jackd with the device:
* support for Big-Endian devices in 24-bit modes
* support for 4-in / 4-out channels
Jack doesn't directly support big endian devices. Thus, one way to have support
for this device with Alsa is to use the Alsa "plug" converter.
4.1 - Direct support in Jackd
-----------------------------
Jack supports big endian devices only in recent versions (thanks to
Andreas Steinmetz for his first big-endian patch). I can't remember
exactly when this support was released into jackd, let's just say that
with jackd version 0.103.0 it's almost ok (just a small bug is affecting
16bits Big-Endian devices, but since you've read carefully the above
paragraphs, you're now using kernel >= 2.6.23 and your 16bits devices
are now Little Endians ;-) ).
You can run jackd with the following command for playback with Ao and
record with Ai:
% jackd -R -dalsa -Phw:1,0 -r48000 -p128 -n2 -D -Chw:1,1
4.2 - Using Alsa plughw
-----------------------
If you don't have a recent Jackd installed, you can downgrade to using
the Alsa "plug" converter.
For instance here is one way to run Jack with 2 playback channels on Ao and 2
capture channels from Ai:
% jackd -R -dalsa -dplughw:1 -r48000 -p256 -n2 -D -Cplughw:1,1
However you may see the following warning message:
"You appear to be using the ALSA software "plug" layer, probably a result of
using the "default" ALSA device. This is less efficient than it could be.
Consider using a hardware device instead rather than using the plug layer."
3.2 - Patching alsa to use direct pcm device
--------------------------------------------
A patch for Jack by Andreas Steinmetz adds support for Big Endian devices.
However it has not been included in the CVS tree.
You can find it at the following URL:
http://sourceforge.net/tracker/index.php?func=detail&aid=1289682&group_id=39687&
atid=425939
After having applied the patch you can run jackd with the following command
line:
% jackd -R -dalsa -Phw:1,0 -r48000 -p128 -n2 -D -Chw:1,1
3.2 - Getting 2 input and/or output interfaces in Jack
4.3 - Getting 2 input and/or output interfaces in Jack
------------------------------------------------------
As you can see, starting the Jack server this way will only enable 1 stereo
@ -339,6 +422,7 @@ This is due to the following restrictions:
* Jack can only open one capture device and one playback device at a time
* The Audiophile USB is seen as 2 (or three) Alsa devices: hw:1,0, hw:1,1
(and optionally hw:1,2)
If you want to get Ai+Di and/or Ao+Do support with Jack, you would need to
combine the Alsa devices into one logical "complex" device.
@ -348,13 +432,11 @@ It is related to another device (ice1712) but can be adapted to suit
the Audiophile USB.
Enabling multiple Audiophile USB interfaces for Jackd will certainly require:
* patching Jack with the previously mentioned "Big Endian" patch
* patching Jackd with the MMAP_COMPLEX patch (see the ice1712 page)
* patching the alsa-lib/src/pcm/pcm_multi.c file (see the ice1712 page)
* Making sure your Jackd version has the MMAP_COMPLEX patch (see the ice1712 page)
* (maybe) patching the alsa-lib/src/pcm/pcm_multi.c file (see the ice1712 page)
* define a multi device (combination of hw:1,0 and hw:1,1) in your .asoundrc
file
* start jackd with this device
I had no success in testing this for now, but this may be due to my OS
configuration. If you have any success with this kind of setup, please
drop me an email.
I had no success in testing this for now; if you have any success with this kind
of setup, please drop me an email.

View File

@ -278,6 +278,21 @@ current mixer configuration by reading and writing the whole file
image.
Duplex Streams
==============
Note that when attempting to use a single device file for playback and
capture, the OSS API provides no way to set the format, sample rate or
number of channels differently in each direction. Thus
io_handle = open("device", O_RDWR)
will only function correctly if the values are the same in each direction.
To use different values in the two directions, use both
input_handle = open("device", O_RDONLY)
output_handle = open("device", O_WRONLY)
and set the values for the corresponding handle.
Unsupported Features
====================

View File

@ -0,0 +1,202 @@
/*
* SPI testing utility (using spidev driver)
*
* Copyright (c) 2007 MontaVista Software, Inc.
* Copyright (c) 2007 Anton Vorontsov <avorontsov@ru.mvista.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License.
*
* Cross-compile with cross-gcc -I/path/to/cross-kernel/include
*/
#include <stdint.h>
#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <getopt.h>
#include <fcntl.h>
#include <sys/ioctl.h>
#include <linux/types.h>
#include <linux/spi/spidev.h>
/* Number of elements in array a; a must be a real array, not a pointer. */
#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))
/*
 * Report a fatal error and terminate the program.
 *
 * @s is handed to perror(), which prints it together with the message
 * for the current errno value; the process is then ended with abort(),
 * so this function never returns to its caller.
 */
static void pabort(const char *s)
{
	perror(s);
	abort();
}
/* Device node to open; replaced by the -D/--device argument when given. */
static char *device = "/dev/spidev1.1";
/* SPI mode flags, OR-ed together from the mode switches in parse_opts(). */
static uint8_t mode;
/* Bits per word (-b/--bpw). */
static uint8_t bits = 8;
/* Maximum clock speed in Hz (-s/--speed). */
static uint32_t speed = 500000;
/* Delay in microseconds (-d/--delay); used as delay_usecs of the transfer. */
static uint16_t delay;
/*
 * Run one test transfer on the spidev file descriptor @fd.
 *
 * The fixed pattern in tx[] is submitted as a single spi_ioc_transfer that
 * uses the globally configured delay, speed and bits-per-word, with rx[] as
 * the receive buffer.  Afterwards rx[] is dumped in hex, six bytes per line.
 * If the ioctl fails the program is terminated through pabort().
 */
static void transfer(int fd)
{
	int ret;
	size_t i;
	uint8_t tx[] = {
		0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
		0x40, 0x00, 0x00, 0x00, 0x00, 0x95,
		0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
		0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
		0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
		0xDE, 0xAD, 0xBE, 0xEF, 0xBA, 0xAD,
		0xF0, 0x0D,
	};
	uint8_t rx[ARRAY_SIZE(tx)] = {0, };
	struct spi_ioc_transfer tr = {
		.tx_buf = (unsigned long)tx,
		.rx_buf = (unsigned long)rx,
		.len = ARRAY_SIZE(tx),
		.delay_usecs = delay,
		.speed_hz = speed,
		.bits_per_word = bits,
	};

	ret = ioctl(fd, SPI_IOC_MESSAGE(1), &tr);
	/* ioctl() signals failure with -1, the same check main() uses. */
	if (ret == -1)
		pabort("can't send spi message");

	/* Unsigned index: ARRAY_SIZE() yields a size_t. */
	for (i = 0; i < ARRAY_SIZE(tx); i++) {
		/* Start a new output line before every group of six bytes. */
		if (!(i % 6))
			puts("");
		printf("%.2X ", rx[i]);
	}
	puts("");
}
/*
 * Print the command-line synopsis for @prog followed by the list of
 * supported options on standard output, then exit with status 1.
 */
void print_usage(char *prog)
{
	/* One entry per option; puts() appends the final newline. */
	static const char option_help[] =
		" -D --device device to use (default /dev/spidev1.1)\n"
		" -s --speed max speed (Hz)\n"
		" -d --delay delay (usec)\n"
		" -b --bpw bits per word \n"
		" -l --loop loopback\n"
		" -H --cpha clock phase\n"
		" -O --cpol clock polarity\n"
		" -L --lsb least significant bit first\n"
		" -C --cs-high chip select active high\n"
		" -3 --3wire SI/SO signals shared\n";

	printf("Usage: %s [-DsbdlHOLC3]\n", prog);
	puts(option_help);
	exit(1);
}
/*
 * Convert the argument @s of option -@opt to an unsigned decimal number.
 *
 * The text must start with a digit, consist of decimal digits only and
 * must not exceed @max.  Anything else is reported on stderr and the
 * program ends through print_usage(@prog).
 */
static unsigned long long numeric_arg(char *prog, int opt, const char *s,
				      unsigned long long max)
{
	char *end;
	unsigned long long v;

	/* strtoull() would skip blanks and accept a sign; insist on a digit. */
	if (s != NULL && s[0] >= '0' && s[0] <= '9') {
		/*
		 * On overflow strtoull() returns ULLONG_MAX, which is larger
		 * than every @max passed below, so the range check rejects
		 * overflowing input as well.
		 */
		v = strtoull(s, &end, 10);
		if (*end == '\0' && v <= max)
			return v;
	}

	fprintf(stderr, "invalid value for -%c: '%s'\n", opt, s ? s : "");
	print_usage(prog);	/* prints the option summary and exits */
	return 0;		/* not reached */
}

/*
 * Parse the command line and update the global settings.
 *
 * -D sets the device path; -s, -d and -b set speed, delay and bits per
 * word and are validated against the range of the variable they are
 * stored in (instead of being truncated silently); the remaining
 * switches OR their SPI_* flag into mode.  An unknown option or an
 * invalid numeric value ends the program through print_usage().
 */
void parse_opts(int argc, char *argv[])
{
	static struct option lopts[] = {
		{ "device", 1, 0, 'D' },
		{ "speed", 1, 0, 's' },
		{ "delay", 1, 0, 'd' },
		{ "bpw", 1, 0, 'b' },
		{ "loop", 0, 0, 'l' },
		{ "cpha", 0, 0, 'H' },
		{ "cpol", 0, 0, 'O' },
		{ "lsb", 0, 0, 'L' },
		{ "cs-high", 0, 0, 'C' },
		{ "3wire", 0, 0, '3' },
		{ NULL, 0, 0, 0 },
	};
	int c;

	while ((c = getopt_long(argc, argv, "D:s:d:b:lHOLC3", lopts, NULL)) != -1) {
		switch (c) {
		case 'D':
			device = optarg;
			break;
		case 's':
			speed = (uint32_t)numeric_arg(argv[0], c, optarg, UINT32_MAX);
			break;
		case 'd':
			delay = (uint16_t)numeric_arg(argv[0], c, optarg, UINT16_MAX);
			break;
		case 'b':
			bits = (uint8_t)numeric_arg(argv[0], c, optarg, UINT8_MAX);
			break;
		case 'l':
			mode |= SPI_LOOP;
			break;
		case 'H':
			mode |= SPI_CPHA;
			break;
		case 'O':
			mode |= SPI_CPOL;
			break;
		case 'L':
			mode |= SPI_LSB_FIRST;
			break;
		case 'C':
			mode |= SPI_CS_HIGH;
			break;
		case '3':
			mode |= SPI_3WIRE;
			break;
		default:
			print_usage(argv[0]);
			break;
		}
	}
}
/*
 * Entry point: parse the options, open the spidev device, apply the
 * requested mode, bits per word and maximum speed, read each setting back,
 * print the values and run one test transfer.
 *
 * Every failing open()/ioctl() ends the program through pabort().
 */
int main(int argc, char *argv[])
{
	int ret = 0;
	int fd;

	parse_opts(argc, argv);

	fd = open(device, O_RDWR);
	if (fd < 0)
		pabort("can't open device");

	/*
	 * spi mode
	 */
	ret = ioctl(fd, SPI_IOC_WR_MODE, &mode);
	if (ret == -1)
		pabort("can't set spi mode");

	ret = ioctl(fd, SPI_IOC_RD_MODE, &mode);
	if (ret == -1)
		pabort("can't get spi mode");

	/*
	 * bits per word
	 */
	ret = ioctl(fd, SPI_IOC_WR_BITS_PER_WORD, &bits);
	if (ret == -1)
		pabort("can't set bits per word");

	ret = ioctl(fd, SPI_IOC_RD_BITS_PER_WORD, &bits);
	if (ret == -1)
		pabort("can't get bits per word");

	/*
	 * max speed hz
	 */
	ret = ioctl(fd, SPI_IOC_WR_MAX_SPEED_HZ, &speed);
	if (ret == -1)
		pabort("can't set max speed hz");

	ret = ioctl(fd, SPI_IOC_RD_MAX_SPEED_HZ, &speed);
	if (ret == -1)
		pabort("can't get max speed hz");

	/* Values printed here are the ones read back from the driver above. */
	printf("spi mode: %d\n", mode);
	printf("bits per word: %d\n", bits);
	/* speed is an unsigned 32-bit value, so it is printed with %u. */
	printf("max speed: %u Hz (%u KHz)\n", speed, speed/1000);

	transfer(fd);

	close(fd);

	return ret;
}

Some files were not shown because too many files have changed in this diff Show More