Merge master.kernel.org:/pub/scm/linux/kernel/git/torvalds/linux-2.6
Conflicts: drivers/macintosh/adbhid.c
This commit is contained in:
commit
b981d8b3f5
2
.gitignore
vendored
2
.gitignore
vendored
@ -7,6 +7,7 @@
|
||||
#
|
||||
.*
|
||||
*.o
|
||||
*.o.*
|
||||
*.a
|
||||
*.s
|
||||
*.ko
|
||||
@ -22,6 +23,7 @@
|
||||
tags
|
||||
TAGS
|
||||
vmlinux*
|
||||
!vmlinux.lds.S
|
||||
System.map
|
||||
Module.symvers
|
||||
|
||||
|
46
CREDITS
46
CREDITS
@ -665,6 +665,11 @@ D: Minor updates to SCSI types, added /proc/pid/maps protection
|
||||
S: (ask for current address)
|
||||
S: USA
|
||||
|
||||
N: Robin Cornelius
|
||||
E: robincornelius@users.sourceforge.net
|
||||
D: Ralink rt2x00 WLAN driver
|
||||
S: Cornwall, U.K.
|
||||
|
||||
N: Mark Corner
|
||||
E: mcorner@umich.edu
|
||||
W: http://www.eecs.umich.edu/~mcorner/
|
||||
@ -679,6 +684,11 @@ D: Kernel module SMART utilities
|
||||
S: Santa Cruz, California
|
||||
S: USA
|
||||
|
||||
N: Luis Correia
|
||||
E: lfcorreia@users.sf.net
|
||||
D: Ralink rt2x00 WLAN driver
|
||||
S: Belas, Portugal
|
||||
|
||||
N: Alan Cox
|
||||
W: http://www.linux.org.uk/diary/
|
||||
D: Linux Networking (0.99.10->2.0.29)
|
||||
@ -833,6 +843,12 @@ S: Lancs
|
||||
S: PR4 6AX
|
||||
S: United Kingdom
|
||||
|
||||
N: Ivo van Doorn
|
||||
E: IvDoorn@gmail.com
|
||||
W: http://www.mendiosus.nl
|
||||
D: Ralink rt2x00 WLAN driver
|
||||
S: Haarlem, The Netherlands
|
||||
|
||||
N: John G Dorsey
|
||||
E: john+@cs.cmu.edu
|
||||
D: ARM Linux ports to Assabet/Neponset, Spot
|
||||
@ -966,6 +982,7 @@ N: Pekka Enberg
|
||||
E: penberg@cs.helsinki.fi
|
||||
W: http://www.cs.helsinki.fi/u/penberg/
|
||||
D: Various kernel hacks, fixes, and cleanups.
|
||||
D: Slab allocators
|
||||
S: Finland
|
||||
|
||||
N: David Engebretsen
|
||||
@ -1939,8 +1956,8 @@ D: for Menuconfig's lxdialog.
|
||||
N: Christoph Lameter
|
||||
E: christoph@lameter.com
|
||||
D: Digiboard PC/Xe and PC/Xi, Digiboard EPCA
|
||||
D: Early protocol filter for bridging code
|
||||
D: Bug fixes
|
||||
D: NUMA support, Slab allocators, Page migration
|
||||
D: Scalability, Time subsystem
|
||||
|
||||
N: Paul Laufer
|
||||
E: paul@laufernet.com
|
||||
@ -2212,13 +2229,13 @@ S: 2300 Copenhagen S
|
||||
S: Denmark
|
||||
|
||||
N: Claudio S. Matsuoka
|
||||
E: claudio@conectiva.com
|
||||
E: claudio@helllabs.org
|
||||
E: cmatsuoka@gmail.com
|
||||
E: claudio@mandriva.com
|
||||
W: http://helllabs.org/~claudio
|
||||
D: V4L, OV511 driver hacks
|
||||
D: V4L, OV511 and HDA-codec hacks
|
||||
S: Conectiva S.A.
|
||||
S: R. Tocantins 89
|
||||
S: 80050-430 Curitiba PR
|
||||
S: Souza Naves 1250
|
||||
S: 80050-040 Curitiba PR
|
||||
S: Brazil
|
||||
|
||||
N: Heinz Mauelshagen
|
||||
@ -3516,6 +3533,12 @@ S: Maastrichterweg 63
|
||||
S: 5554 GG Valkenswaard
|
||||
S: The Netherlands
|
||||
|
||||
N: Mark Wallis
|
||||
E: mwallis@serialmonkey.com
|
||||
W: http://mark.serialmonkey.com
|
||||
D: Ralink rt2x00 WLAN driver
|
||||
S: Newcastle, Australia
|
||||
|
||||
N: Peter Shaobo Wang
|
||||
E: pwang@mmdcorp.com
|
||||
W: http://www.mmdcorp.com/pw/linux
|
||||
@ -3650,6 +3673,15 @@ S: Alte Regensburger Str. 11a
|
||||
S: 93149 Nittenau
|
||||
S: Germany
|
||||
|
||||
N: Gertjan van Wingerde
|
||||
E: gwingerde@home.nl
|
||||
D: Ralink rt2x00 WLAN driver
|
||||
D: Minix V2 file-system
|
||||
D: Misc fixes
|
||||
S: Geessinkweg 177
|
||||
S: 7544 TX Enschede
|
||||
S: The Netherlands
|
||||
|
||||
N: Lars Wirzenius
|
||||
E: liw@iki.fi
|
||||
D: Linux System Administrator's Guide, author, former maintainer
|
||||
|
@ -134,8 +134,6 @@ dvb/
|
||||
- info on Linux Digital Video Broadcast (DVB) subsystem.
|
||||
early-userspace/
|
||||
- info about initramfs, klibc, and userspace early during boot.
|
||||
ecryptfs.txt
|
||||
- docs on eCryptfs: stacked cryptographic filesystem for Linux.
|
||||
eisa.txt
|
||||
- info on EISA bus support.
|
||||
exception.txt
|
||||
|
@ -45,6 +45,7 @@ o nfs-utils 1.0.5 # showmount --version
|
||||
o procps 3.2.0 # ps --version
|
||||
o oprofile 0.9 # oprofiled --version
|
||||
o udev 081 # udevinfo -V
|
||||
o grub 0.93 # grub --version
|
||||
|
||||
Kernel compilation
|
||||
==================
|
||||
|
@ -633,12 +633,27 @@ covers RTL which is used frequently with assembly language in the kernel.
|
||||
|
||||
Kernel developers like to be seen as literate. Do mind the spelling
|
||||
of kernel messages to make a good impression. Do not use crippled
|
||||
words like "dont" and use "do not" or "don't" instead.
|
||||
words like "dont"; use "do not" or "don't" instead. Make the messages
|
||||
concise, clear, and unambiguous.
|
||||
|
||||
Kernel messages do not have to be terminated with a period.
|
||||
|
||||
Printing numbers in parentheses (%d) adds no value and should be avoided.
|
||||
|
||||
There are a number of driver model diagnostic macros in <linux/device.h>
|
||||
which you should use to make sure messages are matched to the right device
|
||||
and driver, and are tagged with the right level: dev_err(), dev_warn(),
|
||||
dev_info(), and so forth. For messages that aren't associated with a
|
||||
particular device, <linux/kernel.h> defines pr_debug() and pr_info().
|
||||
|
||||
Coming up with good debugging messages can be quite a challenge; and once
|
||||
you have them, they can be a huge help for remote troubleshooting. Such
|
||||
messages should be compiled out when the DEBUG symbol is not defined (that
|
||||
is, by default they are not included). When you use dev_dbg() or pr_debug(),
|
||||
that's automatic. Many subsystems have Kconfig options to turn on -DDEBUG.
|
||||
A related convention uses VERBOSE_DEBUG to add dev_vdbg() messages to the
|
||||
ones already enabled by DEBUG.
|
||||
|
||||
|
||||
Chapter 14: Allocating memory
|
||||
|
||||
@ -790,4 +805,5 @@ Kernel CodingStyle, by greg@kroah.com at OLS 2002:
|
||||
http://www.kroah.com/linux/talks/ols_2002_kernel_codingstyle_talk/html/
|
||||
|
||||
--
|
||||
Last updated on 2006-December-06.
|
||||
Last updated on 2007-July-13.
|
||||
|
||||
|
@ -26,7 +26,7 @@ Part Ia - Using large dma-coherent buffers
|
||||
|
||||
void *
|
||||
dma_alloc_coherent(struct device *dev, size_t size,
|
||||
dma_addr_t *dma_handle, int flag)
|
||||
dma_addr_t *dma_handle, gfp_t flag)
|
||||
void *
|
||||
pci_alloc_consistent(struct pci_dev *dev, size_t size,
|
||||
dma_addr_t *dma_handle)
|
||||
@ -38,7 +38,7 @@ to make sure to flush the processor's write buffers before telling
|
||||
devices to read that memory.)
|
||||
|
||||
This routine allocates a region of <size> bytes of consistent memory.
|
||||
it also returns a <dma_handle> which may be cast to an unsigned
|
||||
It also returns a <dma_handle> which may be cast to an unsigned
|
||||
integer the same width as the bus and used as the physical address
|
||||
base of the region.
|
||||
|
||||
@ -52,21 +52,24 @@ The simplest way to do that is to use the dma_pool calls (see below).
|
||||
|
||||
The flag parameter (dma_alloc_coherent only) allows the caller to
|
||||
specify the GFP_ flags (see kmalloc) for the allocation (the
|
||||
implementation may chose to ignore flags that affect the location of
|
||||
implementation may choose to ignore flags that affect the location of
|
||||
the returned memory, like GFP_DMA). For pci_alloc_consistent, you
|
||||
must assume GFP_ATOMIC behaviour.
|
||||
|
||||
void
|
||||
dma_free_coherent(struct device *dev, size_t size, void *cpu_addr
|
||||
dma_free_coherent(struct device *dev, size_t size, void *cpu_addr,
|
||||
dma_addr_t dma_handle)
|
||||
void
|
||||
pci_free_consistent(struct pci_dev *dev, size_t size, void *cpu_addr
|
||||
pci_free_consistent(struct pci_dev *dev, size_t size, void *cpu_addr,
|
||||
dma_addr_t dma_handle)
|
||||
|
||||
Free the region of consistent memory you previously allocated. dev,
|
||||
size and dma_handle must all be the same as those passed into the
|
||||
consistent allocate. cpu_addr must be the virtual address returned by
|
||||
the consistent allocate
|
||||
the consistent allocate.
|
||||
|
||||
Note that unlike their sibling allocation calls, these routines
|
||||
may only be called with IRQs enabled.
|
||||
|
||||
|
||||
Part Ib - Using small dma-coherent buffers
|
||||
@ -77,9 +80,9 @@ To get this part of the dma_ API, you must #include <linux/dmapool.h>
|
||||
Many drivers need lots of small dma-coherent memory regions for DMA
|
||||
descriptors or I/O buffers. Rather than allocating in units of a page
|
||||
or more using dma_alloc_coherent(), you can use DMA pools. These work
|
||||
much like a struct kmem_cache, except that they use the dma-coherent allocator
|
||||
much like a struct kmem_cache, except that they use the dma-coherent allocator,
|
||||
not __get_free_pages(). Also, they understand common hardware constraints
|
||||
for alignment, like queue heads needing to be aligned on N byte boundaries.
|
||||
for alignment, like queue heads needing to be aligned on N-byte boundaries.
|
||||
|
||||
|
||||
struct dma_pool *
|
||||
@ -102,15 +105,15 @@ crossing restrictions, pass 0 for alloc; passing 4096 says memory allocated
|
||||
from this pool must not cross 4KByte boundaries.
|
||||
|
||||
|
||||
void *dma_pool_alloc(struct dma_pool *pool, int gfp_flags,
|
||||
void *dma_pool_alloc(struct dma_pool *pool, gfp_t gfp_flags,
|
||||
dma_addr_t *dma_handle);
|
||||
|
||||
void *pci_pool_alloc(struct pci_pool *pool, int gfp_flags,
|
||||
void *pci_pool_alloc(struct pci_pool *pool, gfp_t gfp_flags,
|
||||
dma_addr_t *dma_handle);
|
||||
|
||||
This allocates memory from the pool; the returned memory will meet the size
|
||||
and alignment requirements specified at creation time. Pass GFP_ATOMIC to
|
||||
prevent blocking, or if it's permitted (not in_interrupt, not holding SMP locks)
|
||||
prevent blocking, or if it's permitted (not in_interrupt, not holding SMP locks),
|
||||
pass GFP_KERNEL to allow blocking. Like dma_alloc_coherent(), this returns
|
||||
two values: an address usable by the cpu, and the dma address usable by the
|
||||
pool's device.
|
||||
@ -123,7 +126,7 @@ pool's device.
|
||||
dma_addr_t addr);
|
||||
|
||||
This puts memory back into the pool. The pool is what was passed to
|
||||
the pool allocation routine; the cpu and dma addresses are what
|
||||
the pool allocation routine; the cpu (vaddr) and dma addresses are what
|
||||
were returned when that routine allocated the memory being freed.
|
||||
|
||||
|
||||
@ -209,18 +212,18 @@ Notes: Not all memory regions in a machine can be mapped by this
|
||||
API. Further, regions that appear to be physically contiguous in
|
||||
kernel virtual space may not be contiguous as physical memory. Since
|
||||
this API does not provide any scatter/gather capability, it will fail
|
||||
if the user tries to map a non physically contiguous piece of memory.
|
||||
if the user tries to map a non-physically contiguous piece of memory.
|
||||
For this reason, it is recommended that memory mapped by this API be
|
||||
obtained only from sources which guarantee to be physically contiguous
|
||||
obtained only from sources which guarantee it to be physically contiguous
|
||||
(like kmalloc).
|
||||
|
||||
Further, the physical address of the memory must be within the
|
||||
dma_mask of the device (the dma_mask represents a bit mask of the
|
||||
addressable region for the device. i.e. if the physical address of
|
||||
addressable region for the device. I.e., if the physical address of
|
||||
the memory anded with the dma_mask is still equal to the physical
|
||||
address, then the device can perform DMA to the memory). In order to
|
||||
ensure that the memory allocated by kmalloc is within the dma_mask,
|
||||
the driver may specify various platform dependent flags to restrict
|
||||
the driver may specify various platform-dependent flags to restrict
|
||||
the physical memory range of the allocation (e.g. on x86, GFP_DMA
|
||||
guarantees to be within the first 16MB of available physical memory,
|
||||
as required by ISA devices).
|
||||
@ -244,14 +247,14 @@ are guaranteed also to be cache line boundaries).
|
||||
|
||||
DMA_TO_DEVICE synchronisation must be done after the last modification
|
||||
of the memory region by the software and before it is handed off to
|
||||
the driver. Once this primitive is used. Memory covered by this
|
||||
primitive should be treated as read only by the device. If the device
|
||||
the driver. Once this primitive is used, memory covered by this
|
||||
primitive should be treated as read-only by the device. If the device
|
||||
may write to it at any point, it should be DMA_BIDIRECTIONAL (see
|
||||
below).
|
||||
|
||||
DMA_FROM_DEVICE synchronisation must be done before the driver
|
||||
accesses data that may be changed by the device. This memory should
|
||||
be treated as read only by the driver. If the driver needs to write
|
||||
be treated as read-only by the driver. If the driver needs to write
|
||||
to it at any point, it should be DMA_BIDIRECTIONAL (see below).
|
||||
|
||||
DMA_BIDIRECTIONAL requires special handling: it means that the driver
|
||||
@ -261,7 +264,7 @@ you must always sync bidirectional memory twice: once before the
|
||||
memory is handed off to the device (to make sure all memory changes
|
||||
are flushed from the processor) and once before the data may be
|
||||
accessed after being used by the device (to make sure any processor
|
||||
cache lines are updated with data that the device may have changed.
|
||||
cache lines are updated with data that the device may have changed).
|
||||
|
||||
void
|
||||
dma_unmap_single(struct device *dev, dma_addr_t dma_addr, size_t size,
|
||||
@ -302,8 +305,8 @@ pci_dma_mapping_error(dma_addr_t dma_addr)
|
||||
|
||||
In some circumstances dma_map_single and dma_map_page will fail to create
|
||||
a mapping. A driver can check for these errors by testing the returned
|
||||
dma address with dma_mapping_error(). A non zero return value means the mapping
|
||||
could not be created and the driver should take appropriate action (eg
|
||||
dma address with dma_mapping_error(). A non-zero return value means the mapping
|
||||
could not be created and the driver should take appropriate action (e.g.
|
||||
reduce current DMA mapping usage or delay and try again later).
|
||||
|
||||
int
|
||||
@ -315,7 +318,7 @@ reduce current DMA mapping usage or delay and try again later).
|
||||
|
||||
Maps a scatter gather list from the block layer.
|
||||
|
||||
Returns: the number of physical segments mapped (this may be shorted
|
||||
Returns: the number of physical segments mapped (this may be shorter
|
||||
than <nents> passed in if the block layer determines that some
|
||||
elements of the scatter/gather list are physically adjacent and thus
|
||||
may be mapped with a single entry).
|
||||
@ -357,7 +360,7 @@ accessed sg->address and sg->length as shown above.
|
||||
pci_unmap_sg(struct pci_dev *hwdev, struct scatterlist *sg,
|
||||
int nents, int direction)
|
||||
|
||||
unmap the previously mapped scatter/gather list. All the parameters
|
||||
Unmap the previously mapped scatter/gather list. All the parameters
|
||||
must be the same as those passed in to the scatter/gather mapping
|
||||
API.
|
||||
|
||||
@ -377,7 +380,7 @@ void
|
||||
pci_dma_sync_sg(struct pci_dev *hwdev, struct scatterlist *sg,
|
||||
int nelems, int direction)
|
||||
|
||||
synchronise a single contiguous or scatter/gather mapping. All the
|
||||
Synchronise a single contiguous or scatter/gather mapping. All the
|
||||
parameters must be the same as those passed into the single mapping
|
||||
API.
|
||||
|
||||
@ -406,7 +409,7 @@ API at all.
|
||||
|
||||
void *
|
||||
dma_alloc_noncoherent(struct device *dev, size_t size,
|
||||
dma_addr_t *dma_handle, int flag)
|
||||
dma_addr_t *dma_handle, gfp_t flag)
|
||||
|
||||
Identical to dma_alloc_coherent() except that the platform will
|
||||
choose to return either consistent or non-consistent memory as it sees
|
||||
@ -426,34 +429,34 @@ void
|
||||
dma_free_noncoherent(struct device *dev, size_t size, void *cpu_addr,
|
||||
dma_addr_t dma_handle)
|
||||
|
||||
free memory allocated by the nonconsistent API. All parameters must
|
||||
Free memory allocated by the nonconsistent API. All parameters must
|
||||
be identical to those passed in (and returned by
|
||||
dma_alloc_noncoherent()).
|
||||
|
||||
int
|
||||
dma_is_consistent(struct device *dev, dma_addr_t dma_handle)
|
||||
|
||||
returns true if the device dev is performing consistent DMA on the memory
|
||||
Returns true if the device dev is performing consistent DMA on the memory
|
||||
area pointed to by the dma_handle.
|
||||
|
||||
int
|
||||
dma_get_cache_alignment(void)
|
||||
|
||||
returns the processor cache alignment. This is the absolute minimum
|
||||
Returns the processor cache alignment. This is the absolute minimum
|
||||
alignment *and* width that you must observe when either mapping
|
||||
memory or doing partial flushes.
|
||||
|
||||
Notes: This API may return a number *larger* than the actual cache
|
||||
line, but it will guarantee that one or more cache lines fit exactly
|
||||
into the width returned by this call. It will also always be a power
|
||||
of two for easy alignment
|
||||
of two for easy alignment.
|
||||
|
||||
void
|
||||
dma_sync_single_range(struct device *dev, dma_addr_t dma_handle,
|
||||
unsigned long offset, size_t size,
|
||||
enum dma_data_direction direction)
|
||||
|
||||
does a partial sync. starting at offset and continuing for size. You
|
||||
Does a partial sync, starting at offset and continuing for size. You
|
||||
must be careful to observe the cache alignment and width when doing
|
||||
anything like this. You must also be extra careful about accessing
|
||||
memory you intend to sync partially.
|
||||
@ -472,21 +475,20 @@ dma_declare_coherent_memory(struct device *dev, dma_addr_t bus_addr,
|
||||
dma_addr_t device_addr, size_t size, int
|
||||
flags)
|
||||
|
||||
|
||||
Declare region of memory to be handed out by dma_alloc_coherent when
|
||||
it's asked for coherent memory for this device.
|
||||
|
||||
bus_addr is the physical address to which the memory is currently
|
||||
assigned in the bus responding region (this will be used by the
|
||||
platform to perform the mapping)
|
||||
platform to perform the mapping).
|
||||
|
||||
device_addr is the physical address the device needs to be programmed
|
||||
with actually to address this memory (this will be handed out as the
|
||||
dma_addr_t in dma_alloc_coherent())
|
||||
dma_addr_t in dma_alloc_coherent()).
|
||||
|
||||
size is the size of the area (must be multiples of PAGE_SIZE).
|
||||
|
||||
flags can be or'd together and are
|
||||
flags can be or'd together and are:
|
||||
|
||||
DMA_MEMORY_MAP - request that the memory returned from
|
||||
dma_alloc_coherent() be directly writable.
|
||||
@ -494,7 +496,7 @@ dma_alloc_coherent() be directly writable.
|
||||
DMA_MEMORY_IO - request that the memory returned from
|
||||
dma_alloc_coherent() be addressable using read/write/memcpy_toio etc.
|
||||
|
||||
One or both of these flags must be present
|
||||
One or both of these flags must be present.
|
||||
|
||||
DMA_MEMORY_INCLUDES_CHILDREN - make the declared memory be allocated by
|
||||
dma_alloc_coherent of any child devices of this one (for memory residing
|
||||
@ -528,7 +530,7 @@ dma_release_declared_memory(struct device *dev)
|
||||
Remove the memory region previously declared from the system. This
|
||||
API performs *no* in-use checking for this region and will return
|
||||
unconditionally having removed all the required structures. It is the
|
||||
drivers job to ensure that no parts of this memory region are
|
||||
driver's job to ensure that no parts of this memory region are
|
||||
currently in use.
|
||||
|
||||
void *
|
||||
@ -538,12 +540,10 @@ dma_mark_declared_memory_occupied(struct device *dev,
|
||||
This is used to occupy specific regions of the declared space
|
||||
(dma_alloc_coherent() will hand out the first free region it finds).
|
||||
|
||||
device_addr is the *device* address of the region requested
|
||||
device_addr is the *device* address of the region requested.
|
||||
|
||||
size is the size (and should be a page sized multiple).
|
||||
size is the size (and should be a page-sized multiple).
|
||||
|
||||
The return value will be either a pointer to the processor virtual
|
||||
address of the memory, or an error (via PTR_ERR()) if any part of the
|
||||
region is occupied.
|
||||
|
||||
|
||||
|
@ -11,15 +11,15 @@ DOCBOOKS := wanbook.xml z8530book.xml mcabook.xml videobook.xml \
|
||||
procfs-guide.xml writing_usb_driver.xml \
|
||||
kernel-api.xml filesystems.xml lsm.xml usb.xml \
|
||||
gadget.xml libata.xml mtdnand.xml librs.xml rapidio.xml \
|
||||
genericirq.xml
|
||||
genericirq.xml s390-drivers.xml
|
||||
|
||||
###
|
||||
# The build process is as follows (targets):
|
||||
# (xmldocs)
|
||||
# file.tmpl --> file.xml +--> file.ps (psdocs)
|
||||
# +--> file.pdf (pdfdocs)
|
||||
# +--> DIR=file (htmldocs)
|
||||
# +--> man/ (mandocs)
|
||||
# (xmldocs) [by docproc]
|
||||
# file.tmpl --> file.xml +--> file.ps (psdocs) [by db2ps or xmlto]
|
||||
# +--> file.pdf (pdfdocs) [by db2pdf or xmlto]
|
||||
# +--> DIR=file (htmldocs) [by xmlto]
|
||||
# +--> man/ (mandocs) [by xmlto]
|
||||
|
||||
|
||||
# for PDF and PS output you can choose between xmlto and docbook-utils tools
|
||||
|
@ -316,7 +316,8 @@ CPU B: spin_unlock_irqrestore(&dev_lock, flags)
|
||||
|
||||
<chapter id="pubfunctions">
|
||||
<title>Public Functions Provided</title>
|
||||
!Einclude/asm-i386/io.h
|
||||
!Iinclude/asm-i386/io.h
|
||||
!Elib/iomap.c
|
||||
</chapter>
|
||||
|
||||
</book>
|
||||
|
@ -159,7 +159,6 @@ X!Ilib/string.c
|
||||
!Earch/i386/lib/usercopy.c
|
||||
</sect1>
|
||||
<sect1><title>More Memory Management Functions</title>
|
||||
!Iinclude/linux/rmap.h
|
||||
!Emm/readahead.c
|
||||
!Emm/filemap.c
|
||||
!Emm/memory.c
|
||||
@ -241,17 +240,23 @@ X!Ilib/string.c
|
||||
<sect1><title>Driver Support</title>
|
||||
!Enet/core/dev.c
|
||||
!Enet/ethernet/eth.c
|
||||
!Enet/sched/sch_generic.c
|
||||
!Iinclude/linux/etherdevice.h
|
||||
!Iinclude/linux/netdevice.h
|
||||
</sect1>
|
||||
<sect1><title>PHY Support</title>
|
||||
!Edrivers/net/phy/phy.c
|
||||
!Idrivers/net/phy/phy.c
|
||||
!Edrivers/net/phy/phy_device.c
|
||||
!Idrivers/net/phy/phy_device.c
|
||||
!Edrivers/net/phy/mdio_bus.c
|
||||
!Idrivers/net/phy/mdio_bus.c
|
||||
<!-- FIXME: Removed for now since no structured comments in source
|
||||
X!Enet/core/wireless.c
|
||||
-->
|
||||
</sect1>
|
||||
<!-- FIXME: Removed for now since no structured comments in source
|
||||
<sect1><title>Wireless</title>
|
||||
X!Enet/core/wireless.c
|
||||
</sect1>
|
||||
-->
|
||||
<sect1><title>Synchronous PPP</title>
|
||||
!Edrivers/net/wan/syncppp.c
|
||||
</sect1>
|
||||
@ -381,7 +386,6 @@ X!Edrivers/base/interface.c
|
||||
!Edrivers/base/bus.c
|
||||
</sect1>
|
||||
<sect1><title>Device Drivers Power Management</title>
|
||||
!Edrivers/base/power/main.c
|
||||
!Edrivers/base/power/resume.c
|
||||
!Edrivers/base/power/suspend.c
|
||||
</sect1>
|
||||
@ -399,15 +403,19 @@ X!Edrivers/acpi/pci_bind.c
|
||||
-->
|
||||
</sect1>
|
||||
<sect1><title>Device drivers PnP support</title>
|
||||
!Edrivers/pnp/core.c
|
||||
!Idrivers/pnp/core.c
|
||||
<!-- No correct structured comments
|
||||
X!Edrivers/pnp/system.c
|
||||
-->
|
||||
!Edrivers/pnp/card.c
|
||||
!Edrivers/pnp/driver.c
|
||||
!Idrivers/pnp/driver.c
|
||||
!Edrivers/pnp/manager.c
|
||||
!Edrivers/pnp/support.c
|
||||
</sect1>
|
||||
<sect1><title>Userspace IO devices</title>
|
||||
!Edrivers/uio/uio.c
|
||||
!Iinclude/linux/uio_driver.h
|
||||
</sect1>
|
||||
</chapter>
|
||||
|
||||
<chapter id="blkdev">
|
||||
@ -701,14 +709,22 @@ X!Idrivers/video/console/fonts.c
|
||||
|
||||
<chapter id="splice">
|
||||
<title>splice API</title>
|
||||
<para>)
|
||||
<para>
|
||||
splice is a method for moving blocks of data around inside the
|
||||
kernel, without continually transferring it between the kernel
|
||||
kernel, without continually transferring them between the kernel
|
||||
and user space.
|
||||
</para>
|
||||
!Iinclude/linux/splice.h
|
||||
!Ffs/splice.c
|
||||
</chapter>
|
||||
|
||||
<chapter id="pipes">
|
||||
<title>pipes API</title>
|
||||
<para>
|
||||
Pipe interfaces are all for in-kernel (builtin image) use.
|
||||
They are not exported for use by modules.
|
||||
</para>
|
||||
!Iinclude/linux/pipe_fs_i.h
|
||||
!Ffs/pipe.c
|
||||
</chapter>
|
||||
|
||||
</book>
|
||||
|
@ -219,7 +219,7 @@
|
||||
</para>
|
||||
|
||||
<sect1 id="lock-intro">
|
||||
<title>Two Main Types of Kernel Locks: Spinlocks and Semaphores</title>
|
||||
<title>Three Main Types of Kernel Locks: Spinlocks, Mutexes and Semaphores</title>
|
||||
|
||||
<para>
|
||||
There are three main types of kernel locks. The fundamental type
|
||||
|
@ -456,8 +456,9 @@ void (*irq_clear) (struct ata_port *);
|
||||
|
||||
<sect2><title>SATA phy read/write</title>
|
||||
<programlisting>
|
||||
u32 (*scr_read) (struct ata_port *ap, unsigned int sc_reg);
|
||||
void (*scr_write) (struct ata_port *ap, unsigned int sc_reg,
|
||||
int (*scr_read) (struct ata_port *ap, unsigned int sc_reg,
|
||||
u32 *val);
|
||||
int (*scr_write) (struct ata_port *ap, unsigned int sc_reg,
|
||||
u32 val);
|
||||
</programlisting>
|
||||
|
||||
|
149
Documentation/DocBook/s390-drivers.tmpl
Normal file
149
Documentation/DocBook/s390-drivers.tmpl
Normal file
@ -0,0 +1,149 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.1.2//EN"
|
||||
"http://www.oasis-open.org/docbook/xml/4.1.2/docbookx.dtd" []>
|
||||
|
||||
<book id="s390drivers">
|
||||
<bookinfo>
|
||||
<title>Writing s390 channel device drivers</title>
|
||||
|
||||
<authorgroup>
|
||||
<author>
|
||||
<firstname>Cornelia</firstname>
|
||||
<surname>Huck</surname>
|
||||
<affiliation>
|
||||
<address>
|
||||
<email>cornelia.huck@de.ibm.com</email>
|
||||
</address>
|
||||
</affiliation>
|
||||
</author>
|
||||
</authorgroup>
|
||||
|
||||
<copyright>
|
||||
<year>2007</year>
|
||||
<holder>IBM Corp.</holder>
|
||||
</copyright>
|
||||
|
||||
<legalnotice>
|
||||
<para>
|
||||
This documentation is free software; you can redistribute
|
||||
it and/or modify it under the terms of the GNU General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2 of the License, or (at your option) any later
|
||||
version.
|
||||
</para>
|
||||
|
||||
<para>
|
||||
This program is distributed in the hope that it will be
|
||||
useful, but WITHOUT ANY WARRANTY; without even the implied
|
||||
warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
||||
See the GNU General Public License for more details.
|
||||
</para>
|
||||
|
||||
<para>
|
||||
You should have received a copy of the GNU General Public
|
||||
License along with this program; if not, write to the Free
|
||||
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
|
||||
MA 02111-1307 USA
|
||||
</para>
|
||||
|
||||
<para>
|
||||
For more details see the file COPYING in the source
|
||||
distribution of Linux.
|
||||
</para>
|
||||
</legalnotice>
|
||||
</bookinfo>
|
||||
|
||||
<toc></toc>
|
||||
|
||||
<chapter id="intro">
|
||||
<title>Introduction</title>
|
||||
<para>
|
||||
This document describes the interfaces available for device drivers that
|
||||
drive s390 based channel attached devices. This includes interfaces for
|
||||
interaction with the hardware and interfaces for interacting with the
|
||||
common driver core. Those interfaces are provided by the s390 common I/O
|
||||
layer.
|
||||
</para>
|
||||
<para>
|
||||
The document assumes a familiarity with the technical terms associated
|
||||
with the s390 channel I/O architecture. For a description of this
|
||||
architecture, please refer to the "z/Architecture: Principles of
|
||||
Operation", IBM publication no. SA22-7832.
|
||||
</para>
|
||||
<para>
|
||||
While most I/O devices on a s390 system are typically driven through the
|
||||
channel I/O mechanism described here, there are various other methods
|
||||
(like the diag interface). These are out of the scope of this document.
|
||||
</para>
|
||||
<para>
|
||||
Some additional information can also be found in the kernel source
|
||||
under Documentation/s390/driver-model.txt.
|
||||
</para>
|
||||
</chapter>
|
||||
<chapter id="ccw">
|
||||
<title>The ccw bus</title>
|
||||
<para>
|
||||
The ccw bus typically contains the majority of devices available to
|
||||
a s390 system. Named after the channel command word (ccw), the basic
|
||||
command structure used to address its devices, the ccw bus contains
|
||||
so-called channel attached devices. They are addressed via subchannels,
|
||||
visible on the css bus. A device driver, however, will never interact
|
||||
with the subchannel directly, but only via the device on the ccw bus,
|
||||
the ccw device.
|
||||
</para>
|
||||
<sect1 id="channelIO">
|
||||
<title>I/O functions for channel-attached devices</title>
|
||||
<para>
|
||||
Some hardware structures have been translated into C structures for use
|
||||
by the common I/O layer and device drivers. For more information on
|
||||
the hardware structures represented here, please consult the Principles
|
||||
of Operation.
|
||||
</para>
|
||||
!Iinclude/asm-s390/cio.h
|
||||
</sect1>
|
||||
<sect1 id="ccwdev">
|
||||
<title>ccw devices</title>
|
||||
<para>
|
||||
Devices that want to initiate channel I/O need to attach to the ccw bus.
|
||||
Interaction with the driver core is done via the common I/O layer, which
|
||||
provides the abstractions of ccw devices and ccw device drivers.
|
||||
</para>
|
||||
<para>
|
||||
The functions that initiate or terminate channel I/O all act upon a
|
||||
ccw device structure. Device drivers must not bypass those functions
|
||||
or strange side effects may happen.
|
||||
</para>
|
||||
!Iinclude/asm-s390/ccwdev.h
|
||||
!Edrivers/s390/cio/device.c
|
||||
!Edrivers/s390/cio/device_ops.c
|
||||
</sect1>
|
||||
<sect1 id="cmf">
|
||||
<title>The channel-measurement facility</title>
|
||||
<para>
|
||||
The channel-measurement facility provides a means to collect
|
||||
measurement data which is made available by the channel subsystem
|
||||
for each channel attached device.
|
||||
</para>
|
||||
!Iinclude/asm-s390/cmb.h
|
||||
!Edrivers/s390/cio/cmf.c
|
||||
</sect1>
|
||||
</chapter>
|
||||
|
||||
<chapter id="ccwgroup">
|
||||
<title>The ccwgroup bus</title>
|
||||
<para>
|
||||
The ccwgroup bus only contains artificial devices, created by the user.
|
||||
Many networking devices (e.g. qeth) are in fact composed of several
|
||||
ccw devices (like read, write and data channel for qeth). The
|
||||
ccwgroup bus provides a mechanism to create a meta-device which
|
||||
contains those ccw devices as slave devices and can be associated
|
||||
with the netdevice.
|
||||
</para>
|
||||
<sect1 id="ccwgroupdevices">
|
||||
<title>ccw group devices</title>
|
||||
!Iinclude/asm-s390/ccwgroup.h
|
||||
!Edrivers/s390/cio/ccwgroup.c
|
||||
</sect1>
|
||||
</chapter>
|
||||
|
||||
</book>
|
607
Documentation/DocBook/uio-howto.tmpl
Normal file
607
Documentation/DocBook/uio-howto.tmpl
Normal file
@ -0,0 +1,607 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.2//EN"
|
||||
"http://www.oasis-open.org/docbook/xml/4.2/docbookx.dtd" []>
|
||||
|
||||
<book id="index">
|
||||
<bookinfo>
|
||||
<title>The Userspace I/O HOWTO</title>
|
||||
|
||||
<author>
|
||||
<firstname>Hans-Jürgen</firstname>
|
||||
<surname>Koch</surname>
|
||||
<authorblurb><para>Linux developer, Linutronix</para></authorblurb>
|
||||
<affiliation>
|
||||
<orgname>
|
||||
<ulink url="http://www.linutronix.de">Linutronix</ulink>
|
||||
</orgname>
|
||||
|
||||
<address>
|
||||
<email>hjk@linutronix.de</email>
|
||||
</address>
|
||||
</affiliation>
|
||||
</author>
|
||||
|
||||
<pubdate>2006-12-11</pubdate>
|
||||
|
||||
<abstract>
|
||||
<para>This HOWTO describes the concept and usage of the Linux kernel's
|
||||
Userspace I/O system.</para>
|
||||
</abstract>
|
||||
|
||||
<revhistory>
|
||||
<revision>
|
||||
<revnumber>0.3</revnumber>
|
||||
<date>2007-04-29</date>
|
||||
<authorinitials>hjk</authorinitials>
|
||||
<revremark>Added section about userspace drivers.</revremark>
|
||||
</revision>
|
||||
<revision>
|
||||
<revnumber>0.2</revnumber>
|
||||
<date>2007-02-13</date>
|
||||
<authorinitials>hjk</authorinitials>
|
||||
<revremark>Update after multiple mappings were added.</revremark>
|
||||
</revision>
|
||||
<revision>
|
||||
<revnumber>0.1</revnumber>
|
||||
<date>2006-12-11</date>
|
||||
<authorinitials>hjk</authorinitials>
|
||||
<revremark>First draft.</revremark>
|
||||
</revision>
|
||||
</revhistory>
|
||||
</bookinfo>
|
||||
|
||||
<chapter id="aboutthisdoc">
|
||||
<?dbhtml filename="about.html"?>
|
||||
<title>About this document</title>
|
||||
|
||||
<sect1 id="copyright">
|
||||
<?dbhtml filename="copyright.html"?>
|
||||
<title>Copyright and License</title>
|
||||
<para>
|
||||
Copyright (c) 2006 by Hans-Jürgen Koch.</para>
|
||||
<para>
|
||||
This documentation is Free Software licensed under the terms of the
|
||||
GPL version 2.
|
||||
</para>
|
||||
</sect1>
|
||||
|
||||
<sect1 id="translations">
|
||||
<?dbhtml filename="translations.html"?>
|
||||
<title>Translations</title>
|
||||
|
||||
<para>If you know of any translations for this document, or you are
|
||||
interested in translating it, please email me
|
||||
<email>hjk@linutronix.de</email>.
|
||||
</para>
|
||||
</sect1>
|
||||
|
||||
<sect1 id="preface">
|
||||
<title>Preface</title>
|
||||
<para>
|
||||
For many types of devices, creating a Linux kernel driver is
|
||||
overkill. All that is really needed is some way to handle an
|
||||
interrupt and provide access to the memory space of the
|
||||
device. The logic of controlling the device does not
|
||||
necessarily have to be within the kernel, as the device does
|
||||
not need to take advantage of any of the other resources that the
|
||||
kernel provides. One common class of such devices is
|
||||
industrial I/O cards.
|
||||
</para>
|
||||
<para>
|
||||
To address this situation, the userspace I/O system (UIO) was
|
||||
designed. For typical industrial I/O cards, only a very small
|
||||
kernel module is needed. The main part of the driver will run in
|
||||
user space. This simplifies development and reduces the risk of
|
||||
serious bugs within a kernel module.
|
||||
</para>
|
||||
</sect1>
|
||||
|
||||
<sect1 id="thanks">
|
||||
<title>Acknowledgments</title>
|
||||
<para>I'd like to thank Thomas Gleixner and Benedikt Spranger of
|
||||
Linutronix, who have not only written most of the UIO code, but also
|
||||
helped greatly writing this HOWTO by giving me all kinds of background
|
||||
information.</para>
|
||||
</sect1>
|
||||
|
||||
<sect1 id="feedback">
|
||||
<title>Feedback</title>
|
||||
<para>Find something wrong with this document? (Or perhaps something
|
||||
right?) I would love to hear from you. Please email me at
|
||||
<email>hjk@linutronix.de</email>.</para>
|
||||
</sect1>
|
||||
</chapter>
|
||||
|
||||
<chapter id="about">
|
||||
<?dbhtml filename="about.html"?>
|
||||
<title>About UIO</title>
|
||||
|
||||
<para>If you use UIO for your card's driver, here's what you get:</para>
|
||||
|
||||
<itemizedlist>
|
||||
<listitem>
|
||||
<para>only one small kernel module to write and maintain.</para>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<para>develop the main part of your driver in user space,
|
||||
with all the tools and libraries you're used to.</para>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<para>bugs in your driver won't crash the kernel.</para>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<para>updates of your driver can take place without recompiling
|
||||
the kernel.</para>
|
||||
</listitem>
|
||||
</itemizedlist>
|
||||
|
||||
<sect1 id="how_uio_works">
|
||||
<title>How UIO works</title>
|
||||
<para>
|
||||
Each UIO device is accessed through a device file and several
|
||||
sysfs attribute files. The device file will be called
|
||||
<filename>/dev/uio0</filename> for the first device, and
|
||||
<filename>/dev/uio1</filename>, <filename>/dev/uio2</filename>
|
||||
and so on for subsequent devices.
|
||||
</para>
|
||||
|
||||
<para><filename>/dev/uioX</filename> is used to access the
|
||||
address space of the card. Just use
|
||||
<function>mmap()</function> to access registers or RAM
|
||||
locations of your card.
|
||||
</para>
|
||||
|
||||
<para>
|
||||
Interrupts are handled by reading from
|
||||
<filename>/dev/uioX</filename>. A blocking
|
||||
<function>read()</function> from
|
||||
<filename>/dev/uioX</filename> will return as soon as an
|
||||
interrupt occurs. You can also use
|
||||
<function>select()</function> on
|
||||
<filename>/dev/uioX</filename> to wait for an interrupt. The
|
||||
integer value read from <filename>/dev/uioX</filename>
|
||||
represents the total interrupt count. You can use this number
|
||||
to figure out if you missed some interrupts.
|
||||
</para>
|
||||
|
||||
<para>
|
||||
To handle interrupts properly, your custom kernel module can
|
||||
provide its own interrupt handler. It will automatically be
|
||||
called by the built-in handler.
|
||||
</para>
|
||||
|
||||
<para>
|
||||
For cards that don't generate interrupts but need to be
|
||||
polled, there is the possibility to set up a timer that
|
||||
triggers the interrupt handler at configurable time intervals.
|
||||
See <filename>drivers/uio/uio_dummy.c</filename> for an
|
||||
example of this technique.
|
||||
</para>
|
||||
|
||||
<para>
|
||||
Each driver provides attributes that are used to read or write
|
||||
variables. These attributes are accessible through sysfs
|
||||
files. A custom kernel driver module can add its own
|
||||
attributes to the device owned by the uio driver, but not added
|
||||
to the UIO device itself at this time. This might change in the
|
||||
future if it would be found to be useful.
|
||||
</para>
|
||||
|
||||
<para>
|
||||
The following standard attributes are provided by the UIO
|
||||
framework:
|
||||
</para>
|
||||
<itemizedlist>
|
||||
<listitem>
|
||||
<para>
|
||||
<filename>name</filename>: The name of your device. It is
|
||||
recommended to use the name of your kernel module for this.
|
||||
</para>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<para>
|
||||
<filename>version</filename>: A version string defined by your
|
||||
driver. This allows the user space part of your driver to deal
|
||||
with different versions of the kernel module.
|
||||
</para>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<para>
|
||||
<filename>event</filename>: The total number of interrupts
|
||||
handled by the driver since the last time the device node was
|
||||
read.
|
||||
</para>
|
||||
</listitem>
|
||||
</itemizedlist>
|
||||
<para>
|
||||
These attributes appear under the
|
||||
<filename>/sys/class/uio/uioX</filename> directory. Please
|
||||
note that this directory might be a symlink, and not a real
|
||||
directory. Any userspace code that accesses it must be able
|
||||
to handle this.
|
||||
</para>
|
||||
<para>
|
||||
Each UIO device can make one or more memory regions available for
|
||||
memory mapping. This is necessary because some industrial I/O cards
|
||||
require access to more than one PCI memory region in a driver.
|
||||
</para>
|
||||
<para>
|
||||
Each mapping has its own directory in sysfs, the first mapping
|
||||
appears as <filename>/sys/class/uio/uioX/maps/map0/</filename>.
|
||||
Subsequent mappings create directories <filename>map1/</filename>,
|
||||
<filename>map2/</filename>, and so on. These directories will only
|
||||
appear if the size of the mapping is not 0.
|
||||
</para>
|
||||
<para>
|
||||
Each <filename>mapX/</filename> directory contains two read-only files
|
||||
that show start address and size of the memory:
|
||||
</para>
|
||||
<itemizedlist>
|
||||
<listitem>
|
||||
<para>
|
||||
<filename>addr</filename>: The address of memory that can be mapped.
|
||||
</para>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<para>
|
||||
<filename>size</filename>: The size, in bytes, of the memory
|
||||
pointed to by addr.
|
||||
</para>
|
||||
</listitem>
|
||||
</itemizedlist>
|
||||
|
||||
<para>
|
||||
From userspace, the different mappings are distinguished by adjusting
|
||||
the <varname>offset</varname> parameter of the
|
||||
<function>mmap()</function> call. To map the memory of mapping N, you
|
||||
have to use N times the page size as your offset:
|
||||
</para>
|
||||
<programlisting format="linespecific">
|
||||
offset = N * getpagesize();
|
||||
</programlisting>
|
||||
|
||||
</sect1>
|
||||
</chapter>
|
||||
|
||||
<chapter id="using-uio_dummy" xreflabel="Using uio_dummy">
|
||||
<?dbhtml filename="using-uio_dummy.html"?>
|
||||
<title>Using uio_dummy</title>
|
||||
<para>
|
||||
Well, there is no real use for uio_dummy. Its only purpose is
|
||||
to test most parts of the UIO system (everything except
|
||||
hardware interrupts), and to serve as an example for the
|
||||
kernel module that you will have to write yourself.
|
||||
</para>
|
||||
|
||||
<sect1 id="what_uio_dummy_does">
|
||||
<title>What uio_dummy does</title>
|
||||
<para>
|
||||
The kernel module <filename>uio_dummy.ko</filename> creates a
|
||||
device that uses a timer to generate periodic interrupts. The
|
||||
interrupt handler does nothing but increment a counter. The
|
||||
driver adds two custom attributes, <varname>count</varname>
|
||||
and <varname>freq</varname>, that appear under
|
||||
<filename>/sys/devices/platform/uio_dummy/</filename>.
|
||||
</para>
|
||||
|
||||
<para>
|
||||
The attribute <varname>count</varname> can be read and
|
||||
written. The associated file
|
||||
<filename>/sys/devices/platform/uio_dummy/count</filename>
|
||||
appears as a normal text file and contains the total number of
|
||||
timer interrupts. If you look at it (e.g. using
|
||||
<function>cat</function>), you'll notice it is slowly counting
|
||||
up.
|
||||
</para>
|
||||
|
||||
<para>
|
||||
The attribute <varname>freq</varname> can be read and written.
|
||||
The content of
|
||||
<filename>/sys/devices/platform/uio_dummy/freq</filename>
|
||||
represents the number of system timer ticks between two timer
|
||||
interrupts. The default value of <varname>freq</varname> is
|
||||
the value of the kernel variable <varname>HZ</varname>, which
|
||||
gives you an interval of one second. Lower values will
|
||||
increase the frequency. Try the following:
|
||||
</para>
|
||||
<programlisting format="linespecific">
|
||||
cd /sys/devices/platform/uio_dummy/
|
||||
echo 100 > freq
|
||||
</programlisting>
|
||||
<para>
|
||||
Use <function>cat count</function> to see how the interrupt
|
||||
frequency changes.
|
||||
</para>
|
||||
</sect1>
|
||||
</chapter>
|
||||
|
||||
<chapter id="custom_kernel_module" xreflabel="Writing your own kernel module">
|
||||
<?dbhtml filename="custom_kernel_module.html"?>
|
||||
<title>Writing your own kernel module</title>
|
||||
<para>
|
||||
Please have a look at <filename>uio_dummy.c</filename> as an
|
||||
example. The following paragraphs explain the different
|
||||
sections of this file.
|
||||
</para>
|
||||
|
||||
<sect1 id="uio_info">
|
||||
<title>struct uio_info</title>
|
||||
<para>
|
||||
This structure tells the framework the details of your driver.
|
||||
Some of the members are required, others are optional.
|
||||
</para>
|
||||
|
||||
<itemizedlist>
|
||||
<listitem><para>
|
||||
<varname>char *name</varname>: Required. The name of your driver as
|
||||
it will appear in sysfs. I recommend using the name of your module for this.
|
||||
</para></listitem>
|
||||
|
||||
<listitem><para>
|
||||
<varname>char *version</varname>: Required. This string appears in
|
||||
<filename>/sys/class/uio/uioX/version</filename>.
|
||||
</para></listitem>
|
||||
|
||||
<listitem><para>
|
||||
<varname>struct uio_mem mem[ MAX_UIO_MAPS ]</varname>: Required if you
|
||||
have memory that can be mapped with <function>mmap()</function>. For each
|
||||
mapping you need to fill one of the <varname>uio_mem</varname> structures.
|
||||
See the description below for details.
|
||||
</para></listitem>
|
||||
|
||||
<listitem><para>
|
||||
<varname>long irq</varname>: Required. If your hardware generates an
|
||||
interrupt, it's your module's task to determine the irq number during
|
||||
initialization. If you don't have a hardware generated interrupt but
|
||||
want to trigger the interrupt handler in some other way, set
|
||||
<varname>irq</varname> to <varname>UIO_IRQ_CUSTOM</varname>. The
|
||||
uio_dummy module does this as it triggers the event mechanism in a timer
|
||||
routine. If you had no interrupt at all, you could set
|
||||
<varname>irq</varname> to <varname>UIO_IRQ_NONE</varname>, though this
|
||||
rarely makes sense.
|
||||
</para></listitem>
|
||||
|
||||
<listitem><para>
|
||||
<varname>unsigned long irq_flags</varname>: Required if you've set
|
||||
<varname>irq</varname> to a hardware interrupt number. The flags given
|
||||
here will be used in the call to <function>request_irq()</function>.
|
||||
</para></listitem>
|
||||
|
||||
<listitem><para>
|
||||
<varname>int (*mmap)(struct uio_info *info, struct vm_area_struct
|
||||
*vma)</varname>: Optional. If you need a special
|
||||
<function>mmap()</function> function, you can set it here. If this
|
||||
pointer is not NULL, your <function>mmap()</function> will be called
|
||||
instead of the built-in one.
|
||||
</para></listitem>
|
||||
|
||||
<listitem><para>
|
||||
<varname>int (*open)(struct uio_info *info, struct inode *inode)
|
||||
</varname>: Optional. You might want to have your own
|
||||
<function>open()</function>, e.g. to enable interrupts only when your
|
||||
device is actually used.
|
||||
</para></listitem>
|
||||
|
||||
<listitem><para>
|
||||
<varname>int (*release)(struct uio_info *info, struct inode *inode)
|
||||
</varname>: Optional. If you define your own
|
||||
<function>open()</function>, you will probably also want a custom
|
||||
<function>release()</function> function.
|
||||
</para></listitem>
|
||||
</itemizedlist>
|
||||
|
||||
<para>
|
||||
Usually, your device will have one or more memory regions that can be mapped
|
||||
to user space. For each region, you have to set up a
|
||||
<varname>struct uio_mem</varname> in the <varname>mem[]</varname> array.
|
||||
Here's a description of the fields of <varname>struct uio_mem</varname>:
|
||||
</para>
|
||||
|
||||
<itemizedlist>
|
||||
<listitem><para>
|
||||
<varname>int memtype</varname>: Required if the mapping is used. Set this to
|
||||
<varname>UIO_MEM_PHYS</varname> if you have physical memory on your
|
||||
card to be mapped. Use <varname>UIO_MEM_LOGICAL</varname> for logical
|
||||
memory (e.g. allocated with <function>kmalloc()</function>). There's also
|
||||
<varname>UIO_MEM_VIRTUAL</varname> for virtual memory.
|
||||
</para></listitem>
|
||||
|
||||
<listitem><para>
|
||||
<varname>unsigned long addr</varname>: Required if the mapping is used.
|
||||
Fill in the address of your memory block. This address is the one that
|
||||
appears in sysfs.
|
||||
</para></listitem>
|
||||
|
||||
<listitem><para>
|
||||
<varname>unsigned long size</varname>: Fill in the size of the
|
||||
memory block that <varname>addr</varname> points to. If <varname>size</varname>
|
||||
is zero, the mapping is considered unused. Note that you
|
||||
<emphasis>must</emphasis> initialize <varname>size</varname> with zero for
|
||||
all unused mappings.
|
||||
</para></listitem>
|
||||
|
||||
<listitem><para>
|
||||
<varname>void *internal_addr</varname>: If you have to access this memory
|
||||
region from within your kernel module, you will want to map it internally by
|
||||
using something like <function>ioremap()</function>. Addresses
|
||||
returned by this function cannot be mapped to user space, so you must not
|
||||
store it in <varname>addr</varname>. Use <varname>internal_addr</varname>
|
||||
instead to remember such an address.
|
||||
</para></listitem>
|
||||
</itemizedlist>
|
||||
|
||||
<para>
|
||||
Please do not touch the <varname>kobj</varname> element of
|
||||
<varname>struct uio_mem</varname>! It is used by the UIO framework
|
||||
to set up sysfs files for this mapping. Simply leave it alone.
|
||||
</para>
|
||||
</sect1>
|
||||
|
||||
<sect1 id="adding_irq_handler">
|
||||
<title>Adding an interrupt handler</title>
|
||||
<para>
|
||||
What you need to do in your interrupt handler depends on your
|
||||
hardware and on how you want to handle it. You should try to
|
||||
keep the amount of code in your kernel interrupt handler low.
|
||||
If your hardware requires no action that you
|
||||
<emphasis>have</emphasis> to perform after each interrupt,
|
||||
then your handler can be empty.</para> <para>If, on the other
|
||||
hand, your hardware <emphasis>needs</emphasis> some action to
|
||||
be performed after each interrupt, then you
|
||||
<emphasis>must</emphasis> do it in your kernel module. Note
|
||||
that you cannot rely on the userspace part of your driver. Your
|
||||
userspace program can terminate at any time, possibly leaving
|
||||
your hardware in a state where proper interrupt handling is
|
||||
still required.
|
||||
</para>
|
||||
|
||||
<para>
|
||||
There might also be applications where you want to read data
|
||||
from your hardware at each interrupt and buffer it in a piece
|
||||
of kernel memory you've allocated for that purpose. With this
|
||||
technique you could avoid loss of data if your userspace
|
||||
program misses an interrupt.
|
||||
</para>
|
||||
|
||||
<para>
|
||||
A note on shared interrupts: Your driver should support
|
||||
interrupt sharing whenever this is possible. It is possible if
|
||||
and only if your driver can detect whether your hardware has
|
||||
triggered the interrupt or not. This is usually done by looking
|
||||
at an interrupt status register. If your driver sees that the
|
||||
IRQ bit is actually set, it will perform its actions, and the
|
||||
handler returns IRQ_HANDLED. If the driver detects that it was
|
||||
not your hardware that caused the interrupt, it will do nothing
|
||||
and return IRQ_NONE, allowing the kernel to call the next
|
||||
possible interrupt handler.
|
||||
</para>
|
||||
|
||||
<para>
|
||||
If you decide not to support shared interrupts, your card
|
||||
won't work in computers with no free interrupts. As this
|
||||
frequently happens on the PC platform, you can save yourself a
|
||||
lot of trouble by supporting interrupt sharing.
|
||||
</para>
|
||||
</sect1>
|
||||
|
||||
</chapter>
|
||||
|
||||
<chapter id="userspace_driver" xreflabel="Writing a driver in user space">
|
||||
<?dbhtml filename="userspace_driver.html"?>
|
||||
<title>Writing a driver in userspace</title>
|
||||
<para>
|
||||
Once you have a working kernel module for your hardware, you can
|
||||
write the userspace part of your driver. You don't need any special
|
||||
libraries, your driver can be written in any reasonable language,
|
||||
you can use floating point numbers and so on. In short, you can
|
||||
use all the tools and libraries you'd normally use for writing a
|
||||
userspace application.
|
||||
</para>
|
||||
|
||||
<sect1 id="getting_uio_information">
|
||||
<title>Getting information about your UIO device</title>
|
||||
<para>
|
||||
Information about all UIO devices is available in sysfs. The
|
||||
first thing you should do in your driver is check
|
||||
<varname>name</varname> and <varname>version</varname> to
|
||||
make sure you're talking to the right device and that its kernel
|
||||
driver has the version you expect.
|
||||
</para>
|
||||
<para>
|
||||
You should also make sure that the memory mapping you need
|
||||
exists and has the size you expect.
|
||||
</para>
|
||||
<para>
|
||||
There is a tool called <varname>lsuio</varname> that lists
|
||||
UIO devices and their attributes. It is available here:
|
||||
</para>
|
||||
<para>
|
||||
<ulink url="http://www.osadl.org/projects/downloads/UIO/user/">
|
||||
http://www.osadl.org/projects/downloads/UIO/user/</ulink>
|
||||
</para>
|
||||
<para>
|
||||
With <varname>lsuio</varname> you can quickly check if your
|
||||
kernel module is loaded and which attributes it exports.
|
||||
Have a look at the manpage for details.
|
||||
</para>
|
||||
<para>
|
||||
The source code of <varname>lsuio</varname> can serve as an
|
||||
example for getting information about a UIO device.
|
||||
The file <filename>uio_helper.c</filename> contains a lot of
|
||||
functions you could use in your userspace driver code.
|
||||
</para>
|
||||
</sect1>
|
||||
|
||||
<sect1 id="mmap_device_memory">
|
||||
<title>mmap() device memory</title>
|
||||
<para>
|
||||
After you made sure you've got the right device with the
|
||||
memory mappings you need, all you have to do is to call
|
||||
<function>mmap()</function> to map the device's memory
|
||||
to userspace.
|
||||
</para>
|
||||
<para>
|
||||
The parameter <varname>offset</varname> of the
|
||||
<function>mmap()</function> call has a special meaning
|
||||
for UIO devices: It is used to select which mapping of
|
||||
your device you want to map. To map the memory of
|
||||
mapping N, you have to use N times the page size as
|
||||
your offset:
|
||||
</para>
|
||||
<programlisting format="linespecific">
|
||||
offset = N * getpagesize();
|
||||
</programlisting>
|
||||
<para>
|
||||
N starts from zero, so if you've got only one memory
|
||||
range to map, set <varname>offset = 0</varname>.
|
||||
A drawback of this technique is that memory is always
|
||||
mapped beginning with its start address.
|
||||
</para>
|
||||
</sect1>
|
||||
|
||||
<sect1 id="wait_for_interrupts">
|
||||
<title>Waiting for interrupts</title>
|
||||
<para>
|
||||
After you successfully mapped your device's memory, you
|
||||
can access it like an ordinary array. Usually, you will
|
||||
perform some initialization. After that, your hardware
|
||||
starts working and will generate an interrupt as soon
|
||||
as it's finished, has some data available, or needs your
|
||||
attention because an error occurred.
|
||||
</para>
|
||||
<para>
|
||||
<filename>/dev/uioX</filename> is a read-only file. A
|
||||
<function>read()</function> will always block until an
|
||||
interrupt occurs. There is only one legal value for the
|
||||
<varname>count</varname> parameter of
|
||||
<function>read()</function>, and that is the size of a
|
||||
signed 32 bit integer (4). Any other value for
|
||||
<varname>count</varname> causes <function>read()</function>
|
||||
to fail. The signed 32 bit integer read is the interrupt
|
||||
count of your device. If the value is one more than the value
|
||||
you read the last time, everything is OK. If the difference
|
||||
is greater than one, you missed interrupts.
|
||||
</para>
|
||||
<para>
|
||||
You can also use <function>select()</function> on
|
||||
<filename>/dev/uioX</filename>.
|
||||
</para>
|
||||
</sect1>
|
||||
|
||||
</chapter>
|
||||
|
||||
<appendix id="app1">
|
||||
<title>Further information</title>
|
||||
<itemizedlist>
|
||||
<listitem><para>
|
||||
<ulink url="http://www.osadl.org">
|
||||
OSADL homepage.</ulink>
|
||||
</para></listitem>
|
||||
<listitem><para>
|
||||
<ulink url="http://www.linutronix.de">
|
||||
Linutronix homepage.</ulink>
|
||||
</para></listitem>
|
||||
</itemizedlist>
|
||||
</appendix>
|
||||
|
||||
</book>
|
@ -208,7 +208,7 @@ tools. One such tool that is particularly recommended is the Linux
|
||||
Cross-Reference project, which is able to present source code in a
|
||||
self-referential, indexed webpage format. An excellent up-to-date
|
||||
repository of the kernel code may be found at:
|
||||
http://sosdg.org/~coywolf/lxr/
|
||||
http://users.sosdg.org/~qiyong/lxr/
|
||||
|
||||
|
||||
The development process
|
||||
@ -249,6 +249,9 @@ process is as follows:
|
||||
release a new -rc kernel every week.
|
||||
- Process continues until the kernel is considered "ready", the
|
||||
process should last around 6 weeks.
|
||||
- A list of known regressions present in each -rc release is
|
||||
tracked at the following URI:
|
||||
http://kernelnewbies.org/known_regressions
|
||||
|
||||
It is worth mentioning what Andrew Morton wrote on the linux-kernel
|
||||
mailing list about kernel releases:
|
||||
@ -381,7 +384,7 @@ One of the best ways to put into practice your hacking skills is by fixing
|
||||
bugs reported by other people. Not only you will help to make the kernel
|
||||
more stable, you'll learn to fix real world problems and you will improve
|
||||
your skills, and other developers will be aware of your presence. Fixing
|
||||
bugs is one of the best ways to earn merit amongst the developers, because
|
||||
bugs is one of the best ways to get merits among other developers, because
|
||||
not many people like wasting time fixing other people's bugs.
|
||||
|
||||
To work in the already reported bug reports, go to http://bugzilla.kernel.org.
|
||||
|
@ -241,68 +241,7 @@ address space of the MSI-X table/MSI-X PBA. Otherwise, the PCI subsystem
|
||||
will fail enabling MSI-X on its hardware device when it calls the function
|
||||
pci_enable_msix().
|
||||
|
||||
5.3.2 Handling MSI-X allocation
|
||||
|
||||
Determining the number of MSI-X vectors allocated to a function is
|
||||
dependent on the number of MSI capable devices and MSI-X capable
|
||||
devices populated in the system. The policy of allocating MSI-X
|
||||
vectors to a function is defined as the following:
|
||||
|
||||
#of MSI-X vectors allocated to a function = (x - y)/z where
|
||||
|
||||
x = The number of available PCI vector resources by the time
|
||||
the device driver calls pci_enable_msix(). The PCI vector
|
||||
resources is the sum of the number of unassigned vectors
|
||||
(new) and the number of released vectors when any MSI/MSI-X
|
||||
device driver switches its hardware device back to a legacy
|
||||
mode or is hot-removed. The number of unassigned vectors
|
||||
may exclude some vectors reserved, as defined in parameter
|
||||
NR_HP_RESERVED_VECTORS, for the case where the system is
|
||||
capable of supporting hot-add/hot-remove operations. Users
|
||||
may change the value defined in NR_HR_RESERVED_VECTORS to
|
||||
meet their specific needs.
|
||||
|
||||
y = The number of MSI capable devices populated in the system.
|
||||
This policy ensures that each MSI capable device has its
|
||||
vector reserved to avoid the case where some MSI-X capable
|
||||
drivers may attempt to claim all available vector resources.
|
||||
|
||||
z = The number of MSI-X capable devices populated in the system.
|
||||
This policy ensures that maximum (x - y) is distributed
|
||||
evenly among MSI-X capable devices.
|
||||
|
||||
Note that the PCI subsystem scans y and z during a bus enumeration.
|
||||
When the PCI subsystem completes configuring MSI/MSI-X capability
|
||||
structure of a device as requested by its device driver, y/z is
|
||||
decremented accordingly.
|
||||
|
||||
5.3.3 Handling MSI-X shortages
|
||||
|
||||
For the case where fewer MSI-X vectors are allocated to a function
|
||||
than requested, the function pci_enable_msix() will return the
|
||||
maximum number of MSI-X vectors available to the caller. A device
|
||||
driver may re-send its request with fewer or equal vectors indicated
|
||||
in the return. For example, if a device driver requests 5 vectors, but
|
||||
the number of available vectors is 3 vectors, a value of 3 will be
|
||||
returned as a result of pci_enable_msix() call. A function could be
|
||||
designed for its driver to use only 3 MSI-X table entries as
|
||||
different combinations as ABC--, A-B-C, A--CB, etc. Note that this
|
||||
patch does not support multiple entries with the same vector. Such
|
||||
attempt by a device driver to use 5 MSI-X table entries with 3 vectors
|
||||
as ABBCC, AABCC, BCCBA, etc will result as a failure by the function
|
||||
pci_enable_msix(). Below are the reasons why supporting multiple
|
||||
entries with the same vector is an undesirable solution.
|
||||
|
||||
- The PCI subsystem cannot determine the entry that
|
||||
generated the message to mask/unmask MSI while handling
|
||||
software driver ISR. Attempting to walk through all MSI-X
|
||||
table entries (2048 max) to mask/unmask any match vector
|
||||
is an undesirable solution.
|
||||
|
||||
- Walking through all MSI-X table entries (2048 max) to handle
|
||||
SMP affinity of any match vector is an undesirable solution.
|
||||
|
||||
5.3.4 API pci_enable_msix
|
||||
5.3.2 API pci_enable_msix
|
||||
|
||||
int pci_enable_msix(struct pci_dev *dev, struct msix_entry *entries, int nvec)
|
||||
|
||||
@ -339,7 +278,7 @@ a failure. This failure may be a result of duplicate entries
|
||||
specified in second argument, or a result of no available vector,
|
||||
or a result of failing to initialize MSI-X table entries.
|
||||
|
||||
5.3.5 API pci_disable_msix
|
||||
5.3.3 API pci_disable_msix
|
||||
|
||||
void pci_disable_msix(struct pci_dev *dev)
|
||||
|
||||
@ -349,7 +288,7 @@ always call free_irq() on all MSI-X vectors it has done request_irq()
|
||||
on before calling this API. Failure to do so results in a BUG_ON() and
|
||||
a device will be left with MSI-X enabled and leaks its vectors.
|
||||
|
||||
5.3.6 MSI-X mode vs. legacy mode diagram
|
||||
5.3.4 MSI-X mode vs. legacy mode diagram
|
||||
|
||||
The below diagram shows the events which switch the interrupt
|
||||
mode on the MSI-X capable device function between MSI-X mode and
|
||||
@ -407,7 +346,7 @@ between MSI mod MSI-X mode during a run-time.
|
||||
MSI/MSI-X support requires support from both system hardware and
|
||||
individual hardware device functions.
|
||||
|
||||
5.5.1 System hardware support
|
||||
5.5.1 Required x86 hardware support
|
||||
|
||||
Since the target of MSI address is the local APIC CPU, enabling
|
||||
MSI/MSI-X support in the Linux kernel is dependent on whether existing
|
||||
|
@ -166,7 +166,7 @@ To solve this problem, you really only have two options:
|
||||
The option of being unfailingly polite really doesn't exist. Nobody will
|
||||
trust somebody who is so clearly hiding his true character.
|
||||
|
||||
(*) Paul Simon sang "Fifty Ways to Lose Your Lover", because quite
|
||||
(*) Paul Simon sang "Fifty Ways to Leave Your Lover", because quite
|
||||
frankly, "A Million Ways to Tell a Developer He Is a D*ckhead" doesn't
|
||||
scan nearly as well. But I'm sure he thought about it.
|
||||
|
||||
|
@ -122,11 +122,11 @@ then only post say 15 or so at a time and wait for review and integration.
|
||||
|
||||
Check your patch for basic style violations, details of which can be
|
||||
found in Documentation/CodingStyle. Failure to do so simply wastes
|
||||
the reviewers time and will get your patch rejected, probabally
|
||||
the reviewers time and will get your patch rejected, probably
|
||||
without even being read.
|
||||
|
||||
At a minimum you should check your patches with the patch style
|
||||
checker prior to submission (scripts/patchcheck.pl). You should
|
||||
checker prior to submission (scripts/checkpatch.pl). You should
|
||||
be able to justify all violations that remain in your patch.
|
||||
|
||||
|
||||
@ -560,7 +560,7 @@ NO!!!! No more huge patch bombs to linux-kernel@vger.kernel.org people!
|
||||
<http://marc.theaimsgroup.com/?l=linux-kernel&m=112112749912944&w=2>
|
||||
|
||||
Kernel Documentation/CodingStyle:
|
||||
<http://sosdg.org/~coywolf/lxr/source/Documentation/CodingStyle>
|
||||
<http://users.sosdg.org/~qiyong/lxr/source/Documentation/CodingStyle>
|
||||
|
||||
Linus Torvalds's mail on the canonical patch format:
|
||||
<http://lkml.org/lkml/2005/4/7/183>
|
||||
|
@ -196,7 +196,7 @@ void print_delayacct(struct taskstats *t)
|
||||
"IO %15s%15s\n"
|
||||
" %15llu%15llu\n"
|
||||
"MEM %15s%15s\n"
|
||||
" %15llu%15llu\n"
|
||||
" %15llu%15llu\n",
|
||||
"count", "real total", "virtual total", "delay total",
|
||||
t->cpu_count, t->cpu_run_real_total, t->cpu_run_virtual_total,
|
||||
t->cpu_delay_total,
|
||||
|
@ -79,9 +79,9 @@ and how to prepare flush requests. Note that the term 'ordered' is
|
||||
used to indicate the whole sequence of performing barrier requests
|
||||
including draining and flushing.
|
||||
|
||||
typedef void (prepare_flush_fn)(request_queue_t *q, struct request *rq);
|
||||
typedef void (prepare_flush_fn)(struct request_queue *q, struct request *rq);
|
||||
|
||||
int blk_queue_ordered(request_queue_t *q, unsigned ordered,
|
||||
int blk_queue_ordered(struct request_queue *q, unsigned ordered,
|
||||
prepare_flush_fn *prepare_flush_fn);
|
||||
|
||||
@q : the queue in question
|
||||
@ -92,7 +92,7 @@ int blk_queue_ordered(request_queue_t *q, unsigned ordered,
|
||||
For example, SCSI disk driver's prepare_flush_fn looks like the
|
||||
following.
|
||||
|
||||
static void sd_prepare_flush(request_queue_t *q, struct request *rq)
|
||||
static void sd_prepare_flush(struct request_queue *q, struct request *rq)
|
||||
{
|
||||
memset(rq->cmd, 0, sizeof(rq->cmd));
|
||||
rq->cmd_type = REQ_TYPE_BLOCK_PC;
|
||||
|
@ -477,9 +477,9 @@ With this multipage bio design:
|
||||
the same bi_io_vec array, but with the index and size accordingly modified)
|
||||
- A linked list of bios is used as before for unrelated merges (*) - this
|
||||
avoids reallocs and makes independent completions easier to handle.
|
||||
- Code that traverses the req list needs to make a distinction between
|
||||
segments of a request (bio_for_each_segment) and the distinct completion
|
||||
units/bios (rq_for_each_bio).
|
||||
- Code that traverses the req list can find all the segments of a bio
|
||||
by using rq_for_each_segment. This handles the fact that a request
|
||||
has multiple bios, each of which can have multiple segments.
|
||||
- Drivers which can't process a large bio in one shot can use the bi_idx
|
||||
field to keep track of the next bio_vec entry to process.
|
||||
(e.g a 1MB bio_vec needs to be handled in max 128kB chunks for IDE)
|
||||
@ -664,14 +664,14 @@ in lvm or md.
|
||||
|
||||
3.2.1 Traversing segments and completion units in a request
|
||||
|
||||
The macros bio_for_each_segment() and rq_for_each_bio() should be used for
|
||||
traversing the bios in the request list (drivers should avoid directly
|
||||
trying to do it themselves). Using these helpers should also make it easier
|
||||
to cope with block changes in the future.
|
||||
The macro rq_for_each_segment() should be used for traversing the bios
|
||||
in the request list (drivers should avoid directly trying to do it
|
||||
themselves). Using these helpers should also make it easier to cope
|
||||
with block changes in the future.
|
||||
|
||||
rq_for_each_bio(bio, rq)
|
||||
bio_for_each_segment(bio_vec, bio, i)
|
||||
/* bio_vec is now current segment */
|
||||
struct req_iterator iter;
|
||||
rq_for_each_segment(bio_vec, rq, iter)
|
||||
/* bio_vec is now current segment */
|
||||
|
||||
I/O completion callbacks are per-bio rather than per-segment, so drivers
|
||||
that traverse bio chains on completion need to keep that in mind. Drivers
|
||||
@ -740,12 +740,12 @@ Block now offers some simple generic functionality to help support command
|
||||
queueing (typically known as tagged command queueing), ie manage more than
|
||||
one outstanding command on a queue at any given time.
|
||||
|
||||
blk_queue_init_tags(request_queue_t *q, int depth)
|
||||
blk_queue_init_tags(struct request_queue *q, int depth)
|
||||
|
||||
Initialize internal command tagging structures for a maximum
|
||||
depth of 'depth'.
|
||||
|
||||
blk_queue_free_tags((request_queue_t *q)
|
||||
blk_queue_free_tags(struct request_queue *q)
|
||||
|
||||
Teardown tag info associated with the queue. This will be done
|
||||
automatically by block if blk_queue_cleanup() is called on a queue
|
||||
@ -754,7 +754,7 @@ one outstanding command on a queue at any given time.
|
||||
The above are initialization and exit management, the main helpers during
|
||||
normal operations are:
|
||||
|
||||
blk_queue_start_tag(request_queue_t *q, struct request *rq)
|
||||
blk_queue_start_tag(struct request_queue *q, struct request *rq)
|
||||
|
||||
Start tagged operation for this request. A free tag number between
|
||||
0 and 'depth' is assigned to the request (rq->tag holds this number),
|
||||
@ -762,7 +762,7 @@ normal operations are:
|
||||
for this queue is already achieved (or if the tag wasn't started for
|
||||
some other reason), 1 is returned. Otherwise 0 is returned.
|
||||
|
||||
blk_queue_end_tag(request_queue_t *q, struct request *rq)
|
||||
blk_queue_end_tag(struct request_queue *q, struct request *rq)
|
||||
|
||||
End tagged operation on this request. 'rq' is removed from the internal
|
||||
book keeping structures.
|
||||
@ -781,7 +781,7 @@ queue. For instance, on IDE any tagged request error needs to clear both
|
||||
the hardware and software block queue and enable the driver to sanely restart
|
||||
all the outstanding requests. There's a third helper to do that:
|
||||
|
||||
blk_queue_invalidate_tags(request_queue_t *q)
|
||||
blk_queue_invalidate_tags(struct request_queue *q)
|
||||
|
||||
Clear the internal block tag queue and re-add all the pending requests
|
||||
to the request queue. The driver will receive them again on the
|
||||
|
@ -86,8 +86,15 @@ extern int sys_ioprio_get(int, int);
|
||||
#error "Unsupported arch"
|
||||
#endif
|
||||
|
||||
_syscall3(int, ioprio_set, int, which, int, who, int, ioprio);
|
||||
_syscall2(int, ioprio_get, int, which, int, who);
|
||||
static inline int ioprio_set(int which, int who, int ioprio)
|
||||
{
|
||||
return syscall(__NR_ioprio_set, which, who, ioprio);
|
||||
}
|
||||
|
||||
static inline int ioprio_get(int which, int who)
|
||||
{
|
||||
return syscall(__NR_ioprio_get, which, who);
|
||||
}
|
||||
|
||||
enum {
|
||||
IOPRIO_CLASS_NONE,
|
||||
|
@ -83,6 +83,6 @@ struct bio *bio DBI First bio in request
|
||||
|
||||
struct bio *biotail DBI Last bio in request
|
||||
|
||||
request_queue_t *q DB Request queue this request belongs to
|
||||
struct request_queue *q DB Request queue this request belongs to
|
||||
|
||||
struct request_list *rl B Request list this request came from
|
||||
|
@ -124,9 +124,8 @@ static void cn_test_timer_func(unsigned long __data)
|
||||
struct cn_msg *m;
|
||||
char data[32];
|
||||
|
||||
m = kmalloc(sizeof(*m) + sizeof(data), GFP_ATOMIC);
|
||||
m = kzalloc(sizeof(*m) + sizeof(data), GFP_ATOMIC);
|
||||
if (m) {
|
||||
memset(m, 0, sizeof(*m) + sizeof(data));
|
||||
|
||||
memcpy(&m->id, &cn_test_id, sizeof(m->id));
|
||||
m->seq = cn_test_timer_counter;
|
||||
|
@ -29,7 +29,7 @@ In newer kernels, the following are also available:
|
||||
|
||||
If sysfs is enabled, the contents of /sys/class/vtconsole can be
|
||||
examined. This shows the console backends currently registered by the
|
||||
system which are named vtcon<n> where <n> is an integer fro 0 to 15. Thus:
|
||||
system which are named vtcon<n> where <n> is an integer from 0 to 15. Thus:
|
||||
|
||||
ls /sys/class/vtconsole
|
||||
. .. vtcon0 vtcon1
|
||||
|
219
Documentation/crypto/async-tx-api.txt
Normal file
219
Documentation/crypto/async-tx-api.txt
Normal file
@ -0,0 +1,219 @@
|
||||
Asynchronous Transfers/Transforms API
|
||||
|
||||
1 INTRODUCTION
|
||||
|
||||
2 GENEALOGY
|
||||
|
||||
3 USAGE
|
||||
3.1 General format of the API
|
||||
3.2 Supported operations
|
||||
3.3 Descriptor management
|
||||
3.4 When does the operation execute?
|
||||
3.5 When does the operation complete?
|
||||
3.6 Constraints
|
||||
3.7 Example
|
||||
|
||||
4 DRIVER DEVELOPER NOTES
|
||||
4.1 Conformance points
|
||||
4.2 "My application needs finer control of hardware channels"
|
||||
|
||||
5 SOURCE
|
||||
|
||||
---
|
||||
|
||||
1 INTRODUCTION
|
||||
|
||||
The async_tx API provides methods for describing a chain of asynchronous
|
||||
bulk memory transfers/transforms with support for inter-transactional
|
||||
dependencies. It is implemented as a dmaengine client that smooths over
|
||||
the details of different hardware offload engine implementations. Code
|
||||
that is written to the API can optimize for asynchronous operation and
|
||||
the API will fit the chain of operations to the available offload
|
||||
resources.
|
||||
|
||||
2 GENEALOGY
|
||||
|
||||
The API was initially designed to offload the memory copy and
|
||||
xor-parity-calculations of the md-raid5 driver using the offload engines
|
||||
present in the Intel(R) Xscale series of I/O processors. It also built
|
||||
on the 'dmaengine' layer developed for offloading memory copies in the
|
||||
network stack using Intel(R) I/OAT engines. The following design
|
||||
features surfaced as a result:
|
||||
1/ implicit synchronous path: users of the API do not need to know if
|
||||
the platform they are running on has offload capabilities. The
|
||||
operation will be offloaded when an engine is available and carried out
|
||||
in software otherwise.
|
||||
2/ cross channel dependency chains: the API allows a chain of dependent
|
||||
operations to be submitted, like xor->copy->xor in the raid5 case. The
|
||||
API automatically handles cases where the transition from one operation
|
||||
to another implies a hardware channel switch.
|
||||
3/ dmaengine extensions to support multiple clients and operation types
|
||||
beyond 'memcpy'
|
||||
|
||||
3 USAGE
|
||||
|
||||
3.1 General format of the API:
|
||||
struct dma_async_tx_descriptor *
|
||||
async_<operation>(<op specific parameters>,
|
||||
enum async_tx_flags flags,
|
||||
struct dma_async_tx_descriptor *dependency,
|
||||
dma_async_tx_callback callback_routine,
|
||||
void *callback_parameter);
|
||||
|
||||
3.2 Supported operations:
|
||||
memcpy - memory copy between a source and a destination buffer
|
||||
memset - fill a destination buffer with a byte value
|
||||
xor - xor a series of source buffers and write the result to a
|
||||
destination buffer
|
||||
xor_zero_sum - xor a series of source buffers and set a flag if the
|
||||
result is zero. The implementation attempts to prevent
|
||||
writes to memory
|
||||
|
||||
3.3 Descriptor management:
|
||||
The return value is non-NULL and points to a 'descriptor' when the operation
|
||||
has been queued to execute asynchronously. Descriptors are recycled
|
||||
resources, under control of the offload engine driver, to be reused as
|
||||
operations complete. When an application needs to submit a chain of
|
||||
operations it must guarantee that the descriptor is not automatically recycled
|
||||
before the dependency is submitted. This requires that all descriptors be
|
||||
acknowledged by the application before the offload engine driver is allowed to
|
||||
recycle (or free) the descriptor. A descriptor can be acked by one of the
|
||||
following methods:
|
||||
1/ setting the ASYNC_TX_ACK flag if no child operations are to be submitted
|
||||
2/ setting the ASYNC_TX_DEP_ACK flag to acknowledge the parent
|
||||
descriptor of a new operation.
|
||||
3/ calling async_tx_ack() on the descriptor.
|
||||
|
||||
3.4 When does the operation execute?
|
||||
Operations do not immediately issue after return from the
|
||||
async_<operation> call. Offload engine drivers batch operations to
|
||||
improve performance by reducing the number of mmio cycles needed to
|
||||
manage the channel. Once a driver-specific threshold is met the driver
|
||||
automatically issues pending operations. An application can force this
|
||||
event by calling async_tx_issue_pending_all(). This operates on all
|
||||
channels since the application has no knowledge of channel to operation
|
||||
mapping.
|
||||
|
||||
3.5 When does the operation complete?
|
||||
There are two methods for an application to learn about the completion
|
||||
of an operation.
|
||||
1/ Call dma_wait_for_async_tx(). This call causes the CPU to spin while
|
||||
it polls for the completion of the operation. It handles dependency
|
||||
chains and issuing pending operations.
|
||||
2/ Specify a completion callback. The callback routine runs in tasklet
|
||||
context if the offload engine driver supports interrupts, or it is
|
||||
called in application context if the operation is carried out
|
||||
synchronously in software. The callback can be set in the call to
|
||||
async_<operation>, or when the application needs to submit a chain of
|
||||
unknown length it can use the async_trigger_callback() routine to set a
|
||||
completion interrupt/callback at the end of the chain.
|
||||
|
||||
3.6 Constraints:
|
||||
1/ Calls to async_<operation> are not permitted in IRQ context. Other
|
||||
contexts are permitted provided constraint #2 is not violated.
|
||||
2/ Completion callback routines cannot submit new operations. This
|
||||
results in recursion in the synchronous case and spin_locks being
|
||||
acquired twice in the asynchronous case.
|
||||
|
||||
3.7 Example:
|
||||
Perform a xor->copy->xor operation where each operation depends on the
|
||||
result from the previous operation:
|
||||
|
||||
void complete_xor_copy_xor(void *param)
|
||||
{
|
||||
printk("complete\n");
|
||||
}
|
||||
|
||||
int run_xor_copy_xor(struct page **xor_srcs,
|
||||
int xor_src_cnt,
|
||||
struct page *xor_dest,
|
||||
size_t xor_len,
|
||||
struct page *copy_src,
|
||||
struct page *copy_dest,
|
||||
size_t copy_len)
|
||||
{
|
||||
struct dma_async_tx_descriptor *tx;
|
||||
|
||||
tx = async_xor(xor_dest, xor_srcs, 0, xor_src_cnt, xor_len,
|
||||
ASYNC_TX_XOR_DROP_DST, NULL, NULL, NULL);
|
||||
tx = async_memcpy(copy_dest, copy_src, 0, 0, copy_len,
|
||||
ASYNC_TX_DEP_ACK, tx, NULL, NULL);
|
||||
tx = async_xor(xor_dest, xor_srcs, 0, xor_src_cnt, xor_len,
|
||||
ASYNC_TX_XOR_DROP_DST | ASYNC_TX_DEP_ACK | ASYNC_TX_ACK,
|
||||
tx, complete_xor_copy_xor, NULL);
|
||||
|
||||
async_tx_issue_pending_all();
|
||||
}
|
||||
|
||||
See include/linux/async_tx.h for more information on the flags. See the
|
||||
ops_run_* and ops_complete_* routines in drivers/md/raid5.c for more
|
||||
implementation examples.
|
||||
|
||||
4 DRIVER DEVELOPER NOTES
|
||||
4.1 Conformance points:
|
||||
There are a few conformance points required in dmaengine drivers to
|
||||
accommodate assumptions made by applications using the async_tx API:
|
||||
1/ Completion callbacks are expected to happen in tasklet context
|
||||
2/ dma_async_tx_descriptor fields are never manipulated in IRQ context
|
||||
3/ Use async_tx_run_dependencies() in the descriptor clean up path to
|
||||
handle submission of dependent operations
|
||||
|
||||
4.2 "My application needs finer control of hardware channels"
|
||||
This requirement seems to arise from cases where a DMA engine driver is
|
||||
trying to support device-to-memory DMA. The dmaengine and async_tx
|
||||
implementations were designed for offloading memory-to-memory
|
||||
operations; however, there are some capabilities of the dmaengine layer
|
||||
that can be used for platform-specific channel management.
|
||||
Platform-specific constraints can be handled by registering the
|
||||
application as a 'dma_client' and implementing a 'dma_event_callback' to
|
||||
apply a filter to the available channels in the system. Before showing
|
||||
how to implement a custom dma_event callback some background of
|
||||
dmaengine's client support is required.
|
||||
|
||||
The following routines in dmaengine support multiple clients requesting
|
||||
use of a channel:
|
||||
- dma_async_client_register(struct dma_client *client)
|
||||
- dma_async_client_chan_request(struct dma_client *client)
|
||||
|
||||
dma_async_client_register takes a pointer to an initialized dma_client
|
||||
structure. It expects that the 'event_callback' and 'cap_mask' fields
|
||||
are already initialized.
|
||||
|
||||
dma_async_client_chan_request triggers dmaengine to notify the client of
|
||||
all channels that satisfy the capability mask. It is up to the client's
|
||||
event_callback routine to track how many channels the client needs and
|
||||
how many it is currently using. The dma_event_callback routine returns a
|
||||
dma_state_client code to let dmaengine know the status of the
|
||||
allocation.
|
||||
|
||||
Below is an example of how to extend this functionality for
|
||||
platform-specific filtering of the available channels beyond the
|
||||
standard capability mask:
|
||||
|
||||
static enum dma_state_client
|
||||
my_dma_client_callback(struct dma_client *client,
|
||||
struct dma_chan *chan, enum dma_state state)
|
||||
{
|
||||
struct dma_device *dma_dev;
|
||||
struct my_platform_specific_dma *plat_dma_dev;
|
||||
|
||||
dma_dev = chan->device;
|
||||
plat_dma_dev = container_of(dma_dev,
|
||||
struct my_platform_specific_dma,
|
||||
dma_dev);
|
||||
|
||||
if (!plat_dma_dev->platform_specific_capability)
|
||||
return DMA_DUP;
|
||||
|
||||
. . .
|
||||
}
|
||||
|
||||
5 SOURCE
|
||||
include/linux/dmaengine.h: core header file for DMA drivers and clients
|
||||
drivers/dma/dmaengine.c: offload engine channel management routines
|
||||
drivers/dma/: location for offload engine drivers
|
||||
include/linux/async_tx.h: core header file for the async_tx api
|
||||
crypto/async_tx/async_tx.c: async_tx interface to dmaengine and common code
|
||||
crypto/async_tx/async_memcpy.c: copy offload
|
||||
crypto/async_tx/async_memset.c: memory fill offload
|
||||
crypto/async_tx/async_xor.c: xor and xor zero sum offload
|
@ -94,6 +94,8 @@ Your cooperation is appreciated.
|
||||
9 = /dev/urandom Faster, less secure random number gen.
|
||||
10 = /dev/aio Asynchronous I/O notification interface
|
||||
11 = /dev/kmsg Writes to this come out as printk's
|
||||
12 = /dev/oldmem Used by crashdump kernels to access
|
||||
the memory of the kernel that crashed.
|
||||
|
||||
1 block RAM disk
|
||||
0 = /dev/ram0 First RAM disk
|
||||
|
@ -18,6 +18,7 @@
|
||||
*.moc
|
||||
*.mod.c
|
||||
*.o
|
||||
*.o.*
|
||||
*.orig
|
||||
*.out
|
||||
*.pdf
|
||||
@ -163,6 +164,8 @@ raid6tables.c
|
||||
relocs
|
||||
series
|
||||
setup
|
||||
setup.bin
|
||||
setup.elf
|
||||
sim710_d.h*
|
||||
sImage
|
||||
sm_tbl*
|
||||
|
@ -207,7 +207,7 @@ responsibility. This is usually non-issue because bus ops and
|
||||
resource allocations already do the job.
|
||||
|
||||
For an example of single-instance devres type, read pcim_iomap_table()
|
||||
in lib/iomap.c.
|
||||
in lib/devres.c.
|
||||
|
||||
All devres interface functions can be called without context if the
|
||||
right gfp mask is given.
|
||||
|
@ -2,22 +2,42 @@
|
||||
|
||||
EDAC - Error Detection And Correction
|
||||
|
||||
Written by Doug Thompson <norsk5@xmission.com>
|
||||
Written by Doug Thompson <dougthompson@xmission.com>
|
||||
7 Dec 2005
|
||||
17 Jul 2007 Updated
|
||||
|
||||
|
||||
EDAC was written by:
|
||||
Thayne Harbaugh,
|
||||
modified by Dave Peterson, Doug Thompson, et al,
|
||||
from the bluesmoke.sourceforge.net project.
|
||||
EDAC is maintained and written by:
|
||||
|
||||
Doug Thompson, Dave Jiang, Dave Peterson et al,
|
||||
original author: Thayne Harbaugh,
|
||||
|
||||
Contact:
|
||||
website: bluesmoke.sourceforge.net
|
||||
mailing list: bluesmoke-devel@lists.sourceforge.net
|
||||
|
||||
"bluesmoke" was the name for this device driver when it was "out-of-tree"
|
||||
and maintained at sourceforge.net. When it was pushed into 2.6.16 for the
|
||||
first time, it was renamed to 'EDAC'.
|
||||
|
||||
The bluesmoke project at sourceforge.net is now utilized as a 'staging area'
|
||||
for EDAC development, before it is sent upstream to kernel.org
|
||||
|
||||
At the bluesmoke/EDAC project site, is a series of quilt patches against
|
||||
recent kernels, stored in a SVN repository. For easier downloading, there
|
||||
is also a tarball snapshot available.
|
||||
|
||||
============================================================================
|
||||
EDAC PURPOSE
|
||||
|
||||
The 'edac' kernel module goal is to detect and report errors that occur
|
||||
within the computer system. In the initial release, memory Correctable Errors
|
||||
(CE) and Uncorrectable Errors (UE) are the primary errors being harvested.
|
||||
within the computer system running under linux.
|
||||
|
||||
MEMORY
|
||||
|
||||
In the initial release, memory Correctable Errors (CE) and Uncorrectable
|
||||
Errors (UE) are the primary errors being harvested. These types of errors
|
||||
are harvested by the 'edac_mc' class of device.
|
||||
|
||||
Detecting CE events, then harvesting those events and reporting them,
|
||||
CAN be a predictor of future UE events. With CE events, the system can
|
||||
@ -25,9 +45,27 @@ continue to operate, but with less safety. Preventive maintenance and
|
||||
proactive part replacement of memory DIMMs exhibiting CEs can reduce
|
||||
the likelihood of the dreaded UE events and system 'panics'.
|
||||
|
||||
NON-MEMORY
|
||||
|
||||
A new feature for EDAC, the edac_device class of device, was added in
|
||||
the 2.6.23 version of the kernel.
|
||||
|
||||
This new device type allows for non-memory type of ECC hardware detectors
|
||||
to have their states harvested and presented to userspace via the sysfs
|
||||
interface.
|
||||
|
||||
Some architectures have ECC detectors for L1, L2 and L3 caches, along with DMA
|
||||
engines, fabric switches, main data path switches, interconnections,
|
||||
and various other hardware data paths. If the hardware reports it, then
|
||||
an edac_device device can probably be constructed to harvest and present
|
||||
that to userspace.
|
||||
|
||||
|
||||
PCI BUS SCANNING
|
||||
|
||||
In addition, PCI Bus Parity and SERR Errors are scanned for on PCI devices
|
||||
in order to determine if errors are occurring on data transfers.
|
||||
|
||||
The presence of PCI Parity errors must be examined with a grain of salt.
|
||||
There are several add-in adapters that do NOT follow the PCI specification
|
||||
with regards to Parity generation and reporting. The specification says
|
||||
@ -35,11 +73,17 @@ the vendor should tie the parity status bits to 0 if they do not intend
|
||||
to generate parity. Some vendors do not do this, and thus the parity bit
|
||||
can "float" giving false positives.
|
||||
|
||||
[There are patches in the kernel queue which will allow for storage of
|
||||
quirks of PCI devices reporting false parity positives. The 2.6.18
|
||||
kernel should have those patches included. When that becomes available,
|
||||
then EDAC will be patched to utilize that information to "skip" such
|
||||
devices.]
|
||||
In the kernel there is a pci device attribute located in sysfs that is
|
||||
checked by the EDAC PCI scanning code. If that attribute is set,
|
||||
PCI parity/error scanning is skipped for that device. The attribute
|
||||
is:
|
||||
|
||||
broken_parity_status
|
||||
|
||||
and is located in /sys/devices/pci<XXX>/0000:XX:YY.Z directories for
|
||||
PCI devices.
|
||||
|
||||
FUTURE HARDWARE SCANNING
|
||||
|
||||
EDAC will have future error detectors that will be integrated with
|
||||
EDAC or added to it, in the following list:
|
||||
@ -57,13 +101,14 @@ and the like.
|
||||
============================================================================
|
||||
EDAC VERSIONING
|
||||
|
||||
EDAC is composed of a "core" module (edac_mc.ko) and several Memory
|
||||
EDAC is composed of a "core" module (edac_core.ko) and several Memory
|
||||
Controller (MC) driver modules. On a given system, the CORE
|
||||
is loaded and one MC driver will be loaded. Both the CORE and
|
||||
the MC driver have individual versions that reflect current release
|
||||
level of their respective modules. Thus, to "report" on what version
|
||||
a system is running, one must report both the CORE's and the
|
||||
MC driver's versions.
|
||||
the MC driver (or edac_device driver) have individual versions that reflect
|
||||
current release level of their respective modules.
|
||||
|
||||
Thus, to "report" on what version a system is running, one must report both
|
||||
the CORE's and the MC driver's versions.
|
||||
|
||||
|
||||
LOADING
|
||||
@ -88,8 +133,9 @@ EDAC sysfs INTERFACE
|
||||
EDAC presents a 'sysfs' interface for control, reporting and attribute
|
||||
reporting purposes.
|
||||
|
||||
EDAC lives in the /sys/devices/system/edac directory. Within this directory
|
||||
there currently reside 2 'edac' components:
|
||||
EDAC lives in the /sys/devices/system/edac directory.
|
||||
|
||||
Within this directory there currently reside 2 'edac' components:
|
||||
|
||||
mc memory controller(s) system
|
||||
pci PCI control and status system
|
||||
@ -188,7 +234,7 @@ In directory 'mc' are EDAC system overall control and attribute files:
|
||||
|
||||
Panic on UE control file:
|
||||
|
||||
'panic_on_ue'
|
||||
'edac_mc_panic_on_ue'
|
||||
|
||||
An uncorrectable error will cause a machine panic. This is usually
|
||||
desirable. It is a bad idea to continue when an uncorrectable error
|
||||
@ -199,12 +245,12 @@ Panic on UE control file:
|
||||
|
||||
LOAD TIME: module/kernel parameter: panic_on_ue=[0|1]
|
||||
|
||||
RUN TIME: echo "1" >/sys/devices/system/edac/mc/panic_on_ue
|
||||
RUN TIME: echo "1" >/sys/devices/system/edac/mc/edac_mc_panic_on_ue
|
||||
|
||||
|
||||
Log UE control file:
|
||||
|
||||
'log_ue'
|
||||
'edac_mc_log_ue'
|
||||
|
||||
Generate kernel messages describing uncorrectable errors. These errors
|
||||
are reported through the system message log system. UE statistics
|
||||
@ -212,12 +258,12 @@ Log UE control file:
|
||||
|
||||
LOAD TIME: module/kernel parameter: log_ue=[0|1]
|
||||
|
||||
RUN TIME: echo "1" >/sys/devices/system/edac/mc/log_ue
|
||||
RUN TIME: echo "1" >/sys/devices/system/edac/mc/edac_mc_log_ue
|
||||
|
||||
|
||||
Log CE control file:
|
||||
|
||||
'log_ce'
|
||||
'edac_mc_log_ce'
|
||||
|
||||
Generate kernel messages describing correctable errors. These
|
||||
errors are reported through the system message log system.
|
||||
@ -225,12 +271,12 @@ Log CE control file:
|
||||
|
||||
LOAD TIME: module/kernel parameter: log_ce=[0|1]
|
||||
|
||||
RUN TIME: echo "1" >/sys/devices/system/edac/mc/log_ce
|
||||
RUN TIME: echo "1" >/sys/devices/system/edac/mc/edac_mc_log_ce
|
||||
|
||||
|
||||
Polling period control file:
|
||||
|
||||
'poll_msec'
|
||||
'edac_mc_poll_msec'
|
||||
|
||||
The time period, in milliseconds, for polling for error information.
|
||||
Too small a value wastes resources. Too large a value might delay
|
||||
@ -241,7 +287,7 @@ Polling period control file:
|
||||
|
||||
LOAD TIME: module/kernel parameter: poll_msec=[0|1]
|
||||
|
||||
RUN TIME: echo "1000" >/sys/devices/system/edac/mc/poll_msec
|
||||
RUN TIME: echo "1000" >/sys/devices/system/edac/mc/edac_mc_poll_msec
|
||||
|
||||
|
||||
============================================================================
|
||||
@ -587,3 +633,95 @@ Parity Count:
|
||||
|
||||
|
||||
=======================================================================
|
||||
|
||||
|
||||
EDAC_DEVICE type of device
|
||||
|
||||
In the header file, edac_core.h, there is a series of edac_device structures
|
||||
and APIs for the EDAC_DEVICE.
|
||||
|
||||
User space access to an edac_device is through the sysfs interface.
|
||||
|
||||
At the location /sys/devices/system/edac (sysfs) new edac_device devices will
|
||||
appear.
|
||||
|
||||
There is a three level tree beneath the above 'edac' directory. For example,
|
||||
the 'test_device_edac' device (found at the bluesmoke.sourceforge.net website)
|
||||
installs itself as:
|
||||
|
||||
/sys/devices/system/edac/test-instance
|
||||
|
||||
in this directory are various controls, a symlink and one or more 'instance'
|
||||
directories.
|
||||
|
||||
The standard default controls are:
|
||||
|
||||
log_ce boolean to log CE events
|
||||
log_ue boolean to log UE events
|
||||
panic_on_ue boolean to 'panic' the system if an UE is encountered
|
||||
(default off, can be set true via startup script)
|
||||
poll_msec time period between POLL cycles for events
|
||||
|
||||
The test_device_edac device adds at least one custom control of its own:
|
||||
|
||||
test_bits which in the current test driver does nothing but
|
||||
show how it is installed. A ported driver can
|
||||
add one or more such controls and/or attributes
|
||||
for specific uses.
|
||||
One out-of-tree driver uses controls here to allow
|
||||
for ERROR INJECTION operations to hardware
|
||||
injection registers
|
||||
|
||||
The symlink points to the 'struct dev' that is registered for this edac_device.
|
||||
|
||||
INSTANCES
|
||||
|
||||
One or more instance directories are present. For the 'test_device_edac' case:
|
||||
|
||||
test-instance0
|
||||
|
||||
|
||||
In this directory there are two default counter attributes, which are totals of
|
||||
counters in deeper subdirectories.
|
||||
|
||||
ce_count total of CE events of subdirectories
|
||||
ue_count total of UE events of subdirectories
|
||||
|
||||
BLOCKS
|
||||
|
||||
At the lowest directory level is the 'block' directory. There can be 0, 1
|
||||
or more blocks specified in each instance.
|
||||
|
||||
test-block0
|
||||
|
||||
|
||||
In this directory the default attributes are:
|
||||
|
||||
ce_count which is counter of CE events for this 'block'
|
||||
of hardware being monitored
|
||||
ue_count which is counter of UE events for this 'block'
|
||||
of hardware being monitored
|
||||
|
||||
|
||||
The 'test_device_edac' device adds 4 attributes and 1 control:
|
||||
|
||||
test-block-bits-0 for every POLL cycle this counter
|
||||
is incremented
|
||||
test-block-bits-1 every 10 cycles, this counter is bumped once,
|
||||
and test-block-bits-0 is set to 0
|
||||
test-block-bits-2 every 100 cycles, this counter is bumped once,
|
||||
and test-block-bits-1 is set to 0
|
||||
test-block-bits-3 every 1000 cycles, this counter is bumped once,
|
||||
and test-block-bits-2 is set to 0
|
||||
|
||||
|
||||
reset-counters writing ANY thing to this control will
|
||||
reset all the above counters.
|
||||
|
||||
|
||||
Use of the 'test_device_edac' driver should enable any others to create their own
|
||||
unique drivers for their hardware systems.
|
||||
|
||||
The 'test_device_edac' sample driver is located at the
|
||||
bluesmoke.sourceforge.net project site for EDAC.
|
||||
|
||||
|
@ -9,19 +9,29 @@ for accessing the i2c bus and the gpio pins of the bt8xx chipset.
|
||||
Please see Documentation/dvb/cards.txt => o Cards based on the Conexant Bt8xx PCI bridge:
|
||||
|
||||
Compiling kernel please enable:
|
||||
a.)"Device drivers" => "Multimedia devices" => "Video For Linux" => "BT848 Video For Linux"
|
||||
b.)"Device drivers" => "Multimedia devices" => "Digital Video Broadcasting Devices"
|
||||
=> "DVB for Linux" "DVB Core Support" "Bt8xx based PCI Cards"
|
||||
a.)"Device drivers" => "Multimedia devices" => "Video For Linux" => "Enable Video for Linux API 1 (DEPRECATED)"
|
||||
b.)"Device drivers" => "Multimedia devices" => "Video For Linux" => "Video Capture Adapters" => "BT848 Video For Linux"
|
||||
c.)"Device drivers" => "Multimedia devices" => "Digital Video Broadcasting Devices" => "DVB for Linux" "DVB Core Support" "Bt8xx based PCI Cards"
|
||||
|
||||
Please use the following options with care as deselection of drivers which are in fact necessary
|
||||
may result in DVB devices that cannot be tuned due to lack of driver support:
|
||||
You can save RAM by deselecting every frontend module that your DVB card does not need.
|
||||
|
||||
First please remove the static dependency of DVB card drivers on all frontend modules for all possible card variants by enabling:
|
||||
d.) "Device drivers" => "Multimedia devices" => "Digital Video Broadcasting Devices"
|
||||
=> "DVB for Linux" "DVB Core Support" "Load and attach frontend modules as needed"
|
||||
|
||||
If you know the frontend driver that your card needs please enable:
|
||||
e.)"Device drivers" => "Multimedia devices" => "Digital Video Broadcasting Devices"
|
||||
=> "DVB for Linux" "DVB Core Support" "Customise DVB Frontends" => "Customise the frontend modules to build"
|
||||
Then please select your card-specific frontend module.
|
||||
|
||||
2) Loading Modules
|
||||
==================
|
||||
|
||||
In default cases bttv is loaded automatically.
|
||||
To load the backend either place dvb-bt8xx in /etc/modules, or apply manually:
|
||||
|
||||
$ modprobe dvb-bt8xx
|
||||
|
||||
All frontends will be loaded automatically.
|
||||
Regular case: If the bttv driver detects a bt8xx-based DVB card, all frontend and backend modules will be loaded automatically.
|
||||
Exceptions are:
|
||||
- Old TwinHan DST cards or clones with or without CA slot and not containing an Eeprom.
|
||||
People running udev please see Documentation/dvb/udev.txt.
|
||||
|
||||
In the following cases overriding the PCI type detection for dvb-bt8xx might be necessary:
|
||||
@ -30,7 +40,6 @@ In the following cases overriding the PCI type detection for dvb-bt8xx might be
|
||||
------------------------------
|
||||
|
||||
$ modprobe bttv card=113
|
||||
$ modprobe dvb-bt8xx
|
||||
$ modprobe dst
|
||||
|
||||
Useful parameters for verbosity level and debugging the dst module:
|
||||
@ -65,10 +74,9 @@ DViCO FusionHDTV 5 Lite: 135
|
||||
Notice: The card IDs should be given in ascending order:
|
||||
Example:
|
||||
$ modprobe bttv card=113 card=135
|
||||
$ modprobe dvb-bt8xx
|
||||
|
||||
For a full list of card ID's please see Documentation/video4linux/CARDLIST.bttv.
|
||||
In case of further problems send questions to the mailing list: www.linuxdvb.org.
|
||||
In case of further problems please subscribe and send questions to the mailing list: linux-dvb@linuxtv.org.
|
||||
|
||||
Authors: Richard Walker,
|
||||
Jamie Honan,
|
||||
|
@ -150,7 +150,7 @@ Some very frequently asked questions about linuxtv-dvb
|
||||
- saa7146_vv: SAA7146 video and vbi functions. These are only needed
|
||||
for full-featured cards.
|
||||
|
||||
- video-buf: capture helper module for the saa7146_vv driver. This
|
||||
- videobuf-dma-sg: capture helper module for the saa7146_vv driver. This
|
||||
one is responsible to handle capture buffers.
|
||||
|
||||
- dvb-ttpci: The main driver for AV7110 based, full-featured
|
||||
|
@ -24,7 +24,8 @@ use IO::Handle;
|
||||
@components = ( "sp8870", "sp887x", "tda10045", "tda10046",
|
||||
"tda10046lifeview", "av7110", "dec2000t", "dec2540t",
|
||||
"dec3000s", "vp7041", "dibusb", "nxt2002", "nxt2004",
|
||||
"or51211", "or51132_qam", "or51132_vsb", "bluebird");
|
||||
"or51211", "or51132_qam", "or51132_vsb", "bluebird",
|
||||
"opera1");
|
||||
|
||||
# Check args
|
||||
syntax() if (scalar(@ARGV) != 1);
|
||||
@ -56,7 +57,7 @@ syntax();
|
||||
|
||||
sub sp8870 {
|
||||
my $sourcefile = "tt_Premium_217g.zip";
|
||||
my $url = "http://www.technotrend.de/new/217g/$sourcefile";
|
||||
my $url = "http://www.softwarepatch.pl/9999ccd06a4813cb827dbb0005071c71/$sourcefile";
|
||||
my $hash = "53970ec17a538945a6d8cb608a7b3899";
|
||||
my $outfile = "dvb-fe-sp8870.fw";
|
||||
my $tmpdir = tempdir(DIR => "/tmp", CLEANUP => 1);
|
||||
@ -110,21 +111,21 @@ sub tda10045 {
|
||||
}
|
||||
|
||||
sub tda10046 {
|
||||
my $sourcefile = "tt_budget_217g.zip";
|
||||
my $url = "http://www.technotrend.de/new/217g/$sourcefile";
|
||||
my $hash = "6a7e1e2f2644b162ff0502367553c72d";
|
||||
my $outfile = "dvb-fe-tda10046.fw";
|
||||
my $tmpdir = tempdir(DIR => "/tmp", CLEANUP => 1);
|
||||
my $sourcefile = "TT_PCI_2.19h_28_11_2006.zip";
|
||||
my $url = "http://technotrend-online.com/download/software/219/$sourcefile";
|
||||
my $hash = "6a7e1e2f2644b162ff0502367553c72d";
|
||||
my $outfile = "dvb-fe-tda10046.fw";
|
||||
my $tmpdir = tempdir(DIR => "/tmp", CLEANUP => 1);
|
||||
|
||||
checkstandard();
|
||||
checkstandard();
|
||||
|
||||
wgetfile($sourcefile, $url);
|
||||
unzip($sourcefile, $tmpdir);
|
||||
extract("$tmpdir/software/OEM/PCI/App/ttlcdacc.dll", 0x3f731, 24478, "$tmpdir/fwtmp");
|
||||
verify("$tmpdir/fwtmp", $hash);
|
||||
copy("$tmpdir/fwtmp", $outfile);
|
||||
wgetfile($sourcefile, $url);
|
||||
unzip($sourcefile, $tmpdir);
|
||||
extract("$tmpdir/TT_PCI_2.19h_28_11_2006/software/OEM/PCI/App/ttlcdacc.dll", 0x65389, 24478, "$tmpdir/fwtmp");
|
||||
verify("$tmpdir/fwtmp", $hash);
|
||||
copy("$tmpdir/fwtmp", $outfile);
|
||||
|
||||
$outfile;
|
||||
$outfile;
|
||||
}
|
||||
|
||||
sub tda10046lifeview {
|
||||
@ -210,6 +211,45 @@ sub dec3000s {
|
||||
|
||||
$outfile;
|
||||
}
|
||||
sub opera1{
|
||||
my $tmpdir = tempdir(DIR => "/tmp", CLEANUP => 0);
|
||||
|
||||
checkstandard();
|
||||
my $fwfile1="dvb-usb-opera1-fpga-01.fw";
|
||||
my $fwfile2="dvb-usb-opera-01.fw";
|
||||
extract("2830SCap2.sys", 0x62e8, 55024, "$tmpdir/opera1-fpga.fw");
|
||||
extract("2830SLoad2.sys",0x3178,0x3685-0x3178,"$tmpdir/fw1part1");
|
||||
extract("2830SLoad2.sys",0x0980,0x3150-0x0980,"$tmpdir/fw1part2");
|
||||
delzero("$tmpdir/fw1part1","$tmpdir/fw1part1-1");
|
||||
delzero("$tmpdir/fw1part2","$tmpdir/fw1part2-1");
|
||||
verify("$tmpdir/fw1part1-1","5e0909858fdf0b5b09ad48b9fe622e70");
|
||||
verify("$tmpdir/fw1part2-1","d6e146f321427e931df2c6fcadac37a1");
|
||||
verify("$tmpdir/opera1-fpga.fw","0f8133f5e9051f5f3c1928f7e5a1b07d");
|
||||
|
||||
my $RES1="\x01\x92\x7f\x00\x01\x00";
|
||||
my $RES0="\x01\x92\x7f\x00\x00\x00";
|
||||
my $DAT1="\x01\x00\xe6\x00\x01\x00";
|
||||
my $DAT0="\x01\x00\xe6\x00\x00\x00";
|
||||
open FW,">$tmpdir/opera.fw";
|
||||
print FW "$RES1";
|
||||
print FW "$DAT1";
|
||||
print FW "$RES1";
|
||||
print FW "$DAT1";
|
||||
appendfile(FW,"$tmpdir/fw1part1-1");
|
||||
print FW "$RES0";
|
||||
print FW "$DAT0";
|
||||
print FW "$RES1";
|
||||
print FW "$DAT1";
|
||||
appendfile(FW,"$tmpdir/fw1part2-1");
|
||||
print FW "$RES1";
|
||||
print FW "$DAT1";
|
||||
print FW "$RES0";
|
||||
print FW "$DAT0";
|
||||
copy ("$tmpdir/opera1-fpga.fw",$fwfile1);
|
||||
copy ("$tmpdir/opera.fw",$fwfile2);
|
||||
|
||||
$fwfile1.",".$fwfile2;
|
||||
}
|
||||
|
||||
sub vp7041 {
|
||||
my $sourcefile = "2.422.zip";
|
||||
@ -440,6 +480,25 @@ sub appendfile {
|
||||
close(INFILE);
|
||||
}
|
||||
|
||||
sub delzero{
|
||||
my ($infile,$outfile) =@_;
|
||||
|
||||
open INFILE,"<$infile";
|
||||
open OUTFILE,">$outfile";
|
||||
while (1){
|
||||
$rcount=sysread(INFILE,$buf,22);
|
||||
$len=ord(substr($buf,0,1));
|
||||
print OUTFILE substr($buf,0,1);
|
||||
print OUTFILE substr($buf,2,$len+3);
|
||||
last if ($rcount<1);
|
||||
printf OUTFILE "%c",0;
|
||||
#print $len." ".length($buf)."\n";
|
||||
|
||||
}
|
||||
close(INFILE);
|
||||
close(OUTFILE);
|
||||
}
|
||||
|
||||
sub syntax() {
|
||||
print STDERR "syntax: get_dvb_firmware <component>\n";
|
||||
print STDERR "Supported components:\n";
|
||||
|
27
Documentation/dvb/opera-firmware.txt
Normal file
27
Documentation/dvb/opera-firmware.txt
Normal file
@ -0,0 +1,27 @@
|
||||
To extract the firmware for the Opera DVB-S1 USB-Box
|
||||
you need to copy the files:
|
||||
|
||||
2830SCap2.sys
|
||||
2830SLoad2.sys
|
||||
|
||||
from the windriver disk into this directory.
|
||||
|
||||
Then run
|
||||
|
||||
./get_dvb_firmware opera1
|
||||
|
||||
and after that you have 2 files:
|
||||
|
||||
dvb-usb-opera-01.fw
|
||||
dvb-usb-opera1-fpga-01.fw
|
||||
|
||||
in here.
|
||||
|
||||
Copy them into /lib/firmware/ .
|
||||
|
||||
After that the driver can load the firmware
|
||||
(if you have enabled firmware loading
|
||||
in kernel config and have hotplug running).
|
||||
|
||||
|
||||
Marco Gittler <g.marco@freenet.de>
|
@ -9,14 +9,13 @@ one found in the Dreamcast.
|
||||
Advantages:
|
||||
|
||||
* It provides a nice large console (128 cols + 48 lines with 1024x768)
|
||||
without using tiny, unreadable fonts.
|
||||
without using tiny, unreadable fonts (NOT on the Dreamcast)
|
||||
* You can run XF86_FBDev on top of /dev/fb0
|
||||
* Most important: boot logo :-)
|
||||
|
||||
Disadvantages:
|
||||
|
||||
* Driver is currently limited to the Dreamcast PowerVR 2 implementation
|
||||
at the time of this writing.
|
||||
* Driver is largely untested on non-Dreamcast systems.
|
||||
|
||||
Configuration
|
||||
=============
|
||||
@ -29,11 +28,16 @@ Accepted options:
|
||||
font:X - default font to use. All fonts are supported, including the
|
||||
SUN12x22 font which is very nice at high resolutions.
|
||||
|
||||
mode:X - default video mode. The following video modes are supported:
|
||||
640x240-60, 640x480-60.
|
||||
|
||||
mode:X - default video mode with format [xres]x[yres]-<bpp>@<refresh rate>
|
||||
The following video modes are supported:
|
||||
640x480-16@60, 640x480-24@60, 640x480-32@60. The Dreamcast
|
||||
defaults to 640x480-16@60. At the time of writing the
|
||||
24bpp and 32bpp modes function poorly. Work to fix that is
|
||||
ongoing.
|
||||
|
||||
Note: the 640x240 mode is currently broken, and should not be
|
||||
used for any reason. It is only mentioned as a reference.
|
||||
used for any reason. It is only mentioned here as a reference.
|
||||
|
||||
inverse - invert colors on screen (for LCD displays)
|
||||
|
||||
@ -52,10 +56,10 @@ output:X - output type. This can be any of the following: pal, ntsc, and
|
||||
X11
|
||||
===
|
||||
|
||||
XF86_FBDev should work, in theory. At the time of this writing it is
|
||||
totally untested and may or may not even portray the beginnings of
|
||||
working. If you end up testing this, please let me know!
|
||||
XF86_FBDev has been shown to work on the Dreamcast in the past - though not yet
|
||||
on any 2.6 series kernel.
|
||||
|
||||
--
|
||||
Paul Mundt <lethal@linuxdc.org>
|
||||
Updated by Adrian McMenamin <adrian@mcmen.demon.co.uk>
|
||||
|
||||
|
@ -26,9 +26,7 @@ Who: Hans Verkuil <hverkuil@xs4all.nl> and
|
||||
|
||||
---------------------------
|
||||
|
||||
What: /sys/devices/.../power/state
|
||||
dev->power.power_state
|
||||
dpm_runtime_{suspend,resume}()
|
||||
What: dev->power.power_state
|
||||
When: July 2007
|
||||
Why: Broken design for runtime control over driver power states, confusing
|
||||
driver-internal runtime power management with: mechanisms to support
|
||||
@ -53,6 +51,7 @@ Who: David Miller <davem@davemloft.net>
|
||||
What: Video4Linux API 1 ioctls and video_decoder.h from Video devices.
|
||||
When: December 2006
|
||||
Files: include/linux/video_decoder.h
|
||||
Check: include/linux/video_decoder.h
|
||||
Why: V4L1 API was replaced by V4L2 API during migration from 2.4 to 2.6
|
||||
series. The old API has lots of drawbacks and doesn't provide enough
|
||||
means to work with all video and audio standards. The newer API is
|
||||
@ -86,7 +85,7 @@ Who: Dominik Brodowski <linux@brodo.de>
|
||||
What: remove EXPORT_SYMBOL(kernel_thread)
|
||||
When: August 2006
|
||||
Files: arch/*/kernel/*_ksyms.c
|
||||
Funcs: kernel_thread
|
||||
Check: kernel_thread
|
||||
Why: kernel_thread is a low-level implementation detail. Drivers should
|
||||
use the <linux/kthread.h> API instead which shields them from
|
||||
implementation details and provides a higher-level interface that
|
||||
@ -137,6 +136,15 @@ Who: Greg Kroah-Hartman <gregkh@suse.de>
|
||||
|
||||
---------------------------
|
||||
|
||||
What: vm_ops.nopage
|
||||
When: Soon, provided in-kernel callers have been converted
|
||||
Why: This interface is replaced by vm_ops.fault, but it has been around
|
||||
forever, is used by a lot of drivers, and doesn't cost much to
|
||||
maintain.
|
||||
Who: Nick Piggin <npiggin@suse.de>
|
||||
|
||||
---------------------------
|
||||
|
||||
What: Interrupt only SA_* flags
|
||||
When: September 2007
|
||||
Why: The interrupt related SA_* flags are replaced by IRQF_* to move them
|
||||
@ -156,15 +164,6 @@ Who: Kay Sievers <kay.sievers@suse.de>
|
||||
|
||||
---------------------------
|
||||
|
||||
What: i2c-isa
|
||||
When: December 2006
|
||||
Why: i2c-isa is a non-sense and doesn't fit in the device driver
|
||||
model. Drivers relying on it are better implemented as platform
|
||||
drivers.
|
||||
Who: Jean Delvare <khali@linux-fr.org>
|
||||
|
||||
---------------------------
|
||||
|
||||
What: i2c_adapter.list
|
||||
When: July 2007
|
||||
Why: Superfluous, this list duplicates the one maintained by the driver
|
||||
@ -181,24 +180,11 @@ Who: Adrian Bunk <bunk@stusta.de>
|
||||
|
||||
---------------------------
|
||||
|
||||
What: /sys/firmware/acpi/namespace
|
||||
When: 2.6.21
|
||||
Why: The ACPI namespace is effectively the symbol list for
|
||||
the BIOS. The device names are completely arbitrary
|
||||
and have no place being exposed to user-space.
|
||||
|
||||
For those interested in the BIOS ACPI namespace,
|
||||
the BIOS can be extracted and disassembled with acpidump
|
||||
and iasl as documented in the pmtools package here:
|
||||
http://ftp.kernel.org/pub/linux/kernel/people/lenb/acpi/utils
|
||||
Who: Len Brown <len.brown@intel.com>
|
||||
|
||||
---------------------------
|
||||
|
||||
What: ACPI procfs interface
|
||||
When: July 2007
|
||||
Why: After ACPI sysfs conversion, ACPI attributes will be duplicated
|
||||
in sysfs and the ACPI procfs interface should be removed.
|
||||
When: July 2008
|
||||
Why: ACPI sysfs conversion should be finished by January 2008.
|
||||
ACPI procfs interface will be removed in July 2008 so that
|
||||
there is enough time for the user space to catch up.
|
||||
Who: Zhang Rui <rui.zhang@intel.com>
|
||||
|
||||
---------------------------
|
||||
@ -211,6 +197,14 @@ Who: Len Brown <len.brown@intel.com>
|
||||
|
||||
---------------------------
|
||||
|
||||
What: /proc/acpi/event
|
||||
When: February 2008
|
||||
Why: /proc/acpi/event has been replaced by events via the input layer
|
||||
and netlink since 2.6.23.
|
||||
Who: Len Brown <len.brown@intel.com>
|
||||
|
||||
---------------------------
|
||||
|
||||
What: Compaq touchscreen device emulation
|
||||
When: Oct 2007
|
||||
Files: drivers/input/tsdev.c
|
||||
@ -225,22 +219,6 @@ Who: Richard Purdie <rpurdie@rpsys.net>
|
||||
|
||||
---------------------------
|
||||
|
||||
What: read_dev_chars(), read_conf_data{,_lpm}() (s390 common I/O layer)
|
||||
When: December 2007
|
||||
Why: These functions are a leftover from 2.4 times. They have several
|
||||
problems:
|
||||
- Duplication of checks that are done in the device driver's
|
||||
interrupt handler
|
||||
- common I/O layer can't do device specific error recovery
|
||||
- device driver can't be notified for conditions happening during
|
||||
execution of the function
|
||||
Device drivers should issue the read device characteristics and read
|
||||
configuration data ccws and do the appropriate error handling
|
||||
themselves.
|
||||
Who: Cornelia Huck <cornelia.huck@de.ibm.com>
|
||||
|
||||
---------------------------
|
||||
|
||||
What: i2c-ixp2000, i2c-ixp4xx and scx200_i2c drivers
|
||||
When: September 2007
|
||||
Why: Obsolete. The new i2c-gpio driver replaces all hardware-specific
|
||||
@ -310,3 +288,42 @@ Why: The arch/powerpc tree is the merged architecture for ppc32 and ppc64
|
||||
Who: linuxppc-dev@ozlabs.org
|
||||
|
||||
---------------------------
|
||||
|
||||
What: mthca driver's MSI support
|
||||
When: January 2008
|
||||
Files: drivers/infiniband/hw/mthca/*.[ch]
|
||||
Why: All mthca hardware also supports MSI-X, which provides
|
||||
strictly more functionality than MSI. So there is no point in
|
||||
having both MSI-X and MSI support in the driver.
|
||||
Who: Roland Dreier <rolandd@cisco.com>
|
||||
|
||||
---------------------------
|
||||
|
||||
What: sk98lin network driver
|
||||
When: February 2008
|
||||
Why: In kernel tree version of driver is unmaintained. Sk98lin driver
|
||||
is replaced by the skge driver.
|
||||
Who: Stephen Hemminger <shemminger@linux-foundation.org>
|
||||
|
||||
---------------------------
|
||||
|
||||
What: i386/x86_64 bzImage symlinks
|
||||
When: April 2008
|
||||
|
||||
Why: The i386/x86_64 merge provides a symlink to the old bzImage
|
||||
location so not yet updated user space tools, e.g. package
|
||||
scripts, do not break.
|
||||
Who: Thomas Gleixner <tglx@linutronix.de>
|
||||
|
||||
---------------------------
|
||||
|
||||
What: shaper network driver
|
||||
When: January 2008
|
||||
Files: drivers/net/shaper.c, include/linux/if_shaper.h
|
||||
Why: This driver has been marked obsolete for many years.
|
||||
It was only designed to work on lower speed links and has design
|
||||
flaws that lead to machine crashes. The qdisc infrastructure in
|
||||
2.4 or later kernels, provides richer features and is more robust.
|
||||
Who: Stephen Hemminger <shemminger@linux-foundation.org>
|
||||
|
||||
---------------------------
|
||||
|
@ -32,6 +32,8 @@ directory-locking
|
||||
- info about the locking scheme used for directory operations.
|
||||
dlmfs.txt
|
||||
- info on the userspace interface to the OCFS2 DLM.
|
||||
ecryptfs.txt
|
||||
- docs on eCryptfs: stacked cryptographic filesystem for Linux.
|
||||
ext2.txt
|
||||
- info, mount options and specifications for the Ext2 filesystem.
|
||||
ext3.txt
|
||||
|
@ -6,12 +6,26 @@ ABOUT
|
||||
|
||||
v9fs is a Unix implementation of the Plan 9 9p remote filesystem protocol.
|
||||
|
||||
This software was originally developed by Ron Minnich <rminnich@lanl.gov>
|
||||
and Maya Gokhale <maya@lanl.gov>. Additional development by Greg Watson
|
||||
This software was originally developed by Ron Minnich <rminnich@sandia.gov>
|
||||
and Maya Gokhale. Additional development by Greg Watson
|
||||
<gwatson@lanl.gov> and most recently Eric Van Hensbergen
|
||||
<ericvh@gmail.com>, Latchesar Ionkov <lucho@ionkov.net> and Russ Cox
|
||||
<rsc@swtch.com>.
|
||||
|
||||
The best detailed explanation of the Linux implementation and applications of
|
||||
the 9p client is available in the form of a USENIX paper:
|
||||
http://www.usenix.org/events/usenix05/tech/freenix/hensbergen.html
|
||||
|
||||
Other applications are described in the following papers:
|
||||
* XCPU & Clustering
|
||||
http://www.xcpu.org/xcpu-talk.pdf
|
||||
* KVMFS: control file system for KVM
|
||||
http://www.xcpu.org/kvmfs.pdf
|
||||
* CellFS: A New Programming Model for the Cell BE
|
||||
http://www.xcpu.org/cellfs-talk.pdf
|
||||
* PROSE I/O: Using 9p to enable Application Partitions
|
||||
http://plan9.escet.urjc.es/iwp9/cready/PROSE_iwp9_2006.pdf
|
||||
|
||||
USAGE
|
||||
=====
|
||||
|
||||
@ -90,9 +104,9 @@ subset of the namespace by extending the path: '#U*'/tmp would just export
|
||||
and export.
|
||||
|
||||
A Linux version of the 9p server is now maintained under the npfs project
|
||||
on sourceforge (http://sourceforge.net/projects/npfs). There is also a
|
||||
more stable single-threaded version of the server (named spfs) available from
|
||||
the same CVS repository.
|
||||
on sourceforge (http://sourceforge.net/projects/npfs). The currently
|
||||
maintained version is the single-threaded version of the server (named spfs)
|
||||
available from the same CVS repository.
|
||||
|
||||
There are user and developer mailing lists available through the v9fs project
|
||||
on sourceforge (http://sourceforge.net/projects/v9fs).
|
||||
|
@ -510,13 +510,24 @@ More details about quota locking can be found in fs/dquot.c.
|
||||
prototypes:
|
||||
void (*open)(struct vm_area_struct*);
|
||||
void (*close)(struct vm_area_struct*);
|
||||
int (*fault)(struct vm_area_struct*, struct vm_fault *);
|
||||
struct page *(*nopage)(struct vm_area_struct*, unsigned long, int *);
|
||||
int (*page_mkwrite)(struct vm_area_struct *, struct page *);
|
||||
|
||||
locking rules:
|
||||
BKL mmap_sem
|
||||
BKL mmap_sem PageLocked(page)
|
||||
open: no yes
|
||||
close: no yes
|
||||
fault: no yes
|
||||
nopage: no yes
|
||||
page_mkwrite: no yes no
|
||||
|
||||
->page_mkwrite() is called when a previously read-only page is
|
||||
about to become writeable. The file system is responsible for
|
||||
protecting against truncate races. Once appropriate action has been
|
||||
taken to lock out truncate, the page range should be verified to be
|
||||
within i_size. The page mapping should also be checked that it is not
|
||||
NULL.
|
||||
|
||||
================================================================================
|
||||
Dubious stuff
|
||||
|
@ -277,11 +277,10 @@ static struct config_item *simple_children_make_item(struct config_group *group,
|
||||
{
|
||||
struct simple_child *simple_child;
|
||||
|
||||
simple_child = kmalloc(sizeof(struct simple_child), GFP_KERNEL);
|
||||
simple_child = kzalloc(sizeof(struct simple_child), GFP_KERNEL);
|
||||
if (!simple_child)
|
||||
return NULL;
|
||||
|
||||
memset(simple_child, 0, sizeof(struct simple_child));
|
||||
|
||||
config_item_init_type_name(&simple_child->item, name,
|
||||
&simple_child_type);
|
||||
@ -364,12 +363,11 @@ static struct config_group *group_children_make_group(struct config_group *group
|
||||
{
|
||||
struct simple_children *simple_children;
|
||||
|
||||
simple_children = kmalloc(sizeof(struct simple_children),
|
||||
simple_children = kzalloc(sizeof(struct simple_children),
|
||||
GFP_KERNEL);
|
||||
if (!simple_children)
|
||||
return NULL;
|
||||
|
||||
memset(simple_children, 0, sizeof(struct simple_children));
|
||||
|
||||
config_group_init_type_name(&simple_children->group, name,
|
||||
&simple_children_type);
|
||||
|
59
Documentation/filesystems/hfsplus.txt
Normal file
59
Documentation/filesystems/hfsplus.txt
Normal file
@ -0,0 +1,59 @@
|
||||
|
||||
Macintosh HFSPlus Filesystem for Linux
|
||||
======================================
|
||||
|
||||
HFSPlus is a filesystem first introduced in MacOS 8.1.
|
||||
HFSPlus has several extensions to HFS, including 32-bit allocation
|
||||
blocks, 255-character unicode filenames, and file sizes of 2^63 bytes.
|
||||
|
||||
|
||||
Mount options
|
||||
=============
|
||||
|
||||
When mounting an HFSPlus filesystem, the following options are accepted:
|
||||
|
||||
creator=cccc, type=cccc
|
||||
Specifies the creator/type values as shown by the MacOS finder
|
||||
used for creating new files. Default values: '????'.
|
||||
|
||||
uid=n, gid=n
|
||||
Specifies the user/group that owns all files on the filesystem
|
||||
that have uninitialized permissions structures.
|
||||
Default: user/group id of the mounting process.
|
||||
|
||||
umask=n
|
||||
Specifies the umask (in octal) used for files and directories
|
||||
that have uninitialized permissions structures.
|
||||
Default: umask of the mounting process.
|
||||
|
||||
session=n
|
||||
Select the CDROM session to mount as HFSPlus filesystem. Defaults to
|
||||
leaving that decision to the CDROM driver. This option will fail
|
||||
with anything but a CDROM as the underlying device.
|
||||
|
||||
part=n
|
||||
Select partition number n from the devices. This option only makes
|
||||
sense for CDROMs because they can't be partitioned under Linux.
|
||||
For disk devices the generic partition parsing code does this
|
||||
for us. Defaults to not parsing the partition table at all.
|
||||
|
||||
decompose
|
||||
Decompose file name characters.
|
||||
|
||||
nodecompose
|
||||
Do not decompose file name characters.
|
||||
|
||||
force
|
||||
Used to force write access to volumes that are marked as journalled
|
||||
or locked. Use at your own risk.
|
||||
|
||||
nls=cccc
|
||||
Encoding to use when presenting file names.
|
||||
|
||||
|
||||
References
|
||||
==========
|
||||
|
||||
kernel source: <file:fs/hfsplus>
|
||||
|
||||
Apple Technote 1150 http://developer.apple.com/technotes/tn/tn1150.html
|
@ -407,7 +407,7 @@ raiddev /dev/md0
|
||||
device /dev/hda5
|
||||
raid-disk 0
|
||||
device /dev/hdb1
|
||||
raid-disl 1
|
||||
raid-disk 1
|
||||
|
||||
For linear raid, just change the raid-level above to "raid-level linear", for
|
||||
mirrors, change it to "raid-level 1", and for stripe sets with parity, change
|
||||
@ -457,6 +457,8 @@ ChangeLog
|
||||
|
||||
Note, a technical ChangeLog aimed at kernel hackers is in fs/ntfs/ChangeLog.
|
||||
|
||||
2.1.29:
|
||||
- Fix a deadlock when mounting read-write.
|
||||
2.1.28:
|
||||
- Fix a deadlock.
|
||||
2.1.27:
|
||||
|
@ -28,11 +28,7 @@ Manish Singh <manish.singh@oracle.com>
|
||||
Caveats
|
||||
=======
|
||||
Features which OCFS2 does not support yet:
|
||||
- sparse files
|
||||
- extended attributes
|
||||
- shared writable mmap
|
||||
- loopback is supported, but data written will not
|
||||
be cluster coherent.
|
||||
- quotas
|
||||
- cluster aware flock
|
||||
- cluster aware lockf
|
||||
@ -57,3 +53,12 @@ nointr Do not allow signals to interrupt cluster
|
||||
atime_quantum=60(*) OCFS2 will not update atime unless this number
|
||||
of seconds has passed since the last update.
|
||||
Set to zero to always update atime.
|
||||
data=ordered (*) All data are forced directly out to the main file
|
||||
system prior to its metadata being committed to the
|
||||
journal.
|
||||
data=writeback Data ordering is not preserved, data may be written
|
||||
into the main file system after its metadata has been
|
||||
committed to the journal.
|
||||
preferred_slot=0(*) During mount, try to use this filesystem slot first. If
|
||||
it is in use by another node, the first empty one found
|
||||
will be chosen. Invalid values will be ignored.
|
||||
|
@ -42,6 +42,7 @@ Table of Contents
|
||||
2.12 /proc/<pid>/oom_adj - Adjust the oom-killer score
|
||||
2.13 /proc/<pid>/oom_score - Display current oom-killer score
|
||||
2.14 /proc/<pid>/io - Display the IO accounting fields
|
||||
2.15 /proc/<pid>/coredump_filter - Core dump filtering settings
|
||||
|
||||
------------------------------------------------------------------------------
|
||||
Preface
|
||||
@ -1065,6 +1066,13 @@ check the amount of free space (value is in seconds). Default settings are: 4,
|
||||
resume it if we have a value of 3 or more percent; consider information about
|
||||
the amount of free space valid for 30 seconds
|
||||
|
||||
audit_argv_kb
|
||||
-------------
|
||||
|
||||
The file contains a single value denoting the limit on the argv array size
|
||||
for execve (in KiB). This limit is only applied when system call auditing for
|
||||
execve is enabled, otherwise the value is ignored.
|
||||
|
||||
ctrl-alt-del
|
||||
------------
|
||||
|
||||
@ -2177,4 +2185,41 @@ those 64-bit counters, process A could see an intermediate result.
|
||||
More information about this can be found within the taskstats documentation in
|
||||
Documentation/accounting.
|
||||
|
||||
2.15 /proc/<pid>/coredump_filter - Core dump filtering settings
|
||||
---------------------------------------------------------------
|
||||
When a process is dumped, all anonymous memory is written to a core file as
|
||||
long as the size of the core file isn't limited. But sometimes we don't want
|
||||
to dump some memory segments, for example, huge shared memory. Conversely,
|
||||
sometimes we want to save file-backed memory segments into a core file, not
|
||||
only the individual files.
|
||||
|
||||
/proc/<pid>/coredump_filter allows you to customize which memory segments
|
||||
will be dumped when the <pid> process is dumped. coredump_filter is a bitmask
|
||||
of memory types. If a bit of the bitmask is set, memory segments of the
|
||||
corresponding memory type are dumped, otherwise they are not dumped.
|
||||
|
||||
The following 4 memory types are supported:
|
||||
- (bit 0) anonymous private memory
|
||||
- (bit 1) anonymous shared memory
|
||||
- (bit 2) file-backed private memory
|
||||
- (bit 3) file-backed shared memory
|
||||
|
||||
Note that MMIO pages such as frame buffer are never dumped and vDSO pages
|
||||
are always dumped regardless of the bitmask status.
|
||||
|
||||
Default value of coredump_filter is 0x3; this means all anonymous memory
|
||||
segments are dumped.
|
||||
|
||||
If you don't want to dump all shared memory segments attached to pid 1234,
|
||||
write 1 to the process's proc file.
|
||||
|
||||
$ echo 0x1 > /proc/1234/coredump_filter
|
||||
|
||||
When a new process is created, the process inherits the bitmask status from its
|
||||
parent. It is useful to set up coredump_filter before the program runs.
|
||||
For example:
|
||||
|
||||
$ echo 0x7 > /proc/self/coredump_filter
|
||||
$ ./some_program
|
||||
|
||||
------------------------------------------------------------------------------
|
||||
|
@ -75,6 +75,9 @@ using the include file:
|
||||
If you stick to this convention then it'll be easier for other developers to
|
||||
see what your code is doing, and help maintain it.
|
||||
|
||||
Note that these operations include I/O barriers on platforms which need to
|
||||
use them; drivers don't need to add them explicitly.
|
||||
|
||||
|
||||
Identifying GPIOs
|
||||
-----------------
|
||||
@ -145,7 +148,7 @@ pin ... that won't always match the specified output value, because of
|
||||
issues including wire-OR and output latencies.
|
||||
|
||||
The get/set calls have no error returns because "invalid GPIO" should have
|
||||
been reported earlier in gpio_set_direction(). However, note that not all
|
||||
been reported earlier from gpio_direction_*(). However, note that not all
|
||||
platforms can read the value of output pins; those that can't should always
|
||||
return zero. Also, using these calls for GPIOs that can't safely be accessed
|
||||
without sleeping (see below) is an error.
|
||||
@ -236,7 +239,7 @@ map between them using calls like:
|
||||
Those return either the corresponding number in the other namespace, or
|
||||
else a negative errno code if the mapping can't be done. (For example,
|
||||
some GPIOs can't be used as IRQs.) It is an unchecked error to use a GPIO
|
||||
number that hasn't been marked as an input using gpio_set_direction(), or
|
||||
number that wasn't set up as an input using gpio_direction_input(), or
|
||||
to use an IRQ number that didn't originally come from gpio_to_irq().
|
||||
|
||||
These two mapping calls are expected to cost on the order of a single
|
||||
|
@ -5,7 +5,7 @@ for the 8254 and Real Time Clock (RTC) periodic timer functionality.
|
||||
Each HPET can have up to 32 timers. It is possible to configure the
|
||||
first two timers as legacy replacements for 8254 and RTC periodic timers.
|
||||
A specification done by Intel and Microsoft can be found at
|
||||
<http://www.intel.com/hardwaredesign/hpetspec.htm>.
|
||||
<http://www.intel.com/technology/architecture/hpetspec.htm>.
|
||||
|
||||
The driver supports detection of HPET driver allocation and initialization
|
||||
of the HPET before the driver module_init routine is called. This enables
|
||||
|
@ -2,7 +2,7 @@ Kernel driver abituguru
|
||||
=======================
|
||||
|
||||
Supported chips:
|
||||
* Abit uGuru revision 1-3 (Hardware Monitor part only)
|
||||
* Abit uGuru revision 1 & 2 (Hardware Monitor part only)
|
||||
Prefix: 'abituguru'
|
||||
Addresses scanned: ISA 0x0E0
|
||||
Datasheet: Not available, this driver is based on reverse engineering.
|
||||
@ -20,8 +20,8 @@ Supported chips:
|
||||
uGuru 2.1.0.0 ~ 2.1.2.8 (AS8, AV8, AA8, AG8, AA8XE, AX8)
|
||||
uGuru 2.2.0.0 ~ 2.2.0.6 (AA8 Fatal1ty)
|
||||
uGuru 2.3.0.0 ~ 2.3.0.9 (AN8)
|
||||
uGuru 3.0.0.0 ~ 3.0.1.2 (AW8, AL8, NI8)
|
||||
uGuru 4.xxxxx? (AT8 32X) (2)
|
||||
uGuru 3.0.0.0 ~ 3.0.x.x (AW8, AL8, AT8, NI8 SLI, AT8 32X, AN8 32X,
|
||||
AW9D-MAX) (2)
|
||||
1) For revisions 2 and 3 uGuru's the driver can autodetect the
|
||||
sensortype (Volt or Temp) for bank1 sensors, for revision 1 uGuru's
|
||||
this does not always work. For these uGuru's the autodetection can
|
||||
@ -30,8 +30,9 @@ Supported chips:
|
||||
bank1_types=1,1,0,0,0,0,0,2,0,0,0,0,2,0,0,1
|
||||
You may also need to specify the fan_sensors option for these boards
|
||||
fan_sensors=5
|
||||
2) The current version of the abituguru driver is known to NOT work
|
||||
on these Motherboards
|
||||
2) There is a separate abituguru3 driver for these motherboards,
|
||||
the abituguru (without the 3 !) driver will not work on these
|
||||
motherboards (and vice versa)!
|
||||
|
||||
Authors:
|
||||
Hans de Goede <j.w.r.degoede@hhs.nl>,
|
||||
@ -43,8 +44,10 @@ Module Parameters
|
||||
-----------------
|
||||
|
||||
* force: bool Force detection. Note this parameter only causes the
|
||||
detection to be skipped, if the uGuru can't be read
|
||||
the module initialization (insmod) will still fail.
|
||||
detection to be skipped, and thus the insmod to
|
||||
succeed. If the uGuru can't be read the actual hwmon
|
||||
driver will not load and thus no hwmon device will get
|
||||
registered.
|
||||
* bank1_types: int[] Bank1 sensortype autodetection override:
|
||||
-1 autodetect (default)
|
||||
0 volt sensor
|
||||
@ -69,13 +72,15 @@ dmesg | grep abituguru
|
||||
Description
|
||||
-----------
|
||||
|
||||
This driver supports the hardware monitoring features of the Abit uGuru chip
|
||||
found on Abit uGuru featuring motherboards (most modern Abit motherboards).
|
||||
This driver supports the hardware monitoring features of the first and
|
||||
second revision of the Abit uGuru chip found on Abit uGuru featuring
|
||||
motherboards (most modern Abit motherboards).
|
||||
|
||||
The uGuru chip in reality is a Winbond W83L950D in disguise (despite Abit
|
||||
claiming it is "a new microprocessor designed by the ABIT Engineers").
|
||||
Unfortunatly this doesn't help since the W83L950D is a generic
|
||||
microcontroller with a custom Abit application running on it.
|
||||
The first and second revision of the uGuru chip in reality is a Winbond
|
||||
W83L950D in disguise (despite Abit claiming it is "a new microprocessor
|
||||
designed by the ABIT Engineers"). Unfortunately this doesn't help since the
|
||||
W83L950D is a generic microcontroller with a custom Abit application running
|
||||
on it.
|
||||
|
||||
Despite Abit not releasing any information regarding the uGuru, Olle
|
||||
Sandberg <ollebull@gmail.com> has managed to reverse engineer the sensor part
|
||||
|
65
Documentation/hwmon/abituguru3
Normal file
65
Documentation/hwmon/abituguru3
Normal file
@ -0,0 +1,65 @@
|
||||
Kernel driver abituguru3
|
||||
========================
|
||||
|
||||
Supported chips:
|
||||
* Abit uGuru revision 3 (Hardware Monitor part, reading only)
|
||||
Prefix: 'abituguru3'
|
||||
Addresses scanned: ISA 0x0E0
|
||||
Datasheet: Not available, this driver is based on reverse engineering.
|
||||
Note:
|
||||
The uGuru is a microcontroller with onboard firmware which programs
|
||||
it to behave as a hwmon IC. There are many different revisions of the
|
||||
firmware and thus effectively many different revisions of the uGuru.
|
||||
Below is an incomplete list with which revisions are used for which
|
||||
Motherboards:
|
||||
uGuru 1.00 ~ 1.24 (AI7, KV8-MAX3, AN7)
|
||||
uGuru 2.0.0.0 ~ 2.0.4.2 (KV8-PRO)
|
||||
uGuru 2.1.0.0 ~ 2.1.2.8 (AS8, AV8, AA8, AG8, AA8XE, AX8)
|
||||
uGuru 2.3.0.0 ~ 2.3.0.9 (AN8)
|
||||
uGuru 3.0.0.0 ~ 3.0.x.x (AW8, AL8, AT8, NI8 SLI, AT8 32X, AN8 32X,
|
||||
AW9D-MAX)
|
||||
The abituguru3 driver is only for revision 3.0.x.x motherboards,
|
||||
this driver will not work on older motherboards. For older
|
||||
motherboards use the abituguru (without the 3 !) driver.
|
||||
|
||||
Authors:
|
||||
Hans de Goede <j.w.r.degoede@hhs.nl>,
|
||||
(Initial reverse engineering done by Louis Kruger)
|
||||
|
||||
|
||||
Module Parameters
|
||||
-----------------
|
||||
|
||||
* force: bool Force detection. Note this parameter only causes the
|
||||
detection to be skipped, and thus the insmod to
|
||||
succeed. If the uGuru can't be read the actual hwmon
|
||||
driver will not load and thus no hwmon device will get
|
||||
registered.
|
||||
* verbose: bool Should the driver be verbose?
|
||||
0/off/false normal output
|
||||
1/on/true + verbose error reporting (default)
|
||||
Default: 1 (the driver is still in the testing phase)
|
||||
|
||||
Description
|
||||
-----------
|
||||
|
||||
This driver supports the hardware monitoring features of the third revision of
|
||||
the Abit uGuru chip, found on recent Abit uGuru featuring motherboards.
|
||||
|
||||
The 3rd revision of the uGuru chip in reality is a Winbond W83L951G.
|
||||
Unfortunately this doesn't help since the W83L951G is a generic microcontroller
|
||||
with a custom Abit application running on it.
|
||||
|
||||
Despite Abit not releasing any information regarding the uGuru revision 3,
|
||||
Louis Kruger has managed to reverse engineer the sensor part of the uGuru.
|
||||
Without his work this driver would not have been possible.
|
||||
|
||||
Known Issues
|
||||
------------
|
||||
|
||||
The voltage and frequency control parts of the Abit uGuru are not supported,
|
||||
neither is writing any of the sensor settings and writing / reading the
|
||||
fanspeed control registers (FanEQ)
|
||||
|
||||
If you encounter any problems please mail me <j.w.r.degoede@hhs.nl> and
|
||||
include the output of: "dmesg | grep abituguru"
|
@ -6,13 +6,13 @@ Supported chips:
|
||||
Prefix: 'adm1030'
|
||||
Addresses scanned: I2C 0x2c to 0x2e
|
||||
Datasheet: Publicly available at the Analog Devices website
|
||||
http://products.analog.com/products/info.asp?product=ADM1030
|
||||
http://www.analog.com/en/prod/0%2C2877%2CADM1030%2C00.html
|
||||
|
||||
* Analog Devices ADM1031
|
||||
Prefix: 'adm1031'
|
||||
Addresses scanned: I2C 0x2c to 0x2e
|
||||
Datasheet: Publicly available at the Analog Devices website
|
||||
http://products.analog.com/products/info.asp?product=ADM1031
|
||||
http://www.analog.com/en/prod/0%2C2877%2CADM1031%2C00.html
|
||||
|
||||
Authors:
|
||||
Alexandre d'Alton <alex@alexdalton.org>
|
||||
|
257
Documentation/hwmon/dme1737
Normal file
257
Documentation/hwmon/dme1737
Normal file
@ -0,0 +1,257 @@
|
||||
Kernel driver dme1737
|
||||
=====================
|
||||
|
||||
Supported chips:
|
||||
* SMSC DME1737 and compatibles (like Asus A8000)
|
||||
Prefix: 'dme1737'
|
||||
Addresses scanned: I2C 0x2c, 0x2d, 0x2e
|
||||
Datasheet: Provided by SMSC upon request and under NDA
|
||||
|
||||
Authors:
|
||||
Juerg Haefliger <juergh@gmail.com>
|
||||
|
||||
|
||||
Module Parameters
|
||||
-----------------
|
||||
|
||||
* force_start: bool Enables the monitoring of voltage, fan and temp inputs
|
||||
and PWM output control functions. Using this parameter
|
||||
shouldn't be required since the BIOS usually takes care
|
||||
of this.
|
||||
|
||||
Note that there is no need to use this parameter if the driver loads without
|
||||
complaining. The driver will say so if it is necessary.
|
||||
|
||||
|
||||
Description
|
||||
-----------
|
||||
|
||||
This driver implements support for the hardware monitoring capabilities of the
|
||||
SMSC DME1737 and Asus A8000 (which are the same) Super-I/O chips. This chip
|
||||
features monitoring of 3 temp sensors temp[1-3] (2 remote diodes and 1
|
||||
internal), 7 voltages in[0-6] (6 external and 1 internal) and 6 fan speeds
|
||||
fan[1-6]. Additionally, the chip implements 5 PWM outputs pwm[1-3,5-6] for
|
||||
controlling fan speeds both manually and automatically.
|
||||
|
||||
Fan[3-6] and pwm[3,5-6] are optional features and their availability is
|
||||
dependent on the configuration of the chip. The driver will detect which
|
||||
features are present during initialization and create the sysfs attributes
|
||||
accordingly.
|
||||
|
||||
|
||||
Voltage Monitoring
|
||||
------------------
|
||||
|
||||
The voltage inputs are sampled with 12-bit resolution and have internal
|
||||
scaling resistors. The values returned by the driver therefore reflect true
|
||||
millivolts and don't need scaling. The voltage inputs are mapped as follows
|
||||
(the last column indicates the input ranges):
|
||||
|
||||
in0: +5VTR (+5V standby) 0V - 6.64V
|
||||
in1: Vccp (processor core) 0V - 3V
|
||||
in2: VCC (internal +3.3V) 0V - 4.38V
|
||||
in3: +5V 0V - 6.64V
|
||||
in4: +12V 0V - 16V
|
||||
in5: VTR (+3.3V standby) 0V - 4.38V
|
||||
in6: Vbat (+3.0V) 0V - 4.38V
|
||||
|
||||
Each voltage input has associated min and max limits which trigger an alarm
|
||||
when crossed.
|
||||
|
||||
|
||||
Temperature Monitoring
|
||||
----------------------
|
||||
|
||||
Temperatures are measured with 12-bit resolution and reported in millidegree
|
||||
Celsius. The chip also features offsets for all 3 temperature inputs which -
|
||||
when programmed - get added to the input readings. The chip does all the
|
||||
scaling by itself and the driver therefore reports true temperatures that don't
|
||||
need any user-space adjustments. The temperature inputs are mapped as follows
|
||||
(the last column indicates the input ranges):
|
||||
|
||||
temp1: Remote diode 1 (3904 type) temperature -127C - +127C
|
||||
temp2: DME1737 internal temperature -127C - +127C
|
||||
temp3: Remote diode 2 (3904 type) temperature -127C - +127C
|
||||
|
||||
Each temperature input has associated min and max limits which trigger an alarm
|
||||
when crossed. Additionally, each temperature input has a fault attribute that
|
||||
returns 1 when a faulty diode or an unconnected input is detected and 0
|
||||
otherwise.
|
||||
|
||||
|
||||
Fan Monitoring
|
||||
--------------
|
||||
|
||||
Fan RPMs are measured with 16-bit resolution. The chip provides inputs for 6
|
||||
fan tachometers. All 6 inputs have an associated min limit which triggers an
|
||||
alarm when crossed. Fan inputs 1-4 provide type attributes that need to be set
|
||||
to the number of pulses per fan revolution that the connected tachometer
|
||||
generates. Supported values are 1, 2, and 4. Fan inputs 5-6 only support fans
|
||||
that generate 2 pulses per revolution. Fan inputs 5-6 also provide a max
|
||||
attribute that needs to be set to the maximum attainable RPM (fan at 100% duty-
|
||||
cycle) of the input. The chip adjusts the sampling rate based on this value.
|
||||
|
||||
|
||||
PWM Output Control
|
||||
------------------
|
||||
|
||||
This chip features 5 PWM outputs. PWM outputs 1-3 are associated with fan
|
||||
inputs 1-3 and PWM outputs 5-6 are associated with fan inputs 5-6. PWM outputs
|
||||
1-3 can be configured to operate either in manual or automatic mode by setting
|
||||
the appropriate enable attribute accordingly. PWM outputs 5-6 can only operate
|
||||
in manual mode, their enable attributes are therefore read-only. When set to
|
||||
manual mode, the fan speed is set by writing the duty-cycle value to the
|
||||
appropriate PWM attribute. In automatic mode, the PWM attribute returns the
|
||||
current duty-cycle as set by the fan controller in the chip. All PWM outputs
|
||||
support the setting of the output frequency via the freq attribute.
|
||||
|
||||
In automatic mode, the chip supports the setting of the PWM ramp rate which
|
||||
defines how fast the PWM output is adjusting to changes of the associated
|
||||
temperature input. Associating PWM outputs to temperature inputs is done via
|
||||
temperature zones. The chip features 3 zones whose assignments to temperature
|
||||
inputs are static and determined during initialization. These assignments can
|
||||
be retrieved via the zone[1-3]_auto_channels_temp attributes. Each PWM output
|
||||
is assigned to one (or hottest of multiple) temperature zone(s) through the
|
||||
pwm[1-3]_auto_channels_zone attributes. Each PWM output has 3 distinct output
|
||||
duty-cycles: full, low, and min. Full is internally hard-wired to 255 (100%)
|
||||
and low and min can be programmed via pwm[1-3]_auto_point1_pwm and
|
||||
pwm[1-3]_auto_pwm_min, respectively. The thermal thresholds of the zones are
|
||||
programmed via zone[1-3]_auto_point[1-3]_temp and
|
||||
zone[1-3]_auto_point1_temp_hyst:
|
||||
|
||||
pwm[1-3]_auto_point2_pwm full-speed duty-cycle (255, i.e., 100%)
|
||||
pwm[1-3]_auto_point1_pwm low-speed duty-cycle
|
||||
pwm[1-3]_auto_pwm_min min-speed duty-cycle
|
||||
|
||||
zone[1-3]_auto_point3_temp full-speed temp (all outputs)
|
||||
zone[1-3]_auto_point2_temp full-speed temp
|
||||
zone[1-3]_auto_point1_temp low-speed temp
|
||||
zone[1-3]_auto_point1_temp_hyst min-speed temp
|
||||
|
||||
The chip adjusts the output duty-cycle linearly in the range of auto_point1_pwm
|
||||
to auto_point2_pwm if the temperature of the associated zone is between
|
||||
auto_point1_temp and auto_point2_temp. If the temperature drops below the
|
||||
auto_point1_temp_hyst value, the output duty-cycle is set to the auto_pwm_min
|
||||
value which only supports two values: 0 or auto_point1_pwm. That means that the
|
||||
fan either turns completely off or keeps spinning with the low-speed
|
||||
duty-cycle. If any of the temperatures rise above the auto_point3_temp value,
|
||||
all PWM outputs are set to 100% duty-cycle.
|
||||
|
||||
Following is another representation of how the chip sets the output duty-cycle
|
||||
based on the temperature of the associated thermal zone:
|
||||
|
||||
Duty-Cycle Duty-Cycle
|
||||
Temperature Rising Temp Falling Temp
|
||||
----------- ----------- ------------
|
||||
full-speed full-speed full-speed
|
||||
|
||||
< linearly adjusted duty-cycle >
|
||||
|
||||
low-speed low-speed low-speed
|
||||
min-speed low-speed
|
||||
min-speed min-speed min-speed
|
||||
min-speed min-speed
|
||||
|
||||
|
||||
Sysfs Attributes
|
||||
----------------
|
||||
|
||||
Following is a list of all sysfs attributes that the driver provides, their
|
||||
permissions and a short description:
|
||||
|
||||
Name Perm Description
|
||||
---- ---- -----------
|
||||
cpu0_vid RO CPU core reference voltage in
|
||||
millivolts.
|
||||
vrm RW Voltage regulator module version
|
||||
number.
|
||||
|
||||
in[0-6]_input RO Measured voltage in millivolts.
|
||||
in[0-6]_min RW Low limit for voltage input.
|
||||
in[0-6]_max RW High limit for voltage input.
|
||||
in[0-6]_alarm RO Voltage input alarm. Returns 1 if
|
||||
voltage input is or went outside the
|
||||
associated min-max range, 0 otherwise.
|
||||
|
||||
temp[1-3]_input RO Measured temperature in millidegree
|
||||
Celsius.
|
||||
temp[1-3]_min RW Low limit for temp input.
|
||||
temp[1-3]_max RW High limit for temp input.
|
||||
temp[1-3]_offset RW Offset for temp input. This value will
|
||||
be added by the chip to the measured
|
||||
temperature.
|
||||
temp[1-3]_alarm RO Alarm for temp input. Returns 1 if temp
|
||||
input is or went outside the associated
|
||||
min-max range, 0 otherwise.
|
||||
temp[1-3]_fault RO Temp input fault. Returns 1 if the chip
|
||||
detects a faulty thermal diode or an
|
||||
unconnected temp input, 0 otherwise.
|
||||
|
||||
zone[1-3]_auto_channels_temp RO Temperature zone to temperature input
|
||||
mapping. This attribute is a bitfield
|
||||
and supports the following values:
|
||||
1: temp1
|
||||
2: temp2
|
||||
4: temp3
|
||||
zone[1-3]_auto_point1_temp_hyst RW Auto PWM temp point1 hysteresis. The
|
||||
output of the corresponding PWM is set
|
||||
to the auto_pwm_min value if the temp
|
||||
falls below the auto_point1_temp_hyst
|
||||
value.
|
||||
zone[1-3]_auto_point[1-3]_temp RW Auto PWM temp points. Auto_point1 is
|
||||
the low-speed temp, auto_point2 is the
|
||||
full-speed temp, and auto_point3 is the
|
||||
temp at which all PWM outputs are set
|
||||
to full-speed (100% duty-cycle).
|
||||
|
||||
fan[1-6]_input RO Measured fan speed in RPM.
|
||||
fan[1-6]_min RW Low limit for fan input.
|
||||
fan[1-6]_alarm RO Alarm for fan input. Returns 1 if fan
|
||||
input is or went below the associated
|
||||
min value, 0 otherwise.
|
||||
fan[1-4]_type RW Type of attached fan. Expressed in
|
||||
number of pulses per revolution that
|
||||
the fan generates. Supported values are
|
||||
1, 2, and 4.
|
||||
fan[5-6]_max RW Max attainable RPM at 100% duty-cycle.
|
||||
Required for chip to adjust the
|
||||
sampling rate accordingly.
|
||||
|
||||
pwm[1-3,5-6] RO/RW Duty-cycle of PWM output. Supported
|
||||
values are 0-255 (0%-100%). Only
|
||||
writeable if the associated PWM is in
|
||||
manual mode.
|
||||
pwm[1-3]_enable RW Enable of PWM outputs 1-3. Supported
|
||||
values are:
|
||||
0: turned off (output @ 100%)
|
||||
1: manual mode
|
||||
2: automatic mode
|
||||
pwm[5-6]_enable RO Enable of PWM outputs 5-6. Always
|
||||
returns 1 since these 2 outputs are
|
||||
hard-wired to manual mode.
|
||||
pwm[1-3,5-6]_freq RW Frequency of PWM output. Supported
|
||||
values are in the range 11Hz-30000Hz
|
||||
(default is 25000Hz).
|
||||
pwm[1-3]_ramp_rate RW Ramp rate of PWM output. Determines how
|
||||
fast the PWM duty-cycle will change
|
||||
when the PWM is in automatic mode.
|
||||
Expressed in ms per PWM step. Supported
|
||||
values are in the range 0ms-206ms
|
||||
(default is 0, which means the duty-
|
||||
cycle changes instantly).
|
||||
pwm[1-3]_auto_channels_zone RW PWM output to temperature zone mapping.
|
||||
This attribute is a bitfield and
|
||||
supports the following values:
|
||||
1: zone1
|
||||
2: zone2
|
||||
4: zone3
|
||||
6: highest of zone[2-3]
|
||||
7: highest of zone[1-3]
|
||||
pwm[1-3]_auto_pwm_min RW Auto PWM min pwm. Minimum PWM duty-
|
||||
cycle. Supported values are 0 or
|
||||
auto_point1_pwm.
|
||||
pwm[1-3]_auto_point1_pwm RW Auto PWM pwm point. Auto_point1 is the
|
||||
low-speed duty-cycle.
|
||||
pwm[1-3]_auto_point2_pwm RO Auto PWM pwm point. Auto_point2 is the
|
||||
full-speed duty-cycle which is hard-
|
||||
wired to 255 (100% duty-cycle).
|
@ -5,11 +5,11 @@ Supported chips:
|
||||
* Fintek F71805F/FG
|
||||
Prefix: 'f71805f'
|
||||
Addresses scanned: none, address read from Super I/O config space
|
||||
Datasheet: Provided by Fintek on request
|
||||
Datasheet: Available from the Fintek website
|
||||
* Fintek F71872F/FG
|
||||
Prefix: 'f71872f'
|
||||
Addresses scanned: none, address read from Super I/O config space
|
||||
Datasheet: Provided by Fintek on request
|
||||
Datasheet: Available from the Fintek website
|
||||
|
||||
Author: Jean Delvare <khali@linux-fr.org>
|
||||
|
||||
@ -128,7 +128,9 @@ it.
|
||||
When the PWM method is used, you can select the operating frequency,
|
||||
from 187.5 kHz (default) to 31 Hz. The best frequency depends on the
|
||||
fan model. As a rule of thumb, lower frequencies seem to give better
|
||||
control, but may generate annoying high-pitch noise. Fintek recommends
|
||||
control, but may generate annoying high-pitch noise. So a frequency just
|
||||
above the audible range, such as 25 kHz, may be a good choice; if this
|
||||
doesn't give you good linear control, try reducing it. Fintek recommends
|
||||
not going below 1 kHz, as the fan tachometers get confused by lower
|
||||
frequencies as well.
|
||||
|
||||
@ -136,16 +138,23 @@ When the DC method is used, Fintek recommends not going below 5 V, which
|
||||
corresponds to a pwm value of 106 for the driver. The driver doesn't
|
||||
enforce this limit though.
|
||||
|
||||
Three different fan control modes are supported:
|
||||
Three different fan control modes are supported; the mode number is written
|
||||
to the pwm<n>_enable file.
|
||||
|
||||
* Manual mode
|
||||
You ask for a specific PWM duty cycle or DC voltage.
|
||||
* 1: Manual mode
|
||||
You ask for a specific PWM duty cycle or DC voltage by writing to the
|
||||
pwm<n> file.
|
||||
|
||||
* Fan speed mode
|
||||
You ask for a specific fan speed. This mode assumes that pwm1
|
||||
corresponds to fan1, pwm2 to fan2 and pwm3 to fan3.
|
||||
* 2: Temperature mode
|
||||
You define 3 temperature/fan speed trip points using the
|
||||
pwm<n>_auto_point<m>_temp and _fan files. These define a staircase
|
||||
relationship between temperature and fan speed with two additional points
|
||||
interpolated between the values that you define. When the temperature
|
||||
is below auto_point1_temp the fan is switched off.
|
||||
|
||||
* Temperature mode
|
||||
You define 3 temperature/fan speed trip points, and the fan speed is
|
||||
adjusted depending on the measured temperature, using interpolation.
|
||||
This mode is not yet supported by the driver.
|
||||
* 3: Fan speed mode
|
||||
You ask for a specific fan speed by writing to the fan<n>_target file.
|
||||
|
||||
Both of the automatic modes require that pwm1 corresponds to fan1, pwm2 to
|
||||
fan2 and pwm3 to fan3. Temperature mode also requires that temp1 corresponds
|
||||
to pwm1 and fan1, etc.
|
||||
|
@ -12,11 +12,12 @@ Supported chips:
|
||||
Addresses scanned: from Super I/O config space (8 I/O ports)
|
||||
Datasheet: Publicly available at the ITE website
|
||||
http://www.ite.com.tw/
|
||||
* IT8716F
|
||||
* IT8716F/IT8726F
|
||||
Prefix: 'it8716'
|
||||
Addresses scanned: from Super I/O config space (8 I/O ports)
|
||||
Datasheet: Publicly available at the ITE website
|
||||
http://www.ite.com.tw/product_info/file/pc/IT8716F_V0.3.ZIP
|
||||
http://www.ite.com.tw/product_info/file/pc/IT8726F_V0.3.pdf
|
||||
* IT8718F
|
||||
Prefix: 'it8718'
|
||||
Addresses scanned: from Super I/O config space (8 I/O ports)
|
||||
@ -68,7 +69,7 @@ Description
|
||||
-----------
|
||||
|
||||
This driver implements support for the IT8705F, IT8712F, IT8716F,
|
||||
IT8718F and SiS950 chips.
|
||||
IT8718F, IT8726F and SiS950 chips.
|
||||
|
||||
These chips are 'Super I/O chips', supporting floppy disks, infrared ports,
|
||||
joysticks and other miscellaneous stuff. For hardware monitoring, they
|
||||
@ -97,6 +98,10 @@ clock divider mess) but not compatible with the older chips and
|
||||
revisions. For now, the driver only uses the 16-bit mode on the
|
||||
IT8716F and IT8718F.
|
||||
|
||||
The IT8726F is just a bit enhanced IT8716F with additional hardware
|
||||
for AMD power sequencing. Therefore the chip will appear as IT8716F
|
||||
to userspace applications.
|
||||
|
||||
Temperatures are measured in degrees Celsius. An alarm is triggered once
|
||||
when the Overtemperature Shutdown limit is crossed.
|
||||
|
||||
|
@ -48,6 +48,18 @@ Supported chips:
|
||||
Addresses scanned: I2C 0x4c, 0x4d (unsupported 0x4e)
|
||||
Datasheet: Publicly available at the Maxim website
|
||||
http://www.maxim-ic.com/quick_view2.cfm/qv_pk/2578
|
||||
* Maxim MAX6680
|
||||
Prefix: 'max6680'
|
||||
Addresses scanned: I2C 0x18, 0x19, 0x1a, 0x29, 0x2a, 0x2b,
|
||||
0x4c, 0x4d and 0x4e
|
||||
Datasheet: Publicly available at the Maxim website
|
||||
http://www.maxim-ic.com/quick_view2.cfm/qv_pk/3370
|
||||
* Maxim MAX6681
|
||||
Prefix: 'max6680'
|
||||
Addresses scanned: I2C 0x18, 0x19, 0x1a, 0x29, 0x2a, 0x2b,
|
||||
0x4c, 0x4d and 0x4e
|
||||
Datasheet: Publicly available at the Maxim website
|
||||
http://www.maxim-ic.com/quick_view2.cfm/qv_pk/3370
|
||||
|
||||
|
||||
Author: Jean Delvare <khali@linux-fr.org>
|
||||
@ -59,11 +71,15 @@ Description
|
||||
The LM90 is a digital temperature sensor. It senses its own temperature as
|
||||
well as the temperature of up to one external diode. It is compatible
|
||||
with many other devices such as the LM86, the LM89, the LM99, the ADM1032,
|
||||
the MAX6657, MAX6658 and the MAX6659 all of which are supported by this driver.
|
||||
Note that there is no easy way to differentiate between the last three
|
||||
variants. The extra address and features of the MAX6659 are not supported by
|
||||
this driver. Additionally, the ADT7461 is supported if found in ADM1032
|
||||
compatibility mode.
|
||||
the MAX6657, MAX6658, MAX6659, MAX6680 and the MAX6681 all of which are
|
||||
supported by this driver.
|
||||
|
||||
Note that there is no easy way to differentiate between the MAX6657,
|
||||
MAX6658 and MAX6659 variants. The extra address and features of the
|
||||
MAX6659 are not supported by this driver. The MAX6680 and MAX6681 only
|
||||
differ in their pinout, therefore they obviously can't (and don't need to)
|
||||
be distinguished. Additionally, the ADT7461 is supported if found in
|
||||
ADM1032 compatibility mode.
|
||||
|
||||
The specificity of this family of chipsets over the ADM1021/LM84
|
||||
family is that it features critical limits with hysteresis, and an
|
||||
@ -93,18 +109,22 @@ ADM1032:
|
||||
* ALERT is triggered by open remote sensor.
|
||||
* SMBus PEC support for Write Byte and Receive Byte transactions.
|
||||
|
||||
ADT7461
|
||||
ADT7461:
|
||||
* Extended temperature range (breaks compatibility)
|
||||
* Lower resolution for remote temperature
|
||||
|
||||
MAX6657 and MAX6658:
|
||||
* Remote sensor type selection
|
||||
|
||||
MAX6659
|
||||
MAX6659:
|
||||
* Selectable address
|
||||
* Second critical temperature limit
|
||||
* Remote sensor type selection
|
||||
|
||||
MAX6680 and MAX6681:
|
||||
* Selectable address
|
||||
* Remote sensor type selection
|
||||
|
||||
All temperature values are given in degrees Celsius. Resolution
|
||||
is 1.0 degree for the local temperature, 0.125 degree for the remote
|
||||
temperature.
|
||||
@ -141,7 +161,7 @@ SMBus Read Byte, and PEC will work properly.
|
||||
Additionally, the ADM1032 doesn't support SMBus Send Byte with PEC.
|
||||
Instead, it will try to write the PEC value to the register (because the
|
||||
SMBus Send Byte transaction with PEC is similar to a Write Byte transaction
|
||||
without PEC), which is not what we want. Thus, PEC is explicitely disabled
|
||||
without PEC), which is not what we want. Thus, PEC is explicitly disabled
|
||||
on SMBus Send Byte transactions in the lm90 driver.
|
||||
|
||||
PEC on byte data transactions represents a significant increase in bandwidth
|
||||
|
412
Documentation/hwmon/lm93
Normal file
412
Documentation/hwmon/lm93
Normal file
@ -0,0 +1,412 @@
|
||||
Kernel driver lm93
|
||||
==================
|
||||
|
||||
Supported chips:
|
||||
* National Semiconductor LM93
|
||||
Prefix 'lm93'
|
||||
Addresses scanned: I2C 0x2c-0x2e
|
||||
Datasheet: http://www.national.com/ds.cgi/LM/LM93.pdf
|
||||
|
||||
Author:
|
||||
Mark M. Hoffman <mhoffman@lightlink.com>
|
||||
Ported to 2.6 by Eric J. Bowersox <ericb@aspsys.com>
|
||||
Adapted to 2.6.20 by Carsten Emde <ce@osadl.org>
|
||||
Modified for mainline integration by Hans J. Koch <hjk@linutronix.de>
|
||||
|
||||
Module Parameters
|
||||
-----------------
|
||||
|
||||
(specific to LM93)
|
||||
* init: integer
|
||||
Set to non-zero to force some initializations (default is 0).
|
||||
* disable_block: integer
|
||||
A "0" allows SMBus block data transactions if the host supports them. A "1"
|
||||
disables SMBus block data transactions. The default is 0.
|
||||
* vccp_limit_type: integer array (2)
|
||||
Configures in7 and in8 limit type, where 0 means absolute and non-zero
|
||||
means relative. "Relative" here refers to "Dynamic Vccp Monitoring using
|
||||
VID" from the datasheet. It greatly simplifies the interface to allow
|
||||
only one set of limits (absolute or relative) to be in operation at a
|
||||
time (even though the hardware is capable of enabling both). There's
|
||||
not a compelling use case for enabling both at once, anyway. The default
|
||||
is "0,0".
|
||||
* vid_agtl: integer
|
||||
A "0" configures the VID pins for V(ih) = 2.1V min, V(il) = 0.8V max.
|
||||
A "1" configures the VID pins for V(ih) = 0.8V min, V(il) = 0.4V max.
|
||||
(The latter setting is referred to as AGTL+ Compatible in the datasheet.)
|
||||
I.e. this parameter controls the VID pin input thresholds; if your VID
|
||||
inputs are not working, try changing this. The default value is "0".
|
||||
|
||||
(common among sensor drivers)
|
||||
* force: short array (min = 1, max = 48)
|
||||
List of adapter,address pairs to assume to be present. Autodetection
|
||||
of the target device will still be attempted. Use one of the more
|
||||
specific force directives below if this doesn't detect the device.
|
||||
* force_lm93: short array (min = 1, max = 48)
|
||||
List of adapter,address pairs which are unquestionably assumed to contain
|
||||
a 'lm93' chip
|
||||
* ignore: short array (min = 1, max = 48)
|
||||
List of adapter,address pairs not to scan
|
||||
* ignore_range: short array (min = 1, max = 48)
|
||||
List of adapter,start-addr,end-addr triples not to scan
|
||||
* probe: short array (min = 1, max = 48)
|
||||
List of adapter,address pairs to scan additionally
|
||||
* probe_range: short array (min = 1, max = 48)
|
||||
List of adapter,start-addr,end-addr triples to scan additionally
|
||||
|
||||
|
||||
Hardware Description
|
||||
--------------------
|
||||
|
||||
(from the datasheet)
|
||||
|
||||
The LM93, hardware monitor, has a two wire digital interface compatible with
|
||||
SMBus 2.0. Using an 8-bit ADC, the LM93 measures the temperature of two remote
|
||||
diode connected transistors as well as its own die and 16 power supply
|
||||
voltages. To set fan speed, the LM93 has two PWM outputs that are each
|
||||
controlled by up to four temperature zones. The fancontrol algorithm is lookup
|
||||
table based. The LM93 includes a digital filter that can be invoked to smooth
|
||||
temperature readings for better control of fan speed. The LM93 has four
|
||||
tachometer inputs to measure fan speed. Limit and status registers for all
|
||||
measured values are included. The LM93 builds upon the functionality of
|
||||
previous motherboard management ASICs and uses some of the LM85's features
|
||||
(i.e. smart tachometer mode). It also adds measurement and control support
|
||||
for dynamic Vccp monitoring and PROCHOT. It is designed to monitor a dual
|
||||
processor Xeon class motherboard with a minimum of external components.
|
||||
|
||||
|
||||
Driver Description
|
||||
------------------
|
||||
|
||||
This driver implements support for the National Semiconductor LM93.
|
||||
|
||||
|
||||
User Interface
|
||||
--------------
|
||||
|
||||
#PROCHOT:
|
||||
|
||||
The LM93 can monitor two #PROCHOT signals. The results are found in the
|
||||
sysfs files prochot1, prochot2, prochot1_avg, prochot2_avg, prochot1_max,
|
||||
and prochot2_max. prochot1_max and prochot2_max contain the user limits
|
||||
for #PROCHOT1 and #PROCHOT2, respectively. prochot1 and prochot2 contain
|
||||
the current readings for the most recent complete time interval. The
|
||||
value of prochot1_avg and prochot2_avg is something like a 2 period
|
||||
exponential moving average (but not quite - check the datasheet). Note
|
||||
that this third value is calculated by the chip itself. All values range
|
||||
from 0-255 where 0 indicates no throttling, and 255 indicates > 99.6%.
|
||||
|
||||
The monitoring intervals for the two #PROCHOT signals are also configurable.
|
||||
These intervals can be found in the sysfs files prochot1_interval and
|
||||
prochot2_interval. The values in these files specify the intervals for
|
||||
#P1_PROCHOT and #P2_PROCHOT, respectively. Selecting a value not in this
|
||||
list will cause the driver to use the next largest interval. The available
|
||||
intervals are:
|
||||
|
||||
#PROCHOT intervals: 0.73, 1.46, 2.9, 5.8, 11.7, 23.3, 46.6, 93.2, 186, 372
|
||||
|
||||
It is possible to configure the LM93 to logically short the two #PROCHOT
|
||||
signals. I.e. when #P1_PROCHOT is asserted, the LM93 will automatically
|
||||
assert #P2_PROCHOT, and vice-versa. This mode is enabled by writing a
|
||||
non-zero integer to the sysfs file prochot_short.
|
||||
|
||||
The LM93 can also override the #PROCHOT pins by driving a PWM signal onto
|
||||
one or both of them. When overridden, the signal has a period of 3.56 ms,
|
||||
a minimum pulse width of 5 clocks (at 22.5kHz => 6.25% duty cycle), and
|
||||
a maximum pulse width of 80 clocks (at 22.5kHz => 99.88% duty cycle).
|
||||
|
||||
The sysfs files prochot1_override and prochot2_override contain boolean
|
||||
integers which enable or disable the override function for #P1_PROCHOT and
|
||||
#P2_PROCHOT, respectively. The sysfs file prochot_override_duty_cycle
|
||||
contains a value controlling the duty cycle for the PWM signal used when
|
||||
the override function is enabled. This value ranges from 0 to 15, with 0
|
||||
indicating minimum duty cycle and 15 indicating maximum.
|
||||
|
||||
#VRD_HOT:
|
||||
|
||||
The LM93 can monitor two #VRD_HOT signals. The results are found in the
|
||||
sysfs files vrdhot1 and vrdhot2. There is one value per file: a boolean for
|
||||
which 1 indicates #VRD_HOT is asserted and 0 indicates it is negated. These
|
||||
files are read-only.
|
||||
|
||||
Smart Tach Mode:
|
||||
|
||||
(from the datasheet)
|
||||
|
||||
If a fan is driven using a low-side drive PWM, the tachometer
|
||||
output of the fan is corrupted. The LM93 includes smart tachometer
|
||||
circuitry that allows an accurate tachometer reading to be
|
||||
achieved despite the signal corruption. In smart tach mode all
|
||||
four signals are measured within 4 seconds.
|
||||
|
||||
Smart tach mode is enabled by the driver by writing 1 or 2 (associating the
|
||||
fan tachometer with a pwm) to the sysfs file fan<n>_smart_tach. A zero
|
||||
will disable the function for that fan. Note that Smart tach mode cannot be
|
||||
enabled if the PWM output frequency is 22500 Hz (see below).
|
||||
|
||||
Manual PWM:
|
||||
|
||||
The LM93 has a fixed or override mode for the two PWM outputs (although, there
|
||||
are still some conditions that will override even this mode - see section
|
||||
15.10.6 of the datasheet for details.) The sysfs files pwm1_override
|
||||
and pwm2_override are used to enable this mode; each is a boolean integer
|
||||
where 0 disables and 1 enables the manual control mode. The sysfs files pwm1
|
||||
and pwm2 are used to set the manual duty cycle; each is an integer (0-255)
|
||||
where 0 is 0% duty cycle, and 255 is 100%. Note that the duty cycle values
|
||||
are constrained by the hardware. Selecting a value which is not available
|
||||
will cause the driver to use the next largest value. Also note: when manual
|
||||
PWM mode is disabled, the value of pwm1 and pwm2 indicates the current duty
|
||||
cycle chosen by the h/w.
|
||||
|
||||
PWM Output Frequency:
|
||||
|
||||
The LM93 supports several different frequencies for the PWM output channels.
|
||||
The sysfs files pwm1_freq and pwm2_freq are used to select the frequency. The
|
||||
frequency values are constrained by the hardware. Selecting a value which is
|
||||
not available will cause the driver to use the next largest value. Also note
|
||||
that this parameter has implications for the Smart Tach Mode (see above).
|
||||
|
||||
PWM Output Frequencies: 12, 36, 48, 60, 72, 84, 96, 22500 (h/w default)
|
||||
|
||||
Automatic PWM:
|
||||
|
||||
The LM93 is capable of complex automatic fan control, with many different
|
||||
points of configuration. To start, each PWM output can be bound to any
|
||||
combination of eight control sources. The final PWM is the largest of all
|
||||
individual control sources to which the PWM output is bound.
|
||||
|
||||
The eight control sources are: temp1-temp4 (aka "zones" in the datasheet),
|
||||
#PROCHOT 1 & 2, and #VRDHOT 1 & 2. The bindings are expressed as a bitmask
|
||||
in the sysfs files pwm<n>_auto_channels, where a "1" enables the binding, and
|
||||
a "0" disables it. The h/w default is 0x0f (all temperatures bound).
|
||||
|
||||
0x01 - Temp 1
|
||||
0x02 - Temp 2
|
||||
0x04 - Temp 3
|
||||
0x08 - Temp 4
|
||||
0x10 - #PROCHOT 1
|
||||
0x20 - #PROCHOT 2
|
||||
0x40 - #VRDHOT 1
|
||||
0x80 - #VRDHOT 2
|
||||
|
||||
The function y = f(x) takes a source temperature x to a PWM output y. This
|
||||
function of the LM93 is derived from a base temperature and a table of 12
|
||||
temperature offsets. The base temperature is expressed in degrees C in the
|
||||
sysfs files temp<n>_auto_base. The offsets are expressed in cumulative
|
||||
degrees C, with the value of offset <i> for temperature value <n> being
|
||||
contained in the file temp<n>_auto_offset<i>. E.g. if the base temperature
|
||||
is 40C:
|
||||
|
||||
offset # temp<n>_auto_offset<i> range pwm
|
||||
1 0 - 25.00%
|
||||
2 0 - 28.57%
|
||||
3 1 40C - 41C 32.14%
|
||||
4 1 41C - 42C 35.71%
|
||||
5 2 42C - 44C 39.29%
|
||||
6 2 44C - 46C 42.86%
|
||||
7 2 48C - 50C 46.43%
|
||||
8 2 50C - 52C 50.00%
|
||||
9 2 52C - 54C 53.57%
|
||||
10 2 54C - 56C 57.14%
|
||||
11 2 56C - 58C 71.43%
|
||||
12 2 58C - 60C 85.71%
|
||||
> 60C 100.00%
|
||||
|
||||
Valid offsets are in the range 0C <= x <= 7.5C in 0.5C increments.
|
||||
|
||||
There is an independent base temperature for each temperature channel. Note,
|
||||
however, there are only two tables of offsets: one each for temp[12] and
|
||||
temp[34]. Therefore, any change to e.g. temp1_auto_offset<i> will also
|
||||
affect temp2_auto_offset<i>.
|
||||
|
||||
The LM93 can also apply hysteresis to the offset table, to prevent unwanted
|
||||
oscillation between two steps in the offsets table. These values are found in
|
||||
the sysfs files temp<n>_auto_offset_hyst. The value in this file has the
|
||||
same representation as in temp<n>_auto_offset<i>.
|
||||
|
||||
If a temperature reading falls below the base value for that channel, the LM93
|
||||
will use the minimum PWM value. These values are found in the sysfs files
|
||||
temp<n>_auto_pwm_min. Note, there are only two minimums: one each for temp[12]
|
||||
and temp[34]. Therefore, any change to e.g. temp1_auto_pwm_min will also
|
||||
affect temp2_auto_pwm_min.
|
||||
|
||||
PWM Spin-Up Cycle:
|
||||
|
||||
A spin-up cycle occurs when a PWM output is commanded from 0% duty cycle to
|
||||
some value > 0%. The LM93 supports a minimum duty cycle during spin-up. These
|
||||
values are found in the sysfs files pwm<n>_auto_spinup_min. The value in this
|
||||
file has the same representation as other PWM duty cycle values. The
|
||||
duration of the spin-up cycle is also configurable. These values are found in
|
||||
the sysfs files pwm<n>_auto_spinup_time. The value in this file is
|
||||
the spin-up time in seconds. The available spin-up times are constrained by
|
||||
the hardware. Selecting a value which is not available will cause the driver
|
||||
to use the next largest value.
|
||||
|
||||
Spin-up Durations: 0 (disabled, h/w default), 0.1, 0.25, 0.4, 0.7, 1.0,
|
||||
2.0, 4.0
|
||||
|
||||
#PROCHOT and #VRDHOT PWM Ramping:
|
||||
|
||||
If the #PROCHOT or #VRDHOT signals are asserted while bound to a PWM output
|
||||
channel, the LM93 will ramp the PWM output up to 100% duty cycle in discrete
|
||||
steps. The duration of each step is configurable. There are two files, with
|
||||
one value each in seconds: pwm_auto_prochot_ramp and pwm_auto_vrdhot_ramp.
|
||||
The available ramp times are constrained by the hardware. Selecting a value
|
||||
which is not available will cause the driver to use the next largest value.
|
||||
|
||||
Ramp Times: 0 (disabled, h/w default) to 0.75 in 0.05 second intervals
|
||||
|
||||
Fan Boost:
|
||||
|
||||
For each temperature channel, there is a boost temperature: if the channel
|
||||
exceeds this limit, the LM93 will immediately drive both PWM outputs to 100%.
|
||||
This limit is expressed in degrees C in the sysfs files temp<n>_auto_boost.
|
||||
There is also a hysteresis temperature for this function: after the boost
|
||||
limit is reached, the temperature channel must drop below this value before
|
||||
the boost function is disabled. This temperature is also expressed in degrees
|
||||
C in the sysfs files temp<n>_auto_boost_hyst.
|
||||
|
||||
GPIO Pins:
|
||||
|
||||
The LM93 can monitor the logic level of four dedicated GPIO pins as well as the
|
||||
four tach input pins. GPIO0-GPIO3 correspond to (fan) tach 1-4, respectively.
|
||||
All eight GPIOs are read by reading the bitmask in the sysfs file gpio. The
|
||||
LSB is GPIO0, and the MSB is GPIO7.
|
||||
|
||||
|
||||
LM93 Unique sysfs Files
|
||||
-----------------------
|
||||
|
||||
file description
|
||||
-------------------------------------------------------------
|
||||
|
||||
prochot<n> current #PROCHOT %
|
||||
|
||||
prochot<n>_avg moving average #PROCHOT %
|
||||
|
||||
prochot<n>_max limit #PROCHOT %
|
||||
|
||||
prochot_short enable or disable logical #PROCHOT pin short
|
||||
|
||||
prochot<n>_override force #PROCHOT assertion as PWM
|
||||
|
||||
prochot_override_duty_cycle
|
||||
duty cycle for the PWM signal used when
|
||||
#PROCHOT is overridden
|
||||
|
||||
prochot<n>_interval #PROCHOT PWM sampling interval
|
||||
|
||||
vrdhot<n> 0 means negated, 1 means asserted
|
||||
|
||||
fan<n>_smart_tach enable or disable smart tach mode
|
||||
|
||||
pwm<n>_auto_channels select control sources for PWM outputs
|
||||
|
||||
pwm<n>_auto_spinup_min minimum duty cycle during spin-up
|
||||
|
||||
pwm<n>_auto_spinup_time duration of spin-up
|
||||
|
||||
pwm_auto_prochot_ramp ramp time per step when #PROCHOT asserted
|
||||
|
||||
pwm_auto_vrdhot_ramp ramp time per step when #VRDHOT asserted
|
||||
|
||||
temp<n>_auto_base temperature channel base
|
||||
|
||||
temp<n>_auto_offset[1-12]
|
||||
temperature channel offsets
|
||||
|
||||
temp<n>_auto_offset_hyst
|
||||
temperature channel offset hysteresis
|
||||
|
||||
temp<n>_auto_boost temperature channel boost (PWMs to 100%) limit
|
||||
|
||||
temp<n>_auto_boost_hyst temperature channel boost hysteresis
|
||||
|
||||
gpio input state of 8 GPIO pins; read-only
|
||||
|
||||
|
||||
Sample Configuration File
|
||||
-------------------------
|
||||
|
||||
Here is a sample LM93 chip config for sensors.conf:
|
||||
|
||||
---------- cut here ----------
|
||||
chip "lm93-*"
|
||||
|
||||
# VOLTAGE INPUTS
|
||||
|
||||
# labels and scaling based on datasheet recommendations
|
||||
label in1 "+12V1"
|
||||
compute in1 @ * 12.945, @ / 12.945
|
||||
set in1_min 12 * 0.90
|
||||
set in1_max 12 * 1.10
|
||||
|
||||
label in2 "+12V2"
|
||||
compute in2 @ * 12.945, @ / 12.945
|
||||
set in2_min 12 * 0.90
|
||||
set in2_max 12 * 1.10
|
||||
|
||||
label in3 "+12V3"
|
||||
compute in3 @ * 12.945, @ / 12.945
|
||||
set in3_min 12 * 0.90
|
||||
set in3_max 12 * 1.10
|
||||
|
||||
label in4 "FSB_Vtt"
|
||||
|
||||
label in5 "3GIO"
|
||||
|
||||
label in6 "ICH_Core"
|
||||
|
||||
label in7 "Vccp1"
|
||||
|
||||
label in8 "Vccp2"
|
||||
|
||||
label in9 "+3.3V"
|
||||
set in9_min 3.3 * 0.90
|
||||
set in9_max 3.3 * 1.10
|
||||
|
||||
label in10 "+5V"
|
||||
set in10_min 5.0 * 0.90
|
||||
set in10_max 5.0 * 1.10
|
||||
|
||||
label in11 "SCSI_Core"
|
||||
|
||||
label in12 "Mem_Core"
|
||||
|
||||
label in13 "Mem_Vtt"
|
||||
|
||||
label in14 "Gbit_Core"
|
||||
|
||||
# Assuming R1/R2 = 4.1143, and 3.3V reference
|
||||
# -12V = (4.1143 + 1) * (@ - 3.3) + 3.3
|
||||
label in15 "-12V"
|
||||
compute in15 @ * 5.1143 - 13.57719, (@ + 13.57719) / 5.1143
|
||||
set in15_min -12 * 0.90
|
||||
set in15_max -12 * 1.10
|
||||
|
||||
label in16 "+3.3VSB"
|
||||
set in16_min 3.3 * 0.90
|
||||
set in16_max 3.3 * 1.10
|
||||
|
||||
# TEMPERATURE INPUTS
|
||||
|
||||
label temp1 "CPU1"
|
||||
label temp2 "CPU2"
|
||||
label temp3 "LM93"
|
||||
|
||||
# TACHOMETER INPUTS
|
||||
|
||||
label fan1 "Fan1"
|
||||
set fan1_min 3000
|
||||
label fan2 "Fan2"
|
||||
set fan2_min 3000
|
||||
label fan3 "Fan3"
|
||||
set fan3_min 3000
|
||||
label fan4 "Fan4"
|
||||
set fan4_min 3000
|
||||
|
||||
# PWM OUTPUTS
|
||||
|
||||
label pwm1 "CPU1"
|
||||
label pwm2 "CPU2"
|
||||
|
@ -4,6 +4,7 @@ Kernel driver smsc47b397
|
||||
Supported chips:
|
||||
* SMSC LPC47B397-NC
|
||||
* SMSC SCH5307-NS
|
||||
* SMSC SCH5317
|
||||
Prefix: 'smsc47b397'
|
||||
Addresses scanned: none, address read from Super I/O config space
|
||||
Datasheet: In this file
|
||||
@ -18,8 +19,8 @@ The following specification describes the SMSC LPC47B397-NC[1] sensor chip
|
||||
provided by Craig Kelly (In-Store Broadcast Network) and edited/corrected
|
||||
by Mark M. Hoffman <mhoffman@lightlink.com>.
|
||||
|
||||
[1] And SMSC SCH5307-NS, which has a different device ID but is otherwise
|
||||
compatible.
|
||||
[1] And SMSC SCH5307-NS and SCH5317, which have different device IDs but are
|
||||
otherwise compatible.
|
||||
|
||||
* * * * *
|
||||
|
||||
@ -131,7 +132,7 @@ OUT DX,AL
|
||||
The registers of interest for identifying the SIO on the dc7100 are Device ID
|
||||
(0x20) and Device Rev (0x21).
|
||||
|
||||
The Device ID will read 0x6F (for SCH5307-NS, 0x81)
|
||||
The Device ID will read 0x6F (0x81 for SCH5307-NS, and 0x85 for SCH5317)
|
||||
The Device Rev currently reads 0x01
|
||||
|
||||
Obtaining the HWM Base Address.
|
||||
|
@ -172,11 +172,10 @@ pwm[1-*] Pulse width modulation fan control.
|
||||
255 is max or 100%.
|
||||
|
||||
pwm[1-*]_enable
|
||||
Switch PWM on and off.
|
||||
Not always present even if pwmN is.
|
||||
0: turn off
|
||||
1: turn on in manual mode
|
||||
2+: turn on in automatic mode
|
||||
Fan speed control method:
|
||||
0: no fan speed control (i.e. fan at full speed)
|
||||
1: manual fan speed control enabled (using pwm[1-*])
|
||||
2+: automatic fan speed control enabled
|
||||
Check individual chip documentation files for automatic mode
|
||||
details.
|
||||
RW
|
||||
@ -343,9 +342,9 @@ to notify open diodes, unconnected fans etc. where the hardware
|
||||
supports it. When this boolean has value 1, the measurement for that
|
||||
channel should not be trusted.
|
||||
|
||||
in[0-*]_input_fault
|
||||
fan[1-*]_input_fault
|
||||
temp[1-*]_input_fault
|
||||
in[0-*]_fault
|
||||
fan[1-*]_fault
|
||||
temp[1-*]_fault
|
||||
Input fault condition
|
||||
0: no fault occurred
|
||||
1: fault condition
|
||||
|
74
Documentation/hwmon/thmc50
Normal file
74
Documentation/hwmon/thmc50
Normal file
@ -0,0 +1,74 @@
|
||||
Kernel driver thmc50
|
||||
=====================
|
||||
|
||||
Supported chips:
|
||||
* Analog Devices ADM1022
|
||||
Prefix: 'adm1022'
|
||||
Addresses scanned: I2C 0x2c - 0x2e
|
||||
Datasheet: http://www.analog.com/en/prod/0,2877,ADM1022,00.html
|
||||
* Texas Instruments THMC50
|
||||
Prefix: 'thmc50'
|
||||
Addresses scanned: I2C 0x2c - 0x2e
|
||||
Datasheet: http://focus.ti.com/docs/prod/folders/print/thmc50.html
|
||||
|
||||
Author: Krzysztof Helt <krzysztof.h1@wp.pl>
|
||||
|
||||
This driver was derived from the 2.4 kernel thmc50.c source file.
|
||||
|
||||
Credits:
|
||||
thmc50.c (2.4 kernel):
|
||||
Frodo Looijaard <frodol@dds.nl>
|
||||
Philip Edelbrock <phil@netroedge.com>
|
||||
|
||||
Module Parameters
|
||||
-----------------
|
||||
|
||||
* adm1022_temp3: short array
|
||||
List of adapter,address pairs to force chips into ADM1022 mode with
|
||||
second remote temperature. This does not work for original THMC50 chips.
|
||||
|
||||
Description
|
||||
-----------
|
||||
|
||||
The THMC50 implements: an internal temperature sensor, support for an
|
||||
external diode-type temperature sensor (compatible w/ the diode sensor inside
|
||||
many processors), and a controllable fan/analog_out DAC. For the temperature
|
||||
sensors, limits can be set through the appropriate Overtemperature Shutdown
|
||||
register and Hysteresis register. Each value can be set and read to half-degree
|
||||
accuracy. An alarm is issued (usually to a connected LM78) when the
|
||||
temperature gets higher than the Overtemperature Shutdown value; it stays on
|
||||
until the temperature falls below the Hysteresis value. All temperatures are in
|
||||
degrees Celsius, and are guaranteed within a range of -55 to +125 degrees.
|
||||
|
||||
The THMC50 only updates its values each 1.5 seconds; reading it more often
|
||||
will do no harm, but will return 'old' values.
|
||||
|
||||
The THMC50 is usually used in combination with LM78-like chips, to measure
|
||||
the temperature of the processor(s).
|
||||
|
||||
The ADM1022 works the same as THMC50 but it is faster (5 Hz instead of
|
||||
1 Hz for THMC50). It can also be put in a new mode to handle an additional
|
||||
remote temperature sensor. The driver uses the mode set by BIOS by default.
|
||||
|
||||
In case the BIOS is broken and the mode is set incorrectly, you can force
|
||||
the mode with additional remote temperature with adm1022_temp3 parameter.
|
||||
A typical symptom of wrong setting is a fan forced to full speed.
|
||||
|
||||
Driver Features
|
||||
---------------
|
||||
|
||||
The driver provides up to three temperatures:
|
||||
|
||||
temp1 -- internal
|
||||
temp2 -- remote
|
||||
temp3 -- 2nd remote only for ADM1022
|
||||
|
||||
pwm1 -- fan speed (0 = stop, 255 = full)
|
||||
pwm1_mode -- always 0 (DC mode)
|
||||
|
||||
The value of 0 for pwm1 also forces FAN_OFF signal from the chip,
|
||||
so it stops fans even if the value 0 into the ANALOG_OUT register does not.
|
||||
|
||||
The driver was tested on Compaq AP550 with two ADM1022 chips (one works
|
||||
in the temp3 mode), five temperature readings and two fans.
|
||||
|
@ -22,9 +22,9 @@ This driver implements support for the Winbond W83627EHF, W83627EHG, and
|
||||
W83627DHG super I/O chips. We will refer to them collectively as Winbond chips.
|
||||
|
||||
The chips implement three temperature sensors, five fan rotation
|
||||
speed sensors, ten analog voltage sensors (only nine for the 627DHG), alarms
|
||||
with beep warnings (control unimplemented), and some automatic fan regulation
|
||||
strategies (plus manual fan control mode).
|
||||
speed sensors, ten analog voltage sensors (only nine for the 627DHG), one
|
||||
VID (6 pins), alarms with beep warnings (control unimplemented), and
|
||||
some automatic fan regulation strategies (plus manual fan control mode).
|
||||
|
||||
Temperatures are measured in degrees Celsius and measurement resolution is 1
|
||||
degC for temp1 and 0.5 degC for temp2 and temp3. An alarm is triggered when
|
||||
|
@ -6,7 +6,7 @@ Supported adapters:
|
||||
Datasheet: Publicly available at the Intel website
|
||||
* ServerWorks OSB4, CSB5, CSB6 and HT-1000 southbridges
|
||||
Datasheet: Only available via NDA from ServerWorks
|
||||
* ATI IXP200, IXP300, IXP400, SB600 and SB700 southbridges
|
||||
* ATI IXP200, IXP300, IXP400, SB600, SB700 and SB800 southbridges
|
||||
Datasheet: Not publicly available
|
||||
* Standard Microsystems (SMSC) SLC90E66 (Victory66) southbridge
|
||||
Datasheet: Publicly available at the SMSC website http://www.smsc.com
|
||||
|
@ -1,3 +1,13 @@
|
||||
---------------------------------------------------------------------------
|
||||
!!!!!!!!!!!!!!!WARNING!!!!!!!!
|
||||
The zero page is a kernel internal data structure, not a stable ABI. It might change
|
||||
without warning and the kernel has no way to detect an old version of it.
|
||||
If you're writing some external code like a boot loader you should only use
|
||||
the stable versioned real mode boot protocol described in boot.txt. Otherwise the kernel
|
||||
might break you at any time.
|
||||
!!!!!!!!!!!!!WARNING!!!!!!!!!!!
|
||||
----------------------------------------------------------------------------
|
||||
|
||||
Summary of boot_params layout (kernel point of view)
|
||||
( collected by Hans Lermen and Martin Mares )
|
||||
|
||||
|
@ -99,6 +99,20 @@ Transaction IDs
|
||||
request/response pairs. The upper 32 bits are reserved for use by
|
||||
the kernel and will be overwritten before a MAD is sent.
|
||||
|
||||
P_Key Index Handling
|
||||
|
||||
The old ib_umad interface did not allow setting the P_Key index for
|
||||
MADs that are sent and did not provide a way for obtaining the P_Key
|
||||
index of received MADs. A new layout for struct ib_user_mad_hdr
|
||||
with a pkey_index member has been defined; however, to preserve
|
||||
binary compatibility with older applications, this new layout will
|
||||
not be used unless the IB_USER_MAD_ENABLE_PKEY ioctl is called
|
||||
before a file descriptor is used for anything else.
|
||||
|
||||
In September 2008, the IB_USER_MAD_ABI_VERSION will be incremented
|
||||
to 6, the new layout of struct ib_user_mad_hdr will be used by
|
||||
default, and the IB_USER_MAD_ENABLE_PKEY ioctl will be removed.
|
||||
|
||||
Setting IsSM Capability Bit
|
||||
|
||||
To set the IsSM capability bit for a port, simply open the
|
||||
|
@ -79,7 +79,7 @@ Field 8 -- # of milliseconds spent writing
|
||||
measured from __make_request() to end_that_request_last()).
|
||||
Field 9 -- # of I/Os currently in progress
|
||||
The only field that should go to zero. Incremented as requests are
|
||||
given to appropriate request_queue_t and decremented as they finish.
|
||||
given to appropriate struct request_queue and decremented as they finish.
|
||||
Field 10 -- # of milliseconds spent doing I/Os
|
||||
This field increases so long as field 9 is nonzero.
|
||||
Field 11 -- weighted # of milliseconds spent doing I/Os
|
||||
|
660
Documentation/ja_JP/HOWTO
Normal file
660
Documentation/ja_JP/HOWTO
Normal file
@ -0,0 +1,660 @@
|
||||
NOTE:
|
||||
This is a version of Documentation/HOWTO translated into Japanese.
|
||||
This document is maintained by Tsugikazu Shibata <tshibata@ab.jp.nec.com>
|
||||
and the JF Project team <www.linux.or.jp/JF>.
|
||||
If you find any difference between this document and the original file
|
||||
or a problem with the translation,
|
||||
please contact the maintainer of this file or JF project.
|
||||
|
||||
Please also note that the purpose of this file is to be easier to read
|
||||
for non English (read: Japanese) speakers and is not intended as a
|
||||
fork. So if you have any comments or updates for this file, please try
|
||||
to update the original English file first.
|
||||
|
||||
Last Updated: 2007/09/23
|
||||
==================================
|
||||
これは、
|
||||
linux-2.6.23/Documentation/HOWTO
|
||||
の和訳です。
|
||||
|
||||
翻訳団体: JF プロジェクト < http://www.linux.or.jp/JF/ >
|
||||
翻訳日: 2007/09/19
|
||||
翻訳者: Tsugikazu Shibata <tshibata at ab dot jp dot nec dot com>
|
||||
校正者: 松倉さん <nbh--mats at nifty dot com>
|
||||
小林 雅典さん (Masanori Kobayasi) <zap03216 at nifty dot ne dot jp>
|
||||
武井伸光さん、<takei at webmasters dot gr dot jp>
|
||||
かねこさん (Seiji Kaneko) <skaneko at a2 dot mbn dot or dot jp>
|
||||
野口さん (Kenji Noguchi) <tokyo246 at gmail dot com>
|
||||
河内さん (Takayoshi Kochi) <t-kochi at bq dot jp dot nec dot com>
|
||||
岩本さん (iwamoto) <iwamoto.kn at ncos dot nec dot co dot jp>
|
||||
内田さん (Satoshi Uchida) <s-uchida at ap dot jp dot nec dot com>
|
||||
==================================
|
||||
|
||||
Linux カーネル開発のやり方
|
||||
-------------------------------
|
||||
|
||||
これは上のトピック( Linux カーネル開発のやり方)の重要な事柄を網羅した
|
||||
ドキュメントです。ここには Linux カーネル開発者になるための方法と
|
||||
Linux カーネル開発コミュニティと共に活動するやり方を学ぶ方法が含まれて
|
||||
います。カーネルプログラミングに関する技術的な項目に関することは何も含
|
||||
めないようにしていますが、カーネル開発者となるための正しい方向に向かう
|
||||
手助けになります。
|
||||
|
||||
もし、このドキュメントのどこかが古くなっていた場合には、このドキュメン
|
||||
トの最後にリストしたメンテナにパッチを送ってください。
|
||||
|
||||
はじめに
|
||||
---------
|
||||
|
||||
あなたは Linux カーネルの開発者になる方法を学びたいのでしょうか? そ
|
||||
れともあなたは上司から「このデバイスの Linux ドライバを書くように」と
|
||||
言われているのでしょうか?
|
||||
この文書の目的は、あなたが踏むべき手順と、コミュニティと一緒にうまく働
|
||||
くヒントを書き下すことで、あなたが知るべき全てのことを教えることです。
|
||||
また、このコミュニティがなぜ今うまくまわっているのかという理由の一部も
|
||||
説明しようと試みています。
|
||||
|
||||
|
||||
カーネルは 少量のアーキテクチャ依存部分がアセンブリ言語で書かれている
|
||||
以外は大部分は C 言語で書かれています。C言語をよく理解していることはカー
|
||||
ネル開発者には必要です。アーキテクチャ向けの低レベル部分の開発をするの
|
||||
でなければ、(どんなアーキテクチャでも)アセンブリ(訳注: 言語)は必要あり
|
||||
ません。以下の本は、C 言語の十分な知識や何年もの経験に取って代わるもの
|
||||
ではありませんが、少なくともリファレンスとしては良い本です。
|
||||
- "The C Programming Language" by Kernighan and Ritchie [Prentice Hall]
|
||||
-『プログラミング言語C第2版』(B.W. カーニハン/D.M. リッチー著 石田晴久訳) [共立出版]
|
||||
- "Practical C Programming" by Steve Oualline [O'Reilly]
|
||||
- 『C実践プログラミング第3版』(Steve Oualline著 望月康司監訳 谷口功訳) [オライリージャパン]
|
||||
- "C: A Reference Manual" by Harbison and Steele [Prentice Hall]
|
||||
- 『新・詳説 C 言語 H&S リファレンス』
|
||||
(サミュエル P ハービソン/ガイ L スティール共著 斉藤 信男監訳)[ソフトバンク]
|
||||
|
||||
カーネルは GNU C と GNU ツールチェインを使って書かれています。カーネル
|
||||
は ISO C89 仕様に準拠して書く一方で、標準には無い言語拡張を多く使って
|
||||
います。カーネルは標準 C ライブラリとは関係がないといった、C 言語フリー
|
||||
スタンディング環境です。そのため、C の標準で使えないものもあります。任
|
||||
意の long long の除算や浮動小数点は使えません。
|
||||
ときどき、カーネルがツールチェインや C 言語拡張に置いている前提がどう
|
||||
なっているのかわかりにくいことがあり、また、残念なことに決定的なリファ
|
||||
レンスは存在しません。情報を得るには、gcc の info ページ( info gcc )を
|
||||
見てください。
|
||||
|
||||
あなたは既存の開発コミュニティと一緒に作業する方法を学ぼうとしているこ
|
||||
とに留意してください。そのコミュニティは、コーディング、スタイル、
|
||||
開発手順について高度な標準を持つ、多様な人の集まりです。
|
||||
地理的に分散した大規模なチームに対してもっともうまくいくとわかったこと
|
||||
をベースにしながら、これらの標準は長い時間をかけて築かれてきました。
|
||||
これらはきちんと文書化されていますから、事前にこれらの標準についてでき
|
||||
るだけたくさん学んでください。また皆があなたやあなたの会社のやり方に合わ
|
||||
せてくれると思わないでください。
|
||||
|
||||
法的問題
|
||||
------------
|
||||
|
||||
Linux カーネルのソースコードは GPL ライセンスの下でリリースされていま
|
||||
す。ライセンスの詳細については、ソースツリーのメインディレクトリに存在
|
||||
する、COPYING のファイルを見てください。もしライセンスについてさらに質
|
||||
問があれば、Linux Kernel メーリングリストに質問するのではなく、どうぞ
|
||||
法律家に相談してください。メーリングリストの人達は法律家ではなく、法的
|
||||
問題については彼らの声明はあてにするべきではありません。
|
||||
|
||||
GPL に関する共通の質問や回答については、以下を参照してください。
|
||||
http://www.gnu.org/licenses/gpl-faq.html
|
||||
|
||||
ドキュメント
|
||||
------------
|
||||
|
||||
Linux カーネルソースツリーは幅広い範囲のドキュメントを含んでおり、それ
|
||||
らはカーネルコミュニティと会話する方法を学ぶのに非常に貴重なものです。
|
||||
新しい機能がカーネルに追加される場合、その機能の使い方について説明した
|
||||
新しいドキュメントファイルも追加することを勧めます。
|
||||
カーネルの変更が、カーネルがユーザ空間に公開しているインターフェイスの
|
||||
変更を引き起こす場合、その変更を説明するマニュアルページのパッチや情報
|
||||
をマニュアルページのメンテナ mtk-manpages@gmx.net に送ることを勧めま
|
||||
す。
|
||||
|
||||
以下はカーネルソースツリーに含まれている読んでおくべきファイルの一覧で
|
||||
す-
|
||||
|
||||
README
|
||||
このファイルは Linuxカーネルの簡単な背景とカーネルを設定(訳注
|
||||
configure )し、生成(訳注 build )するために必要なことは何かが書かれ
|
||||
ています。カーネルに関して初めての人はここからスタートすると良いで
|
||||
しょう。
|
||||
|
||||
Documentation/Changes
|
||||
このファイルはカーネルをうまく生成(訳注 build )し、走らせるのに最
|
||||
小限のレベルで必要な数々のソフトウェアパッケージの一覧を示してい
|
||||
ます。
|
||||
|
||||
Documentation/CodingStyle
|
||||
これは Linux カーネルのコーディングスタイルと背景にある理由を記述
|
||||
しています。全ての新しいコードはこのドキュメントにあるガイドライン
|
||||
に従っていることを期待されています。大部分のメンテナはこれらのルー
|
||||
ルに従っているものだけを受け付け、多くの人は正しいスタイルのコード
|
||||
だけをレビューします。
|
||||
|
||||
Documentation/SubmittingPatches
|
||||
Documentation/SubmittingDrivers
|
||||
これらのファイルには、どうやってうまくパッチを作って投稿するかに
|
||||
ついて非常に詳しく書かれており、以下を含みます(これだけに限らない
|
||||
けれども)
|
||||
- Email に含むこと
|
||||
- Email の形式
|
||||
- だれに送るか
|
||||
これらのルールに従えばうまくいくことを保証することではありません
|
||||
が (すべてのパッチは内容とスタイルについて精査を受けるので)、
|
||||
ルールに従わなければ間違いなくうまくいかないでしょう。
|
||||
|
||||
この他にパッチを作る方法についてのよくできた記述は-
|
||||
|
||||
"The Perfect Patch"
|
||||
http://www.zip.com.au/~akpm/linux/patches/stuff/tpp.txt
|
||||
"Linux kernel patch submission format"
|
||||
http://linux.yyz.us/patch-format.html
|
||||
|
||||
Documentation/stable_api_nonsense.txt
|
||||
このファイルはカーネルの中に不変のAPIを持たないことにした意識的な
|
||||
決断の背景にある理由について書かれています。以下のようなことを含
|
||||
んでいます-
|
||||
- サブシステムとの間に層を作ること(コンパチビリティのため?)
|
||||
- オペレーティングシステム間のドライバの移植性
|
||||
- カーネルソースツリーの素早い変更を遅らせる(もしくは素早い変更
|
||||
を妨げる)
|
||||
このドキュメントは Linux 開発の思想を理解するのに非常に重要です。
|
||||
そして、他のOSでの開発者が Linux に移る時にとても重要です。
|
||||
|
||||
Documentation/SecurityBugs
|
||||
もし Linux カーネルでセキュリティ問題を発見したように思ったら、こ
|
||||
のドキュメントのステップに従ってカーネル開発者に連絡し、問題解決を
|
||||
支援してください。
|
||||
|
||||
Documentation/ManagementStyle
|
||||
このドキュメントは Linux カーネルのメンテナ達がどう行動するか、
|
||||
彼らの手法の背景にある共有されている精神について記述しています。こ
|
||||
れはカーネル開発の初心者なら(もしくは、単に興味があるだけの人でも)
|
||||
重要です。なぜならこのドキュメントは、カーネルメンテナ達の独特な
|
||||
行動についての多くの誤解や混乱を解消するからです。
|
||||
|
||||
Documentation/stable_kernel_rules.txt
|
||||
このファイルはどのように stable カーネルのリリースが行われるかのルー
|
||||
ルが記述されています。そしてこれらのリリースの中のどこかで変更を取
|
||||
り入れてもらいたい場合に何をすれば良いかが示されています。
|
||||
|
||||
Documentation/kernel-docs.txt
|
||||
カーネル開発に付随する外部ドキュメントのリストです。もしあなたが
|
||||
探しているものがカーネル内のドキュメントでみつからなかった場合、
|
||||
このリストをあたってみてください。
|
||||
|
||||
Documentation/applying-patches.txt
|
||||
パッチとはなにか、パッチをどうやって様々なカーネルの開発ブランチに
|
||||
適用するのかについて正確に記述した良い入門書です。
|
||||
|
||||
カーネルはソースコードから自動的に生成可能な多数のドキュメントを自分自
|
||||
身でもっています。これにはカーネル内 API のすべての記述や、どう正しく
|
||||
ロックをかけるかの規則が含まれます。このドキュメントは
|
||||
Documentation/DocBook/ ディレクトリに作られ、以下のように
|
||||
make pdfdocs
|
||||
make psdocs
|
||||
make htmldocs
|
||||
make mandocs
|
||||
コマンドを実行するとメインカーネルのソースディレクトリから
|
||||
それぞれ、PDF, Postscript, HTML, man page の形式で生成されます。
|
||||
|
||||
カーネル開発者になるには
|
||||
---------------------------
|
||||
|
||||
もしあなたが、Linux カーネル開発について何も知らないならば、
|
||||
KernelNewbies プロジェクトを見るべきです
|
||||
http://kernelnewbies.org
|
||||
|
||||
このサイトには役に立つメーリングリストがあり、基本的なカーネル開発に関
|
||||
するほとんどどんな種類の質問もできます (既に回答されているようなことを
|
||||
聞く前にまずはアーカイブを調べてください)。
|
||||
またここには、リアルタイムで質問を聞くことができる IRC チャネルや、Linux
|
||||
カーネルの開発に関して学ぶのに便利なたくさんの役に立つドキュメントがあ
|
||||
ります。
|
||||
|
||||
web サイトには、コードの構成、サブシステム、現在存在するプロジェクト(ツ
|
||||
リーにあるもの無いものの両方)の基本的な管理情報があります。
|
||||
ここには、また、カーネルのコンパイルのやり方やパッチの当て方などの間接
|
||||
的な基本情報も記述されています。
|
||||
|
||||
あなたがどこからスタートして良いかわからないが、Linux カーネル開発コミュ
|
||||
ニティに参加して何かすることをさがしている場合には、Linux kernel
|
||||
Janitor's プロジェクトにいけば良いでしょう -
|
||||
http://janitor.kernelnewbies.org/
|
||||
ここはそのようなスタートをするのにうってつけの場所です。ここには、
|
||||
Linux カーネルソースツリーの中に含まれる、きれいにし、修正しなければな
|
||||
らない、単純な問題のリストが記述されています。このプロジェクトに関わる
|
||||
開発者と一緒に作業することで、あなたのパッチを Linuxカーネルツリーに入
|
||||
れるための基礎を学ぶことができ、そしてもしあなたがまだアイディアを持っ
|
||||
ていない場合には、次にやる仕事の方向性が見えてくるかもしれません。
|
||||
|
||||
もしあなたが、すでにひとまとまりコードを書いていて、カーネルツリーに入
|
||||
れたいと思っていたり、それに関する適切な支援を求めたい場合、カーネル
|
||||
メンターズプロジェクトはそのような皆さんを助けるためにできました。
|
||||
ここにはメーリングリストがあり、以下から参照できます
|
||||
http://selenic.com/mailman/listinfo/kernel-mentors
|
||||
|
||||
実際に Linux カーネルのコードについて修正を加える前に、どうやってその
|
||||
コードが動作するのかを理解することが必要です。そのためには、特別なツー
|
||||
ルの助けを借りてでも、それを直接よく読むことが最良の方法です(ほとんど
|
||||
のトリッキーな部分は十分にコメントしてありますから)。そういうツールで
|
||||
特におすすめなのは、Linux クロスリファレンスプロジェクトです。これは、
|
||||
自己参照方式で、索引がついた web 形式で、ソースコードを参照することが
|
||||
できます。この最新の素晴しいカーネルコードのリポジトリは以下で見つかり
|
||||
ます-
|
||||
http://sosdg.org/~qiyong/lxr/
|
||||
|
||||
開発プロセス
|
||||
-----------------------
|
||||
|
||||
Linux カーネルの開発プロセスは現在幾つかの異なるメインカーネル「ブラン
|
||||
チ」と多数のサブシステム毎のカーネルブランチから構成されます。
|
||||
これらのブランチとは-
|
||||
- メインの 2.6.x カーネルツリー
|
||||
- 2.6.x.y -stable カーネルツリー
|
||||
- 2.6.x -git カーネルパッチ
|
||||
- 2.6.x -mm カーネルパッチ
|
||||
- サブシステム毎のカーネルツリーとパッチ
|
||||
|
||||
2.6.x カーネルツリー
|
||||
-----------------
|
||||
|
||||
2.6.x カーネルは Linus Torvalds によってメンテナンスされ、kernel.org
|
||||
の pub/linux/kernel/v2.6/ ディレクトリに存在します。この開発プロセスは
|
||||
以下のとおり-
|
||||
|
||||
- 新しいカーネルがリリースされた直後に、2週間の特別期間が設けられ、
|
||||
この期間中に、メンテナ達は Linus に大きな差分を送ることができます。
|
||||
このような差分は通常 -mm カーネルに数週間含まれてきたパッチです。
|
||||
大きな変更は git(カーネルのソース管理ツール、詳細は
|
||||
http://git.or.cz/ 参照) を使って送るのが好ましいやり方ですが、パッ
|
||||
チファイルの形式のまま送るのでも十分です。
|
||||
|
||||
- 2週間後、-rc1 カーネルがリリースされ、この後にはカーネル全体の安定
|
||||
性に影響をあたえるような新機能は含まない類のパッチしか取り込むこと
|
||||
はできません。新しいドライバ(もしくはファイルシステム)のパッチは
|
||||
-rc1 の後で受け付けられることもあることを覚えておいてください。な
|
||||
ぜなら、変更が独立していて、追加されたコードの外の領域に影響を与え
|
||||
ない限り、退行のリスクは無いからです。-rc1 がリリースされた後、
|
||||
Linus へパッチを送付するのに git を使うこともできますが、パッチは
|
||||
レビューのために、パブリックなメーリングリストへも同時に送る必要が
|
||||
あります。
|
||||
|
||||
- 新しい -rc は Linus が、最新の git ツリーがテスト目的であれば十分
|
||||
に安定した状態にあると判断したときにリリースされます。目標は毎週新
|
||||
しい -rc カーネルをリリースすることです。
|
||||
|
||||
- 以下の URL で各 -rc リリースに存在する既知の後戻り問題のリスト
|
||||
が追跡されます-
|
||||
http://kernelnewbies.org/known_regressions
|
||||
|
||||
- このプロセスはカーネルが 「準備ができた」と考えられるまで継続しま
|
||||
す。このプロセスはだいたい 6週間継続します。
|
||||
|
||||
Andrew Morton が Linux-kernel メーリングリストにカーネルリリースについ
|
||||
て書いたことをここで言っておくことは価値があります-
|
||||
「カーネルがいつリリースされるかは誰も知りません。なぜなら、これは現
|
||||
実に認識されたバグの状況によりリリースされるのであり、前もって決めら
|
||||
れた計画によってリリースされるものではないからです。」
|
||||
|
||||
2.6.x.y -stable カーネルツリー
|
||||
---------------------------
|
||||
|
||||
バージョンに4つ目の数字がついたカーネルは -stable カーネルです。これに
|
||||
は、2.6.x カーネルで見つかったセキュリティ問題や重大な後戻りに対する比
|
||||
較的小さい重要な修正が含まれます。
|
||||
|
||||
これは、開発/実験的バージョンのテストに協力することに興味が無く、
|
||||
最新の安定したカーネルを使いたいユーザに推奨するブランチです。
|
||||
|
||||
もし、2.6.x.y カーネルが存在しない場合には、番号が一番大きい 2.6.x
|
||||
が最新の安定版カーネルです。
|
||||
|
||||
2.6.x.y は "stable" チーム <stable@kernel.org> でメンテされており、だ
|
||||
いたい隔週でリリースされています。
|
||||
|
||||
カーネルツリーに入っている、Documentation/stable_kernel_rules.txt ファ
|
||||
イルにはどのような種類の変更が -stable ツリーに受け入れ可能か、またリ
|
||||
リースプロセスがどう動くかが記述されています。
|
||||
|
||||
2.6.x -git パッチ
|
||||
------------------
|
||||
|
||||
git リポジトリで管理されているLinus のカーネルツリーの毎日のスナップ
|
||||
ショットがあります。(だから -git という名前がついています)。これらのパッ
|
||||
チはおおむね毎日リリースされており、Linus のツリーの現状を表します。こ
|
||||
れは -rc カーネルと比べて、パッチが大丈夫かどうかも確認しないで自動的
|
||||
に生成されるので、より実験的です。
|
||||
|
||||
2.6.x -mm カーネルパッチ
|
||||
------------------------
|
||||
|
||||
Andrew Morton によってリリースされる実験的なカーネルパッチ群です。
|
||||
Andrew は個別のサブシステムカーネルツリーとパッチを全て集めてきて
|
||||
linux-kernel メーリングリストで収集された多数のパッチと同時に一つにま
|
||||
とめます。
|
||||
このツリーは新機能とパッチが検証される場となります。ある期間の間パッチ
|
||||
が -mm に入って価値を証明されたら、Andrew やサブシステムメンテナが、
|
||||
メインラインへ入れるように Linus にプッシュします。
|
||||
|
||||
メインカーネルツリーに含めるために Linus に送る前に、すべての新しいパッ
|
||||
チが -mm ツリーでテストされることが強く推奨されます。
|
||||
|
||||
これらのカーネルは安定して動作すべきシステムとして使うのには適切ではあ
|
||||
りませんし、カーネルブランチの中でももっとも動作にリスクが高いものです。
|
||||
|
||||
もしあなたが、カーネル開発プロセスの支援をしたいと思っているのであれば、
|
||||
どうぞこれらのカーネルリリースをテストに使ってみて、そしてもし問題があ
|
||||
れば、またもし全てが正しく動作したとしても、linux-kernel メーリングリ
|
||||
ストにフィードバックを提供してください。
|
||||
|
||||
すべての他の実験的パッチに加えて、これらのカーネルは通常リリース時点で
|
||||
メインラインの -git カーネルに含まれる全ての変更も含んでいます。
|
||||
|
||||
-mm カーネルは決まったスケジュールではリリースされません、しかし通常幾
|
||||
つかの -mm カーネル (1 から 3 が普通)が各-rc カーネルの間にリリースさ
|
||||
れます。
|
||||
|
||||
サブシステム毎のカーネルツリーとパッチ
|
||||
-------------------------------------------
|
||||
|
||||
カーネルの様々な領域で何が起きているかを見られるようにするため、多くの
|
||||
カーネルサブシステム開発者は彼らの開発ツリーを公開しています。これらの
|
||||
ツリーは説明したように -mm カーネルリリースに入れ込まれます。
|
||||
|
||||
以下はさまざまなカーネルツリーの中のいくつかのリスト-
|
||||
|
||||
git ツリー-
|
||||
- Kbuild の開発ツリー、Sam Ravnborg <sam@ravnborg.org>
|
||||
git.kernel.org:/pub/scm/linux/kernel/git/sam/kbuild.git
|
||||
|
||||
- ACPI の開発ツリー、 Len Brown <len.brown@intel.com>
|
||||
git.kernel.org:/pub/scm/linux/kernel/git/lenb/linux-acpi-2.6.git
|
||||
|
||||
- Block の開発ツリー、Jens Axboe <axboe@suse.de>
|
||||
git.kernel.org:/pub/scm/linux/kernel/git/axboe/linux-2.6-block.git
|
||||
|
||||
- DRM の開発ツリー、Dave Airlie <airlied@linux.ie>
|
||||
git.kernel.org:/pub/scm/linux/kernel/git/airlied/drm-2.6.git
|
||||
|
||||
- ia64 の開発ツリー、Tony Luck <tony.luck@intel.com>
|
||||
git.kernel.org:/pub/scm/linux/kernel/git/aegl/linux-2.6.git
|
||||
|
||||
- infiniband, Roland Dreier <rolandd@cisco.com>
|
||||
git.kernel.org:/pub/scm/linux/kernel/git/roland/infiniband.git
|
||||
|
||||
- libata, Jeff Garzik <jgarzik@pobox.com>
|
||||
git.kernel.org:/pub/scm/linux/kernel/git/jgarzik/libata-dev.git
|
||||
|
||||
- ネットワークドライバ, Jeff Garzik <jgarzik@pobox.com>
|
||||
git.kernel.org:/pub/scm/linux/kernel/git/jgarzik/netdev-2.6.git
|
||||
|
||||
- pcmcia, Dominik Brodowski <linux@dominikbrodowski.net>
|
||||
git.kernel.org:/pub/scm/linux/kernel/git/brodo/pcmcia-2.6.git
|
||||
|
||||
- SCSI, James Bottomley <James.Bottomley@SteelEye.com>
|
||||
git.kernel.org:/pub/scm/linux/kernel/git/jejb/scsi-misc-2.6.git
|
||||
|
||||
quilt ツリー-
|
||||
- USB, PCI ドライバコアと I2C, Greg Kroah-Hartman <gregkh@suse.de>
|
||||
kernel.org/pub/linux/kernel/people/gregkh/gregkh-2.6/
|
||||
- x86-64 と i386 の仲間 Andi Kleen <ak@suse.de>
|
||||
|
||||
その他のカーネルツリーは http://git.kernel.org/ と MAINTAINERS ファ
|
||||
イルに一覧表があります。
|
||||
|
||||
バグレポート
|
||||
-------------
|
||||
|
||||
bugzilla.kernel.org は Linux カーネル開発者がカーネルのバグを追跡する
|
||||
場所です。ユーザは見つけたバグの全てをこのツールで報告すべきです。
|
||||
どう kernel bugzilla を使うかの詳細は、以下を参照してください-
|
||||
http://test.kernel.org/bugzilla/faq.html
|
||||
|
||||
メインカーネルソースディレクトリにあるファイル REPORTING-BUGS はカーネ
|
||||
ルバグらしいものについてどうレポートするかの良いテンプレートであり、問
|
||||
題の追跡を助けるためにカーネル開発者にとってどんな情報が必要なのかの詳
|
||||
細が書かれています。
|
||||
|
||||
メーリングリスト
|
||||
-------------
|
||||
|
||||
上のいくつかのドキュメントで述べていますが、コアカーネル開発者の大部分
|
||||
は Linux kernel メーリングリストに参加しています。このリストの登録/脱
|
||||
退の方法については以下を参照してください-
|
||||
http://vger.kernel.org/vger-lists.html#linux-kernel
|
||||
|
||||
このメーリングリストのアーカイブは web 上の多数の場所に存在します。こ
|
||||
れらのアーカイブを探すにはサーチエンジンを使いましょう。例えば-
|
||||
http://dir.gmane.org/gmane.linux.kernel
|
||||
|
||||
リストに投稿する前にすでにその話題がアーカイブに存在するかどうかを検索
|
||||
することを是非やってください。多数の事がすでに詳細に渡って議論されて
|
||||
おり、アーカイブにのみ記録されています。
|
||||
|
||||
大部分のカーネルサブシステムも自分の個別の開発を実施するメーリングリス
|
||||
トを持っています。個々のグループがどんなリストを持っているかは、
|
||||
MAINTAINERS ファイルにリストがありますので参照してください。
|
||||
|
||||
多くのリストは kernel.org でホストされています。これらの情報は以下にあ
|
||||
ります-
|
||||
http://vger.kernel.org/vger-lists.html
|
||||
|
||||
メーリングリストを使う場合、良い行動習慣に従うようにしましょう。
|
||||
少し安っぽいが、以下の URL は上のリスト(や他のリスト)で会話する場合の
|
||||
シンプルなガイドラインを示しています-
|
||||
http://www.albion.com/netiquette/
|
||||
|
||||
もし複数の人があなたのメールに返事をした場合、CC: で受ける人のリストは
|
||||
だいぶ多くなるでしょう。良い理由がない場合、CC: リストから誰かを削除
|
||||
しないように、また、メーリングリストのアドレスだけにリプライすることの
|
||||
ないようにしましょう。1つは送信者から、もう1つはリストからのように、メー
|
||||
ルを2回受けることになってもそれに慣れ、しゃれたメールヘッダーを追加し
|
||||
てこの状態を変えようとしないように。人々はそのようなことは好みません。
|
||||
|
||||
今までのメールでのやりとりとその間のあなたの発言はそのまま残し、
|
||||
"John Kernelhacker wrote ...:" の行をあなたのリプライの先頭行にして、
|
||||
メールの先頭でなく、各引用行の間にあなたの言いたいことを追加するべきで
|
||||
す。
|
||||
|
||||
もしパッチをメールに付ける場合は、Documentation/SubmittingPatches に提
|
||||
示されているように、それは プレーンな可読テキストにすることを忘れない
|
||||
ようにしましょう。カーネル開発者は 添付や圧縮したパッチを扱いたがりま
|
||||
せん-
|
||||
彼らはあなたのパッチの行毎にコメントを入れたいので、そのためにはそうす
|
||||
るしかありません。あなたのメールプログラムが空白やタブを圧縮しないよう
|
||||
に確認した方が良いです。最初の良いテストとしては、自分にメールを送って
|
||||
みて、そのパッチを自分で当ててみることです。もしそれがうまく行かないな
|
||||
ら、あなたのメールプログラムを直してもらうか、正しく動くように変えるべ
|
||||
きです。
|
||||
|
||||
とりわけ、他の登録者に対する尊敬を表すようにすることを覚えておいてくだ
|
||||
さい。
|
||||
|
||||
コミュニティと共に働くこと
|
||||
--------------------------
|
||||
|
||||
カーネルコミュニティのゴールは可能なかぎり最高のカーネルを提供すること
|
||||
です。あなたがパッチを受け入れてもらうために投稿した場合、それは、技術
|
||||
的メリットだけがレビューされます。その際、あなたは何を予想すべきでしょ
|
||||
うか?
|
||||
- 批判
|
||||
- コメント
|
||||
- 変更の要求
|
||||
- パッチの正当性の証明要求
|
||||
- 沈黙
|
||||
|
||||
思い出してください、ここはあなたのパッチをカーネルに入れる話です。あ
|
||||
なたは、あなたのパッチに対する批判とコメントを受け入れるべきで、それら
|
||||
を技術的レベルで評価して、パッチを再作成するか、なぜそれらの変更をすべ
|
||||
きでないかについて、明確で簡潔な理由の説明を提供してください。
|
||||
もし、あなたのパッチに何も反応がない場合、たまにはメールの山に埋もれて
|
||||
見逃され、あなたの投稿が忘れられてしまうこともあるので、数日待って再度
|
||||
投稿してください。
|
||||
|
||||
あなたがやるべきでないものは?
|
||||
- 質問なしにあなたのパッチが受け入れられると想像すること
|
||||
- 守りに入ること
|
||||
- コメントを無視すること
|
||||
- 要求された変更を何もしないでパッチを出し直すこと
|
||||
|
||||
可能な限り最高の技術的解決を求めているコミュニティでは、パッチがどのく
|
||||
らい有益なのかについては常に異なる意見があります。あなたは協調的である
|
||||
べきですし、また、あなたのアイディアをカーネルに対してうまく合わせるよ
|
||||
うにすることが望まれています。もしくは、最低限あなたのアイディアがそれ
|
||||
だけの価値があるとすすんで証明するようにしなければなりません。
|
||||
正しい解決に向かって進もうという意志がある限り、間違うことがあっても許
|
||||
容されることを忘れないでください。
|
||||
|
||||
あなたの最初のパッチに単に 1ダースもの修正を求めるリストの返答になるこ
|
||||
とも普通のことです。これはあなたのパッチが受け入れられないということで
|
||||
は *ありません*、そしてあなた自身に反対することを意味するのでも *ありま
|
||||
せん*。単に自分のパッチに対して指摘された問題を全て修正して再送すれば
|
||||
良いのです。
|
||||
|
||||
|
||||
カーネルコミュニティと企業組織のちがい
|
||||
-----------------------------------------------------------------
|
||||
|
||||
カーネルコミュニティは大部分の伝統的な会社の開発環境とは異ったやり方で
|
||||
動いています。以下は問題を避けるためにできると良いことのリストです-
|
||||
|
||||
あなたの提案する変更について言うときのうまい言い方:
|
||||
|
||||
- "これは複数の問題を解決します"
|
||||
- "これは2000行のコードを削除します"
|
||||
- "以下のパッチは、私が言おうとしていることを説明するものです"
|
||||
- "私はこれを5つの異なるアーキテクチャでテストしたのですが..."
|
||||
- "以下は一連の小さなパッチ群ですが..."
|
||||
- "これは典型的なマシンでの性能を向上させます.."
|
||||
|
||||
やめた方が良い悪い言い方:
|
||||
|
||||
- このやり方で AIX/ptx/Solaris ではできたので、できるはずだ
|
||||
- 私はこれを20年もの間やってきた、だから
|
||||
- これは、私の会社が金儲けをするために必要だ
|
||||
- これは我々のエンタープライズ向け商品ラインのためである
|
||||
- これは 私が自分のアイディアを記述した、1000ページの設計資料である
|
||||
- 私はこれについて、6ケ月作業している。
|
||||
- 以下は ... に関する5000行のパッチです
|
||||
- 私は現在のぐちゃぐちゃを全部書き直した、それが以下です...
|
||||
- 私は〆切がある、そのためこのパッチは今すぐ適用される必要がある
|
||||
|
||||
カーネルコミュニティが大部分の伝統的なソフトウェアエンジニアリングの労
|
||||
働環境と異なるもう一つの点は、やりとりに顔を合わせないということです。
|
||||
email と irc を第一のコミュニケーションの形とする一つの利点は、性別や
|
||||
民族の差別がないことです。Linux カーネルの職場環境は女性や少数民族を受
|
||||
容します。なぜなら、email アドレスによってのみあなたが認識されるからで
|
||||
す。
|
||||
国際的な側面からも活動領域を均等にするようにします。なぜならば、あなた
|
||||
は人の名前で性別を想像できないからです。ある男性が アンドレアという名
|
||||
前で、女性の名前は パット かもしれません (訳注 Andrea は米国では女性、
|
||||
それ以外(欧州など)では男性名として使われることが多い。同様に、Pat は
|
||||
Patricia (主に女性名)や Patrick (主に男性名)の略称)。
|
||||
Linux カーネルの活動をして、意見を表明したことがある大部分の女性は、前
|
||||
向きな経験をもっています。
|
||||
|
||||
言葉の壁は英語が得意でない一部の人には問題になります。
|
||||
メーリングリストの中できちんとアイディアを交換するには、相当うまく英語
|
||||
を操れる必要があることもあります。そのため、あなたは自分のメール
|
||||
を送る前に英語で意味が通じているかをチェックすることをお薦めします。
|
||||
|
||||
変更を分割する
|
||||
---------------------
|
||||
|
||||
Linux カーネルコミュニティは、一度に大量のコードの塊を喜んで受容するこ
|
||||
とはありません。変更は正確に説明される必要があり、議論され、小さい、個
|
||||
別の部分に分割する必要があります。これはこれまで多くの会社がやり慣れて
|
||||
きたことと全く正反対のことです。あなたのプロポーザルは、開発プロセスのと
|
||||
ても早い段階から紹介されるべきです。そうすれば あなたは自分のやってい
|
||||
ることにフィードバックを得られます。これは、コミュニティからみれば、あ
|
||||
なたが彼らと一緒にやっているように感じられ、単にあなたの提案する機能の
|
||||
ゴミ捨て場として使っているのではない、と感じられるでしょう。
|
||||
しかし、一度に 50 もの email をメーリングリストに送りつけるようなことは
|
||||
やってはいけません、あなたのパッチ群はいつもどんな時でもそれよりは小さ
|
||||
くなければなりません。
|
||||
|
||||
パッチを分割する理由は以下です-
|
||||
|
||||
1) 小さいパッチはあなたのパッチが適用される見込みを大きくします、カー
|
||||
ネルの人達はパッチが正しいかどうかを確認する時間や労力をかけないか
|
||||
らです。5行のパッチはメンテナがたった1秒見るだけで適用できます。
|
||||
しかし、500行のパッチは、正しいことをレビューするのに数時間かかるか
|
||||
もしれません(時間はパッチのサイズなどにより指数関数に比例してかかり
|
||||
ます)
|
||||
|
||||
小さいパッチは何かあったときにデバッグもとても簡単になります。パッ
|
||||
チを1個1個取り除くのは、とても大きなパッチを当てた後に(かつ、何かお
|
||||
かしくなった後で)解剖するのに比べればとても簡単です。
|
||||
|
||||
2) 小さいパッチを送るだけでなく、送るまえに、書き直して、シンプルにす
|
||||
る(もしくは、単に順番を変えるだけでも)ことも、とても重要です。
|
||||
|
||||
以下はカーネル開発者の Al Viro のたとえ話です:
|
||||
|
||||
"生徒の数学の宿題を採点する先生のことを考えてみてください、先
|
||||
生は生徒が解に到達するまでの試行錯誤を見たいとは思わないでしょ
|
||||
う。先生は簡潔な最高の解を見たいのです。良い生徒はこれを知って
|
||||
おり、そして最終解の前の中間作業を提出することは決してないので
|
||||
す"
|
||||
|
||||
カーネル開発でもこれは同じです。メンテナ達とレビューア達は、
|
||||
問題を解決する解の背後にある思考プロセスを見たいとは思いません。
|
||||
彼らは単純であざやかな解決方法を見たいのです。
|
||||
|
||||
あざやかな解を説明するのと、コミュニティと共に仕事をし、未解決の仕事を
|
||||
議論することのバランスをキープするのは難しいかもしれません。
|
||||
ですから、開発プロセスの早期段階で改善のためのフィードバックをもらうよ
|
||||
うにするのも良いですが、変更点を小さい部分に分割して全体ではまだ完成し
|
||||
ていない仕事を(部分的に)取り込んでもらえるようにすることも良いことです。
|
||||
|
||||
また、でき上がっていないものや、"将来直す" ようなパッチを、本流に含め
|
||||
てもらうように送っても、それは受け付けられないことを理解してください。
|
||||
|
||||
あなたの変更を正当化する
|
||||
-------------------
|
||||
|
||||
あなたのパッチを分割するのと同時に、なぜその変更を追加しなければならな
|
||||
いかを Linux コミュニティに知らせることはとても重要です。新機能は必要
|
||||
性と有用性で正当化されなければなりません。
|
||||
|
||||
あなたの変更の説明
|
||||
--------------------
|
||||
|
||||
あなたのパッチを送付する場合には、メールの中のテキストで何を言うかにつ
|
||||
いて、特別に注意を払ってください。この情報はパッチの ChangeLog に使わ
|
||||
れ、いつも皆がみられるように保管されます。これは次のような項目を含め、
|
||||
パッチを完全に記述するべきです-
|
||||
|
||||
- なぜ変更が必要か
|
||||
- パッチ全体の設計アプローチ
|
||||
- 実装の詳細
|
||||
- テスト結果
|
||||
|
||||
これについて全てがどのようにあるべきかについての詳細は、以下のドキュメ
|
||||
ントの ChangeLog セクションを見てください-
|
||||
"The Perfect Patch"
|
||||
http://www.zip.com.au/~akpm/linux/patches/stuff/tpp.txt
|
||||
|
||||
これらのどれもが、時にはとても困難です。これらの慣例を完璧に実施するに
|
||||
は数年かかるかもしれません。これは継続的な改善のプロセスであり、そのた
|
||||
めには多数の忍耐と決意を必要とするものです。でも、諦めないで、これは可
|
||||
能なことです。多数の人がすでにできていますし、彼らも皆最初はあなたと同
|
||||
じところからスタートしたのですから。
|
||||
|
||||
Paolo Ciarrocchi に感謝、彼は彼の書いた "Development Process"
|
||||
(http://linux.tar.bz/articles/2.6-development_process)セクショ
|
||||
ンをこのテキストの原型にすることを許可してくれました。
|
||||
Randy Dunlap と Gerrit Huizenga はメーリングリストでやるべきこととやっ
|
||||
てはいけないことのリストを提供してくれました。
|
||||
以下の人々のレビュー、コメント、貢献に感謝。
|
||||
Pat Mochel, Hanna Linder, Randy Dunlap, Kay Sievers,
|
||||
Vojtech Pavlik, Jan Kara, Josh Boyer, Kees Cook, Andrew Morton, Andi
|
||||
Kleen, Vadim Lobanov, Jesper Juhl, Adrian Bunk, Keri Harris, Frans Pop,
|
||||
David A. Wheeler, Junio Hamano, Michael Kerrisk, と Alex Shepard
|
||||
彼らの支援なしでは、このドキュメントはできなかったでしょう。
|
||||
|
||||
Maintainer: Greg Kroah-Hartman <greg@kroah.com>
|
263
Documentation/ja_JP/stable_api_nonsense.txt
Normal file
263
Documentation/ja_JP/stable_api_nonsense.txt
Normal file
@ -0,0 +1,263 @@
|
||||
NOTE:
|
||||
This is a Japanese translation of Documentation/stable_api_nonsense.txt.
|
||||
This document is maintained by IKEDA, Munehiro <m-ikeda@ds.jp.nec.com>
|
||||
and the JF Project team <http://www.linux.or.jp/JF/>.
|
||||
If you find any difference between this document and the original file
|
||||
or a problem with the translation,
|
||||
please contact the maintainer of this file or JF project.
|
||||
|
||||
Please also note that the purpose of this file is to be easier to read
|
||||
for non-English (read: Japanese) speakers and is not intended as a
|
||||
fork. So if you have any comments or updates of this file, please try
|
||||
to update the original English file first.
|
||||
|
||||
Last Updated: 2007/07/18
|
||||
==================================
|
||||
これは、
|
||||
linux-2.6.22-rc4/Documentation/stable_api_nonsense.txt の和訳
|
||||
です。
|
||||
翻訳団体: JF プロジェクト < http://www.linux.or.jp/JF/ >
|
||||
翻訳日 : 2007/06/11
|
||||
原著作者: Greg Kroah-Hartman < greg at kroah dot com >
|
||||
翻訳者 : 池田 宗広 < m-ikeda at ds dot jp dot nec dot com >
|
||||
校正者 : Masanori Kobayashi さん < zap03216 at nifty dot ne dot jp >
|
||||
Seiji Kaneko さん < skaneko at a2 dot mbn dot or dot jp >
|
||||
==================================
|
||||
|
||||
|
||||
|
||||
Linux カーネルのドライバインターフェース
|
||||
(あなたの質問すべてに対する回答とその他諸々)
|
||||
|
||||
Greg Kroah-Hartman <greg at kroah dot com>
|
||||
|
||||
|
||||
この文書は、なぜ Linux ではバイナリカーネルインターフェースが定義
|
||||
されていないのか、またはなぜ不変のカーネルインターフェースを持たな
|
||||
いのか、ということを説明するために書かれた。ここでの話題は「カーネ
|
||||
ル内部の」インターフェースについてであり、ユーザー空間とのインター
|
||||
フェースではないことを理解してほしい。カーネルとユーザー空間とのイ
|
||||
ンターフェースとはアプリケーションプログラムが使用するものであり、
|
||||
つまりシステムコールのインターフェースがこれに当たる。これは今まで
|
||||
長きに渡り、かつ今後も「まさしく」不変である。私は確か 0.9 か何か
|
||||
より前のカーネルを使ってビルドした古いプログラムを持っているが、そ
|
||||
れは最新の 2.6 カーネルでもきちんと動作する。ユーザー空間とのイン
|
||||
ターフェースは、ユーザーとアプリケーションプログラマが不変性を信頼
|
||||
してよいものの一つである。
|
||||
|
||||
|
||||
要旨
|
||||
----
|
||||
|
||||
あなたは不変のカーネルインターフェースが必要だと考えているかもしれ
|
||||
ないが、実際のところはそうではない。あなたは必要としているものが分
|
||||
かっていない。あなたが必要としているものは安定して動作するドライバ
|
||||
であり、それはドライバがメインのカーネルツリーに含まれる場合のみ得
|
||||
ることができる。ドライバがメインのカーネルツリーに含まれていると、
|
||||
他にも多くの良いことがある。それは、Linux をより強固で、安定な、成
|
||||
熟したオペレーティングシステムにすることができるということだ。これ
|
||||
こそ、そもそもあなたが Linux を使う理由のはずだ。
|
||||
|
||||
|
||||
はじめに
|
||||
--------
|
||||
|
||||
カーネル内部のインターフェース変更を心配しなければならないドライバ
|
||||
を書きたいなどというのは、変わり者だけだ。この世界のほとんどの人は、
|
||||
そのようなドライバがどんなインターフェースを使っているかなど知らな
|
||||
いし、そんなドライバのことなど全く気にもかけていない。
|
||||
|
||||
|
||||
まず初めに、クローズソースとか、ソースコードの隠蔽とか、バイナリの
|
||||
みが配布される使い物にならない代物[訳注(1)]とか、実体はバイナリ
|
||||
コードでそれを読み込むためのラッパー部分のみソースコードが公開され
|
||||
ているとか、その他用語は何であれ GPL の下にソースコードがリリース
|
||||
されていないカーネルドライバに関する法的な問題について、私は「いか
|
||||
なる議論も」行うつもりがない。法的な疑問があるのならば、プログラマ
|
||||
である私ではなく、弁護士に相談して欲しい。ここでは単に、技術的な問
|
||||
題について述べることにする。(法的な問題を軽視しているわけではない。
|
||||
それらは実際に存在するし、あなたはそれをいつも気にかけておく必要が
|
||||
ある)
|
||||
|
||||
訳注(1)
|
||||
「使い物にならない代物」の原文は "blob"
|
||||
|
||||
|
||||
さてここでは、バイナリカーネルインターフェースについてと、ソースレ
|
||||
ベルでのインターフェースの不変性について、という二つの話題を取り上
|
||||
げる。この二つは互いに依存する関係にあるが、まずはバイナリインター
|
||||
フェースについて議論を行いやっつけてしまおう。
|
||||
|
||||
|
||||
バイナリカーネルインターフェース
|
||||
--------------------------------
|
||||
|
||||
もしソースレベルでのインターフェースが不変ならば、バイナリインター
|
||||
フェースも当然のように不変である、というのは正しいだろうか?正しく
|
||||
ない。Linux カーネルに関する以下の事実を考えてみてほしい。
|
||||
- あなたが使用するCコンパイラのバージョンによって、カーネル内部
|
||||
の構造体の配置構造は異なったものになる。また、関数は異なった方
|
||||
法でカーネルに含まれることになるかもしれない(例えばインライン
|
||||
関数として扱われたり、扱われなかったりする)。個々の関数がどの
|
||||
ようにコンパイルされるかはそれほど重要ではないが、構造体のパデ
|
||||
ィングが異なるというのは非常に重要である。
|
||||
- あなたがカーネルのビルドオプションをどのように設定するかによっ
|
||||
て、カーネルには広い範囲で異なった事態が起こり得る。
|
||||
- データ構造は異なるデータフィールドを持つかもしれない
|
||||
- いくつかの関数は全く実装されていない状態になり得る
|
||||
(例:SMP向けではないビルドでは、いくつかのロックは中身が
|
||||
カラにコンパイルされる)
|
||||
- カーネル内のメモリは、異なった方法で配置され得る。これはビ
|
||||
ルドオプションに依存している。
|
||||
- Linux は様々な異なるプロセッサアーキテクチャ上で動作する。
|
||||
あるアーキテクチャ用のバイナリドライバを、他のアーキテクチャで
|
||||
正常に動作させる方法はない。
|
||||
|
||||
|
||||
ある特定のカーネル設定を使用し、カーネルをビルドしたのと正確に同じ
|
||||
Cコンパイラを使用して単にカーネルモジュールをコンパイルするだけで
|
||||
も、あなたはこれらいくつもの問題に直面することになる。ある特定の
|
||||
Linux ディストリビューションの、ある特定のリリースバージョン用にモ
|
||||
ジュールを提供しようと思っただけでも、これらの問題を引き起こすには
|
||||
十分である。にも関わらず Linux ディストリビューションの数と、サ
|
||||
ポートするディストリビューションのリリース数を掛け算し、それら一つ
|
||||
一つについてビルドを行ったとしたら、今度はリリースごとのビルドオプ
|
||||
ションの違いという悪夢にすぐさま悩まされることになる。また、ディス
|
||||
トリビューションの各リリースバージョンには、異なるハードウェア(プ
|
||||
ロセッサタイプや種々のオプション)に対応するため、何種類かのカーネ
|
||||
ルが含まれているということも理解して欲しい。従って、ある一つのリ
|
||||
リースバージョンだけのためにモジュールを作成する場合でも、あなたは
|
||||
何バージョンものモジュールを用意しなければならない。
|
||||
|
||||
|
||||
信じて欲しい。このような方法でサポートを続けようとするなら、あなた
|
||||
はいずれ正気を失うだろう。遠い昔、私はそれがいかに困難なことか、身
|
||||
をもって学んだのだ・・・
|
||||
|
||||
|
||||
不変のカーネルソースレベルインターフェース
|
||||
------------------------------------------
|
||||
|
||||
メインカーネルツリーに含まれていない Linux カーネルドライバを継続
|
||||
してサポートしていこうとしている人たちとの議論においては、これは極
|
||||
めて「引火性の高い」話題である。[訳注(2)]
|
||||
|
||||
訳注(2)
|
||||
「引火性の高い」の原文は "volatile"。
|
||||
volatile には「揮発性の」「爆発しやすい」という意味の他、「変わり
|
||||
やすい」「移り気な」という意味がある。
|
||||
「(この話題は)爆発的に激しい論争を巻き起こしかねない」ということ
|
||||
を、「(カーネルのソースレベルインターフェースは)移ろい行くもので
|
||||
ある」ということを連想させる "volatile" という単語で表現している。
|
||||
|
||||
|
||||
Linux カーネルの開発は継続的に速いペースで行われ、決して歩みを緩め
|
||||
ることがない。その中でカーネル開発者達は、現状のインターフェースに
|
||||
あるバグを見つけ、より良い方法を考え出す。彼らはやがて、現状のイン
|
||||
ターフェースがより正しく動作するように修正を行う。その過程で関数の
|
||||
名前は変更されるかもしれず、構造体は大きく、または小さくなるかもし
|
||||
れず、関数の引数は検討しなおされるかもしれない。そのような場合、引
|
||||
き続き全てが正常に動作するよう、カーネル内でこれらのインターフェー
|
||||
スを使用している個所も全て同時に修正される。
|
||||
|
||||
|
||||
具体的な例として、カーネル内の USB インターフェースを挙げる。USB
|
||||
サブシステムはこれまでに少なくとも3回の書き直しが行われ、その結果
|
||||
インターフェースが変更された。これらの書き直しはいくつかの異なった
|
||||
問題を修正するために行われた。
|
||||
- 同期的データストリームが非同期に変更された。これにより多数のド
|
||||
ライバを単純化でき、全てのドライバのスループットが向上した。今
|
||||
やほとんど全ての USB デバイスは、考えられる最高の速度で動作し
|
||||
ている。
|
||||
- USB ドライバが USB サブシステムのコアから行う、データパケット
|
||||
用のメモリ確保方法が変更された。これに伴い、いくつもの文書化さ
|
||||
れたデッドロック条件を回避するため、全ての USB ドライバはより
|
||||
多くの情報を USB コアに提供しなければならないようになっている。
|
||||
|
||||
|
||||
このできごとは、数多く存在するクローズソースのオペレーティングシス
|
||||
テムとは全く対照的だ。それらは長期に渡り古い USB インターフェース
|
||||
をメンテナンスしなければならない。古いインターフェースが残ることで、
|
||||
新たな開発者が偶然古いインターフェースを使い、正しくない方法で開発
|
||||
を行ってしまう可能性が生じる。これによりシステムの安定性は危険にさ
|
||||
らされることになる。
|
||||
|
||||
|
||||
上に挙げたどちらの例においても、開発者達はその変更が重要かつ必要で
|
||||
あることに合意し、比較的楽にそれを実行した。もし Linux がソースレ
|
||||
ベルでインターフェースの不変性を保証しなければならないとしたら、新
|
||||
しいインターフェースを作ると同時に、古い、問題のある方を今後ともメ
|
||||
ンテナンスするという余計な仕事を USB の開発者にさせなければならな
|
||||
い。Linux の USB 開発者は、自分の時間を使って仕事をしている。よっ
|
||||
て、価値のない余計な仕事を報酬もなしに実行しろと言うことはできない。
|
||||
|
||||
|
||||
セキュリティ問題も、Linux にとっては非常に重要である。ひとたびセキ
|
||||
ュリティに関する問題が発見されれば、それは極めて短期間のうちに修正
|
||||
される。セキュリティ問題の発生を防ぐための修正は、カーネルの内部イ
|
||||
ンターフェースの変更を何度も引き起こしてきた。その際同時に、変更さ
|
||||
れたインターフェースを使用する全てのドライバもまた変更された。これ
|
||||
により問題が解消し、将来偶然に問題が再発してしまわないことが保証さ
|
||||
れる。もし内部インターフェースの変更が許されないとしたら、このよう
|
||||
にセキュリティ問題を修正し、将来再発しないことを保証することなど不
|
||||
可能なのだ。
|
||||
|
||||
|
||||
カーネルのインターフェースは時が経つにつれクリーンナップを受ける。
|
||||
誰も使っていないインターフェースは削除される。これにより、可能な限
|
||||
りカーネルが小さく保たれ、現役の全てのインターフェースが可能な限り
|
||||
テストされることを保証しているのだ。(使われていないインターフェー
|
||||
スの妥当性をテストすることは不可能と言っていいだろう)
|
||||
|
||||
|
||||
|
||||
これから何をすべきか
|
||||
-----------------------
|
||||
|
||||
では、もしメインのカーネルツリーに含まれない Linux カーネルドライ
|
||||
バがあったとして、あなたは、つまり開発者は何をするべきだろうか?全
|
||||
てのディストリビューションの全てのカーネルバージョン向けにバイナリ
|
||||
のドライバを供給することは悪夢であり、カーネルインターフェースの変
|
||||
更を追いかけ続けることもまた過酷な仕事だ。
|
||||
|
||||
|
||||
答えは簡単。そのドライバをメインのカーネルツリーに入れてしまえばよ
|
||||
い。(ここで言及しているのは、GPL に従って公開されるドライバのこと
|
||||
だということに注意してほしい。あなたのコードがそれに該当しないなら
|
||||
ば、さよなら。幸運を祈ります。ご自分で何とかしてください。Andrew
|
||||
と Linus からのコメント<Andrew と Linus のコメントへのリンクをこ
|
||||
こに置く>をどうぞ)ドライバがメインツリーに入れば、カーネルのイン
|
||||
ターフェースが変更された場合、変更を行った開発者によってドライバも
|
||||
修正されることになるだろう。あなたはほとんど労力を払うことなしに、
|
||||
常にビルド可能できちんと動作するドライバを手に入れることができる。
|
||||
|
||||
|
||||
ドライバをメインのカーネルツリーに入れると、非常に好ましい以下の効
|
||||
果がある。
|
||||
- ドライバの品質が向上する一方で、(元の開発者にとっての)メンテ
|
||||
ナンスコストは下がる。
|
||||
- あなたのドライバに他の開発者が機能を追加してくれる。
|
||||
- 誰かがあなたのドライバにあるバグを見つけ、修正してくれる。
|
||||
- 誰かがあなたのドライバにある改善点を見つけてくれる。
|
||||
- 外部インターフェースが変更されドライバの更新が必要になった場合、
|
||||
誰かがあなたの代わりに更新してくれる。
|
||||
- ドライバを入れてくれとディストロに頼まなくても、そのドライバは
|
||||
全ての Linux ディストリビューションに自動的に含まれてリリース
|
||||
される。
|
||||
|
||||
|
||||
Linux では、他のどのオペレーティングシステムよりも数多くのデバイス
|
||||
が「そのまま」使用できるようになった。また Linux は、どのオペレー
|
||||
ティングシステムよりも数多くのプロセッサアーキテクチャ上でそれらの
|
||||
デバイスを使用することができるようにもなった。このように、Linux の
|
||||
開発モデルは実証されており、今後も間違いなく正しい方向へと進んでい
|
||||
くだろう。:)
|
||||
|
||||
|
||||
|
||||
------
|
||||
|
||||
この文書の初期の草稿に対し、Randy Dunlap, Andrew Morton, David
|
||||
Brownell, Hanna Linder, Robert Love, Nishanth Aravamudan から査読
|
||||
と助言を頂きました。感謝申し上げます。
|
||||
|
@ -98,6 +98,15 @@ applicable everywhere (see syntax).
|
||||
times, the limit is set to the largest selection.
|
||||
Reverse dependencies can only be used with boolean or tristate
|
||||
symbols.
|
||||
Note:
|
||||
select is evil.... select will by brute force set a symbol
|
||||
equal to 'y' without visiting the dependencies. So abusing
|
||||
select you are able to select a symbol FOO even if FOO depends
|
||||
on BAR that is not set. In general use select only for
|
||||
non-visible symbols (no prompts anywhere) and for symbols with
|
||||
no dependencies. That will limit the usefulness but on the
|
||||
other hand avoid the illegal configurations all over. kconfig
|
||||
should one day warn about such things.
|
||||
|
||||
- numerical ranges: "range" <symbol> <symbol> ["if" <expr>]
|
||||
This allows limiting the range of possible input values for int
|
||||
|
@ -501,6 +501,20 @@ more details, with real examples.
|
||||
The third parameter may be a text as in this example, but it may also
|
||||
be an expanded variable or a macro.
|
||||
|
||||
cc-fullversion
|
||||
cc-fullversion is useful when the exact version of gcc is needed.
|
||||
One typical use-case is when a specific GCC version is broken.
|
||||
cc-fullversion points out a more specific version than cc-version does.
|
||||
|
||||
Example:
|
||||
#arch/powerpc/Makefile
|
||||
$(Q)if test "$(call cc-fullversion)" = "040200" ; then \
|
||||
echo -n '*** GCC-4.2.0 cannot compile the 64-bit powerpc ' ; \
|
||||
false ; \
|
||||
fi
|
||||
|
||||
In this example for a specific GCC version the build will error out explaining
|
||||
to the user why it stops.
|
||||
|
||||
=== 4 Host Program support
|
||||
|
||||
|
@ -30,17 +30,18 @@ the beginning of each description states the restrictions within which a
|
||||
parameter is applicable:
|
||||
|
||||
ACPI ACPI support is enabled.
|
||||
AGP AGP (Accelerated Graphics Port) is enabled.
|
||||
ALSA ALSA sound support is enabled.
|
||||
APIC APIC support is enabled.
|
||||
APM Advanced Power Management support is enabled.
|
||||
AX25 Appropriate AX.25 support is enabled.
|
||||
BLACKFIN Blackfin architecture is enabled.
|
||||
DRM Direct Rendering Management support is enabled.
|
||||
EDD BIOS Enhanced Disk Drive Services (EDD) is enabled
|
||||
EFI EFI Partitioning (GPT) is enabled
|
||||
EIDE EIDE/ATAPI support is enabled.
|
||||
FB The frame buffer device is enabled.
|
||||
HW Appropriate hardware is enabled.
|
||||
IA-32 IA-32 aka i386 architecture is enabled.
|
||||
IA-64 IA-64 architecture is enabled.
|
||||
IOSCHED More than one I/O scheduler is enabled.
|
||||
IP_PNP IP DHCP, BOOTP, or RARP is enabled.
|
||||
@ -57,16 +58,17 @@ parameter is applicable:
|
||||
MDA MDA console support is enabled.
|
||||
MOUSE Appropriate mouse support is enabled.
|
||||
MSI Message Signaled Interrupts (PCI).
|
||||
MTD MTD support is enabled.
|
||||
MTD MTD (Memory Technology Device) support is enabled.
|
||||
NET Appropriate network support is enabled.
|
||||
NUMA NUMA support is enabled.
|
||||
GENERIC_TIME The generic timeofday code is enabled.
|
||||
NFS Appropriate NFS support is enabled.
|
||||
OSS OSS sound support is enabled.
|
||||
PV_OPS A paravirtualized kernel
|
||||
PARIDE The ParIDE subsystem is enabled.
|
||||
PV_OPS A paravirtualized kernel is enabled.
|
||||
PARIDE The ParIDE (parallel port IDE) subsystem is enabled.
|
||||
PARISC The PA-RISC architecture is enabled.
|
||||
PCI PCI bus support is enabled.
|
||||
PCIE PCI Express support is enabled.
|
||||
PCMCIA The PCMCIA subsystem is enabled.
|
||||
PNP Plug & Play support is enabled.
|
||||
PPC PowerPC architecture is enabled.
|
||||
@ -91,6 +93,7 @@ parameter is applicable:
|
||||
VT Virtual terminal support is enabled.
|
||||
WDT Watchdog support is enabled.
|
||||
XT IBM PC/XT MFM hard disk support is enabled.
|
||||
X86-32 X86-32, aka i386 architecture is enabled.
|
||||
X86-64 X86-64 architecture is enabled.
|
||||
More X86-64 boot options can be found in
|
||||
Documentation/x86_64/boot-options.txt .
|
||||
@ -122,10 +125,6 @@ and is between 256 and 4096 characters. It is defined in the file
|
||||
./include/asm/setup.h as COMMAND_LINE_SIZE.
|
||||
|
||||
|
||||
53c7xx= [HW,SCSI] Amiga SCSI controllers
|
||||
See header of drivers/scsi/53c7xx.c.
|
||||
See also Documentation/scsi/ncr53c7xx.txt.
|
||||
|
||||
acpi= [HW,ACPI,X86-64,i386]
|
||||
Advanced Configuration and Power Interface
|
||||
Format: { force | off | ht | strict | noirq }
|
||||
@ -166,6 +165,8 @@ and is between 256 and 4096 characters. It is defined in the file
|
||||
acpi_irq_isa= [HW,ACPI] If irq_balance, mark listed IRQs used by ISA
|
||||
Format: <irq>,<irq>...
|
||||
|
||||
acpi_no_auto_ssdt [HW,ACPI] Disable automatic loading of SSDT
|
||||
|
||||
acpi_os_name= [HW,ACPI] Tell ACPI BIOS the name of the OS
|
||||
Format: To spoof as Windows 98: ="Microsoft Windows"
|
||||
|
||||
@ -222,11 +223,17 @@ and is between 256 and 4096 characters. It is defined in the file
|
||||
|
||||
acpi_fake_ecdt [HW,ACPI] Workaround failure due to BIOS lacking ECDT
|
||||
|
||||
acpi_pm_good [IA-32,X86-64]
|
||||
acpi_pm_good [X86-32,X86-64]
|
||||
Override the pmtimer bug detection: force the kernel
|
||||
to assume that this machine's pmtimer latches its value
|
||||
and always returns good values.
|
||||
|
||||
agp= [AGP]
|
||||
{ off | try_unsupported }
|
||||
off: disable AGP support
|
||||
try_unsupported: try to drive unsupported chipsets
|
||||
(may crash computer or cause data corruption)
|
||||
|
||||
enable_timer_pin_1 [i386,x86-64]
|
||||
Enable PIN 1 of APIC timer
|
||||
Can be useful to work around chipset bugs
|
||||
@ -279,7 +286,8 @@ and is between 256 and 4096 characters. It is defined in the file
|
||||
not play well with APC CPU idle - disable it if you have
|
||||
APC and your system crashes randomly.
|
||||
|
||||
apic= [APIC,i386] Change the output verbosity whilst booting
|
||||
apic= [APIC,i386] Advanced Programmable Interrupt Controller
|
||||
Change the output verbosity whilst booting
|
||||
Format: { quiet (default) | verbose | debug }
|
||||
Change the amount of debugging information output
|
||||
when initialising the APIC and IO-APIC components.
|
||||
@ -353,7 +361,7 @@ and is between 256 and 4096 characters. It is defined in the file
|
||||
|
||||
c101= [NET] Moxa C101 synchronous serial card
|
||||
|
||||
cachesize= [BUGS=IA-32] Override level 2 CPU cache size detection.
|
||||
cachesize= [BUGS=X86-32] Override level 2 CPU cache size detection.
|
||||
Sometimes CPU hardware bugs make them report the cache
|
||||
size incorrectly. The kernel will attempt work arounds
|
||||
to fix known problems, but for some CPUs it is not
|
||||
@ -372,7 +380,7 @@ and is between 256 and 4096 characters. It is defined in the file
|
||||
Value can be changed at runtime via
|
||||
/selinux/checkreqprot.
|
||||
|
||||
clock= [BUGS=IA-32, HW] gettimeofday clocksource override.
|
||||
clock= [BUGS=X86-32, HW] gettimeofday clocksource override.
|
||||
[Deprecated]
|
||||
Forces specified clocksource (if available) to be used
|
||||
when calculating gettimeofday(). If specified
|
||||
@ -390,7 +398,7 @@ and is between 256 and 4096 characters. It is defined in the file
|
||||
[ARM] imx_timer1,OSTS,netx_timer,mpu_timer2,
|
||||
pxa_timer,timer3,32k_counter,timer0_1
|
||||
[AVR32] avr32
|
||||
[IA-32] pit,hpet,tsc,vmi-timer;
|
||||
[X86-32] pit,hpet,tsc,vmi-timer;
|
||||
scx200_hrt on Geode; cyclone on IBM x440
|
||||
[MIPS] MIPS
|
||||
[PARISC] cr16
|
||||
@ -410,7 +418,7 @@ and is between 256 and 4096 characters. It is defined in the file
|
||||
over the 8254 in addition to over the IO-APIC. The
|
||||
kernel tries to set a sensible default.
|
||||
|
||||
hpet= [IA-32,HPET] option to disable HPET and use PIT.
|
||||
hpet= [X86-32,HPET] option to disable HPET and use PIT.
|
||||
Format: disable
|
||||
|
||||
com20020= [HW,NET] ARCnet - COM20020 chipset
|
||||
@ -462,9 +470,6 @@ and is between 256 and 4096 characters. It is defined in the file
|
||||
Format:
|
||||
<first_slot>,<last_slot>,<port>,<enum_bit>[,<debug>]
|
||||
|
||||
cpia_pp= [HW,PPT]
|
||||
Format: { parport<nr> | auto | none }
|
||||
|
||||
crashkernel=nn[KMG]@ss[KMG]
|
||||
[KNL] Reserve a chunk of physical memory to
|
||||
hold a kernel to switch to with kexec on panic.
|
||||
@ -547,7 +552,7 @@ and is between 256 and 4096 characters. It is defined in the file
|
||||
|
||||
dtc3181e= [HW,SCSI]
|
||||
|
||||
earlyprintk= [IA-32,X86-64,SH]
|
||||
earlyprintk= [X86-32,X86-64,SH,BLACKFIN]
|
||||
earlyprintk=vga
|
||||
earlyprintk=serial[,ttySn[,baudrate]]
|
||||
|
||||
@ -585,7 +590,7 @@ and is between 256 and 4096 characters. It is defined in the file
|
||||
eisa_irq_edge= [PARISC,HW]
|
||||
See header of drivers/parisc/eisa.c.
|
||||
|
||||
elanfreq= [IA-32]
|
||||
elanfreq= [X86-32]
|
||||
See comment before function elanfreq_setup() in
|
||||
arch/i386/kernel/cpu/cpufreq/elanfreq.c.
|
||||
|
||||
@ -594,7 +599,7 @@ and is between 256 and 4096 characters. It is defined in the file
|
||||
See Documentation/block/as-iosched.txt and
|
||||
Documentation/block/deadline-iosched.txt for details.
|
||||
|
||||
elfcorehdr= [IA-32, X86_64]
|
||||
elfcorehdr= [X86-32, X86_64]
|
||||
Specifies physical address of start of kernel core
|
||||
image elf header. Generally kexec loader will
|
||||
pass this option to capture kernel.
|
||||
@ -676,7 +681,7 @@ and is between 256 and 4096 characters. It is defined in the file
|
||||
hisax= [HW,ISDN]
|
||||
See Documentation/isdn/README.HiSax.
|
||||
|
||||
hugepages= [HW,IA-32,IA-64] Maximal number of HugeTLB pages.
|
||||
hugepages= [HW,X86-32,IA-64] Maximal number of HugeTLB pages.
|
||||
|
||||
i8042.direct [HW] Put keyboard port into non-translated mode
|
||||
i8042.dumbkbd [HW] Pretend that controller can only read data from
|
||||
@ -768,7 +773,8 @@ and is between 256 and 4096 characters. It is defined in the file
|
||||
See Documentation/nfsroot.txt.
|
||||
|
||||
ip2= [HW] Set IO/IRQ pairs for up to 4 IntelliPort boards
|
||||
See comment before ip2_setup() in drivers/char/ip2.c.
|
||||
See comment before ip2_setup() in
|
||||
drivers/char/ip2/ip2base.c.
|
||||
|
||||
ips= [HW,SCSI] Adaptec / IBM ServeRAID controller
|
||||
See header of drivers/scsi/ips.c.
|
||||
@ -817,7 +823,7 @@ and is between 256 and 4096 characters. It is defined in the file
|
||||
js= [HW,JOY] Analog joystick
|
||||
See Documentation/input/joystick.txt.
|
||||
|
||||
kernelcore=nn[KMG] [KNL,IA-32,IA-64,PPC,X86-64] This parameter
|
||||
kernelcore=nn[KMG] [KNL,X86-32,IA-64,PPC,X86-64] This parameter
|
||||
specifies the amount of memory usable by the kernel
|
||||
for non-movable allocations. The requested amount is
|
||||
spread evenly throughout all nodes in the system. The
|
||||
@ -833,7 +839,7 @@ and is between 256 and 4096 characters. It is defined in the file
|
||||
use the HighMem zone if it exists, and the Normal
|
||||
zone if it does not.
|
||||
|
||||
movablecore=nn[KMG] [KNL,IA-32,IA-64,PPC,X86-64] This parameter
|
||||
movablecore=nn[KMG] [KNL,X86-32,IA-64,PPC,X86-64] This parameter
|
||||
is similar to kernelcore except it specifies the
|
||||
amount of memory used for migratable allocations.
|
||||
If both kernelcore and movablecore is specified,
|
||||
@ -845,27 +851,23 @@ and is between 256 and 4096 characters. It is defined in the file
|
||||
|
||||
keepinitrd [HW,ARM]
|
||||
|
||||
kstack=N [IA-32,X86-64] Print N words from the kernel stack
|
||||
kstack=N [X86-32,X86-64] Print N words from the kernel stack
|
||||
in oops dumps.
|
||||
|
||||
l2cr= [PPC]
|
||||
|
||||
lapic [IA-32,APIC] Enable the local APIC even if BIOS
|
||||
lapic [X86-32,APIC] Enable the local APIC even if BIOS
|
||||
disabled it.
|
||||
|
||||
lapic_timer_c2_ok [IA-32,x86-64,APIC] trust the local apic timer in
|
||||
lapic_timer_c2_ok [X86-32,x86-64,APIC] trust the local apic timer in
|
||||
C2 power state.
|
||||
|
||||
lasi= [HW,SCSI] PARISC LASI driver for the 53c700 chip
|
||||
Format: addr:<io>,irq:<irq>
|
||||
|
||||
legacy_serial.force [HW,IA-32,X86-64]
|
||||
Probe for COM ports at legacy addresses even
|
||||
if PNPBIOS or ACPI should describe them. This
|
||||
is for working around firmware defects.
|
||||
|
||||
llsc*= [IA64] See function print_params() in
|
||||
arch/ia64/sn/kernel/llsc4.c.
|
||||
libata.noacpi [LIBATA] Disables use of ACPI in libata suspend/resume
|
||||
when set.
|
||||
Format: <int>
|
||||
|
||||
load_ramdisk= [RAM] List of ramdisks to load from floppy
|
||||
See Documentation/ramdisk.txt.
|
||||
@ -953,14 +955,10 @@ and is between 256 and 4096 characters. It is defined in the file
|
||||
Format: <1-256>
|
||||
|
||||
maxcpus= [SMP] Maximum number of processors that an SMP kernel
|
||||
should make use of.
|
||||
Using "nosmp" or "maxcpus=0" will disable SMP
|
||||
entirely (the MPS table probe still happens, though).
|
||||
A command-line option of "maxcpus=<NUM>", where <NUM>
|
||||
is an integer greater than 0, limits the maximum number
|
||||
of CPUs activated in SMP mode to <NUM>.
|
||||
Using "maxcpus=1" on an SMP kernel is the trivial
|
||||
case of an SMP kernel with only one CPU.
|
||||
should make use of. maxcpus=n : n >= 0 limits the
|
||||
kernel to using 'n' processors. n=0 is a special case,
|
||||
it is equivalent to "nosmp", which also disables
|
||||
the IO APIC.
|
||||
|
||||
max_addr=[KMG] [KNL,BOOT,ia64] All physical memory greater than or
|
||||
equal to this physical address is ignored.
|
||||
@ -972,11 +970,11 @@ and is between 256 and 4096 characters. It is defined in the file
|
||||
[SCSI] Maximum number of LUNs received.
|
||||
Should be between 1 and 16384.
|
||||
|
||||
mca-pentium [BUGS=IA-32]
|
||||
mca-pentium [BUGS=X86-32]
|
||||
|
||||
mcatest= [IA-64]
|
||||
|
||||
mce [IA-32] Machine Check Exception
|
||||
mce [X86-32] Machine Check Exception
|
||||
|
||||
md= [HW] RAID subsystems devices and level
|
||||
See Documentation/md.txt.
|
||||
@ -988,14 +986,14 @@ and is between 256 and 4096 characters. It is defined in the file
|
||||
mem=nn[KMG] [KNL,BOOT] Force usage of a specific amount of memory
|
||||
Amount of memory to be used when the kernel is not able
|
||||
to see the whole system memory or for test.
|
||||
[IA-32] Use together with memmap= to avoid physical
|
||||
[X86-32] Use together with memmap= to avoid physical
|
||||
address space collisions. Without memmap= PCI devices
|
||||
could be placed at addresses belonging to unused RAM.
|
||||
|
||||
mem=nopentium [BUGS=IA-32] Disable usage of 4MB pages for kernel
|
||||
mem=nopentium [BUGS=X86-32] Disable usage of 4MB pages for kernel
|
||||
memory.
|
||||
|
||||
memmap=exactmap [KNL,IA-32,X86_64] Enable setting of an exact
|
||||
memmap=exactmap [KNL,X86-32,X86_64] Enable setting of an exact
|
||||
E820 memory map, as specified by the user.
|
||||
Such memmap=exactmap lines can be constructed based on
|
||||
BIOS output or other requirements. See the memmap=nn@ss
|
||||
@ -1016,6 +1014,10 @@ and is between 256 and 4096 characters. It is defined in the file
|
||||
meye.*= [HW] Set MotionEye Camera parameters
|
||||
See Documentation/video4linux/meye.txt.
|
||||
|
||||
mfgpt_irq= [IA-32] Specify the IRQ to use for the
|
||||
Multi-Function General Purpose Timers on AMD Geode
|
||||
platforms.
|
||||
|
||||
mga= [HW,DRM]
|
||||
|
||||
mousedev.tap_time=
|
||||
@ -1039,7 +1041,7 @@ and is between 256 and 4096 characters. It is defined in the file
|
||||
<name>,<region-number>[,<base>,<size>,<buswidth>,<altbuswidth>]
|
||||
|
||||
mtdparts= [MTD]
|
||||
See drivers/mtd/cmdline.c.
|
||||
See drivers/mtd/cmdlinepart.c.
|
||||
|
||||
mtouchusb.raw_coordinates=
|
||||
[HW] Make the MicroTouch USB driver use raw coordinates
|
||||
@ -1081,16 +1083,12 @@ and is between 256 and 4096 characters. It is defined in the file
|
||||
[NFS] set the maximum lifetime for idmapper cache
|
||||
entries.
|
||||
|
||||
nmi_watchdog= [KNL,BUGS=IA-32] Debugging features for SMP kernels
|
||||
nmi_watchdog= [KNL,BUGS=X86-32] Debugging features for SMP kernels
|
||||
|
||||
no387 [BUGS=IA-32] Tells the kernel to use the 387 maths
|
||||
no387 [BUGS=X86-32] Tells the kernel to use the 387 maths
|
||||
emulation library even if a 387 maths coprocessor
|
||||
is present.
|
||||
|
||||
noacpi [LIBATA] Disables use of ACPI in libata suspend/resume
|
||||
when set.
|
||||
Format: <int>
|
||||
|
||||
noaliencache [MM, NUMA, SLAB] Disables the allocation of alien
|
||||
caches in the slab allocator. Saves per-node memory,
|
||||
but will impact performance.
|
||||
@ -1114,17 +1112,17 @@ and is between 256 and 4096 characters. It is defined in the file
|
||||
|
||||
noexec [IA-64]
|
||||
|
||||
noexec [IA-32,X86-64]
|
||||
noexec [X86-32,X86-64]
|
||||
noexec=on: enable non-executable mappings (default)
|
||||
noexec=off: disable non-executable mappings
|
||||
|
||||
nofxsr [BUGS=IA-32] Disables x86 floating point extended
|
||||
nofxsr [BUGS=X86-32] Disables x86 floating point extended
|
||||
register save and restore. The kernel will only save
|
||||
legacy floating-point registers on task switch.
|
||||
|
||||
nohlt [BUGS=ARM]
|
||||
|
||||
no-hlt [BUGS=IA-32] Tells the kernel that the hlt
|
||||
no-hlt [BUGS=X86-32] Tells the kernel that the hlt
|
||||
instruction doesn't work correctly and not to
|
||||
use it.
|
||||
|
||||
@ -1139,12 +1137,12 @@ and is between 256 and 4096 characters. It is defined in the file
|
||||
Valid arguments: on, off
|
||||
Default: on
|
||||
|
||||
noirqbalance [IA-32,SMP,KNL] Disable kernel irq balancing
|
||||
noirqbalance [X86-32,SMP,KNL] Disable kernel irq balancing
|
||||
|
||||
noirqdebug [IA-32] Disables the code which attempts to detect and
|
||||
noirqdebug [X86-32] Disables the code which attempts to detect and
|
||||
disable unhandled interrupt sources.
|
||||
|
||||
no_timer_check [IA-32,X86_64,APIC] Disables the code which tests for
|
||||
no_timer_check [X86-32,X86_64,APIC] Disables the code which tests for
|
||||
broken timer IRQ sources.
|
||||
|
||||
noisapnp [ISAPNP] Disables ISA PnP code.
|
||||
@ -1154,20 +1152,25 @@ and is between 256 and 4096 characters. It is defined in the file
|
||||
|
||||
nointroute [IA-64]
|
||||
|
||||
nolapic [IA-32,APIC] Do not enable or use the local APIC.
|
||||
nojitter [IA64] Disables jitter checking for ITC timers.
|
||||
|
||||
nolapic_timer [IA-32,APIC] Do not use the local APIC timer.
|
||||
nolapic [X86-32,APIC] Do not enable or use the local APIC.
|
||||
|
||||
nolapic_timer [X86-32,APIC] Do not use the local APIC timer.
|
||||
|
||||
noltlbs [PPC] Do not use large page/tlb entries for kernel
|
||||
lowmem mapping on PPC40x.
|
||||
|
||||
nomca [IA-64] Disable machine check abort handling
|
||||
|
||||
nomce [IA-32] Machine Check Exception
|
||||
nomce [X86-32] Machine Check Exception
|
||||
|
||||
noreplace-paravirt [IA-32,PV_OPS] Don't patch paravirt_ops
|
||||
nomfgpt [X86-32] Disable Multi-Function General Purpose
|
||||
Timer usage (for AMD Geode machines).
|
||||
|
||||
noreplace-smp [IA-32,SMP] Don't replace SMP instructions
|
||||
noreplace-paravirt [X86-32,PV_OPS] Don't patch paravirt_ops
|
||||
|
||||
noreplace-smp [X86-32,SMP] Don't replace SMP instructions
|
||||
with UP alternatives
|
||||
|
||||
noresidual [PPC] Don't use residual data on PReP machines.
|
||||
@ -1181,15 +1184,16 @@ and is between 256 and 4096 characters. It is defined in the file
|
||||
|
||||
nosbagart [IA-64]
|
||||
|
||||
nosep [BUGS=IA-32] Disables x86 SYSENTER/SYSEXIT support.
|
||||
nosep [BUGS=X86-32] Disables x86 SYSENTER/SYSEXIT support.
|
||||
|
||||
nosmp [SMP] Tells an SMP kernel to act as a UP kernel.
|
||||
nosmp [SMP] Tells an SMP kernel to act as a UP kernel,
|
||||
and disable the IO APIC. legacy for "maxcpus=0".
|
||||
|
||||
nosoftlockup [KNL] Disable the soft-lockup detector.
|
||||
|
||||
nosync [HW,M68K] Disables sync negotiation for all devices.
|
||||
|
||||
notsc [BUGS=IA-32] Disable Time Stamp Counter
|
||||
notsc [BUGS=X86-32] Disable Time Stamp Counter
|
||||
|
||||
nousb [USB] Disable the USB subsystem
|
||||
|
||||
@ -1262,28 +1266,33 @@ and is between 256 and 4096 characters. It is defined in the file
|
||||
See also Documentation/paride.txt.
|
||||
|
||||
pci=option[,option...] [PCI] various PCI subsystem options:
|
||||
off [IA-32] don't probe for the PCI bus
|
||||
bios [IA-32] force use of PCI BIOS, don't access
|
||||
off [X86-32] don't probe for the PCI bus
|
||||
bios [X86-32] force use of PCI BIOS, don't access
|
||||
the hardware directly. Use this if your machine
|
||||
has a non-standard PCI host bridge.
|
||||
nobios [IA-32] disallow use of PCI BIOS, only direct
|
||||
nobios [X86-32] disallow use of PCI BIOS, only direct
|
||||
hardware access methods are allowed. Use this
|
||||
if you experience crashes upon bootup and you
|
||||
suspect they are caused by the BIOS.
|
||||
conf1 [IA-32] Force use of PCI Configuration
|
||||
conf1 [X86-32] Force use of PCI Configuration
|
||||
Mechanism 1.
|
||||
conf2 [IA-32] Force use of PCI Configuration
|
||||
conf2 [X86-32] Force use of PCI Configuration
|
||||
Mechanism 2.
|
||||
nommconf [IA-32,X86_64] Disable use of MMCONFIG for PCI
|
||||
noaer [PCIE] If the PCIEAER kernel config parameter is
|
||||
enabled, this kernel boot option can be used to
|
||||
disable the use of PCIE advanced error reporting.
|
||||
nodomains [PCI] Disable support for multiple PCI
|
||||
root domains (aka PCI segments, in ACPI-speak).
|
||||
nommconf [X86-32,X86_64] Disable use of MMCONFIG for PCI
|
||||
Configuration
|
||||
nomsi [MSI] If the PCI_MSI kernel config parameter is
|
||||
enabled, this kernel boot option can be used to
|
||||
disable the use of MSI interrupts system-wide.
|
||||
nosort [IA-32] Don't sort PCI devices according to
|
||||
nosort [X86-32] Don't sort PCI devices according to
|
||||
order given by the PCI BIOS. This sorting is
|
||||
done to get a device order compatible with
|
||||
older kernels.
|
||||
biosirq [IA-32] Use PCI BIOS calls to get the interrupt
|
||||
biosirq [X86-32] Use PCI BIOS calls to get the interrupt
|
||||
routing table. These calls are known to be buggy
|
||||
on several machines and they hang the machine
|
||||
when used, but on other computers it's the only
|
||||
@ -1291,33 +1300,35 @@ and is between 256 and 4096 characters. It is defined in the file
|
||||
this option if the kernel is unable to allocate
|
||||
IRQs or discover secondary PCI buses on your
|
||||
motherboard.
|
||||
rom [IA-32] Assign address space to expansion ROMs.
|
||||
rom [X86-32] Assign address space to expansion ROMs.
|
||||
Use with caution as certain devices share
|
||||
address decoders between ROMs and other
|
||||
resources.
|
||||
irqmask=0xMMMM [IA-32] Set a bit mask of IRQs allowed to be
|
||||
irqmask=0xMMMM [X86-32] Set a bit mask of IRQs allowed to be
|
||||
assigned automatically to PCI devices. You can
|
||||
make the kernel exclude IRQs of your ISA cards
|
||||
this way.
|
||||
pirqaddr=0xAAAAA [IA-32] Specify the physical address
|
||||
pirqaddr=0xAAAAA [X86-32] Specify the physical address
|
||||
of the PIRQ table (normally generated
|
||||
by the BIOS) if it is outside the
|
||||
F0000h-100000h range.
|
||||
lastbus=N [IA-32] Scan all buses thru bus #N. Can be
|
||||
lastbus=N [X86-32] Scan all buses thru bus #N. Can be
|
||||
useful if the kernel is unable to find your
|
||||
secondary buses and you want to tell it
|
||||
explicitly which ones they are.
|
||||
assign-busses [IA-32] Always assign all PCI bus
|
||||
assign-busses [X86-32] Always assign all PCI bus
|
||||
numbers ourselves, overriding
|
||||
whatever the firmware may have done.
|
||||
usepirqmask [IA-32] Honor the possible IRQ mask stored
|
||||
usepirqmask [X86-32] Honor the possible IRQ mask stored
|
||||
in the BIOS $PIR table. This is needed on
|
||||
some systems with broken BIOSes, notably
|
||||
some HP Pavilion N5400 and Omnibook XE3
|
||||
notebooks. This will have no effect if ACPI
|
||||
IRQ routing is enabled.
|
||||
noacpi [IA-32] Do not use ACPI for IRQ routing
|
||||
noacpi [X86-32] Do not use ACPI for IRQ routing
|
||||
or for PCI scanning.
|
||||
use_crs [X86-32] Use _CRS for PCI resource
|
||||
allocation.
|
||||
routeirq Do IRQ routing for all PCI devices.
|
||||
This is normally done in pci_enable_device(),
|
||||
so this option is a temporary workaround
|
||||
@ -1434,6 +1445,10 @@ and is between 256 and 4096 characters. It is defined in the file
|
||||
pt. [PARIDE]
|
||||
See Documentation/paride.txt.
|
||||
|
||||
pty.legacy_count=
|
||||
[KNL] Number of legacy pty's. Overwrites compiled-in
|
||||
default number.
|
||||
|
||||
quiet [KNL] Disable most log messages
|
||||
|
||||
r128= [HW,DRM]
|
||||
@ -1465,13 +1480,13 @@ and is between 256 and 4096 characters. It is defined in the file
|
||||
Run specified binary instead of /init from the ramdisk,
|
||||
used for early userspace startup. See initrd.
|
||||
|
||||
reboot= [BUGS=IA-32,BUGS=ARM,BUGS=IA-64] Rebooting mode
|
||||
reboot= [BUGS=X86-32,BUGS=ARM,BUGS=IA-64] Rebooting mode
|
||||
Format: <reboot_mode>[,<reboot_mode2>[,...]]
|
||||
See arch/*/kernel/reboot.c or arch/*/kernel/process.c
|
||||
|
||||
reserve= [KNL,BUGS] Force the kernel to ignore some iomem area
|
||||
|
||||
reservetop= [IA-32]
|
||||
reservetop= [X86-32]
|
||||
Format: nn[KMG]
|
||||
Reserves a hole at the top of the kernel virtual
|
||||
address space.
|
||||
@ -1562,7 +1577,7 @@ and is between 256 and 4096 characters. It is defined in the file
|
||||
Value can be changed at runtime via
|
||||
/selinux/compat_net.
|
||||
|
||||
serialnumber [BUGS=IA-32]
|
||||
serialnumber [BUGS=X86-32]
|
||||
|
||||
sg_def_reserved_size= [SCSI]
|
||||
|
||||
@ -1615,7 +1630,7 @@ and is between 256 and 4096 characters. It is defined in the file
|
||||
smart2= [HW]
|
||||
Format: <io1>[,<io2>[,...,<io8>]]
|
||||
|
||||
smp-alt-once [IA-32,SMP] On a hotplug CPU system, only
|
||||
smp-alt-once [X86-32,SMP] On a hotplug CPU system, only
|
||||
attempt to substitute SMP alternatives once at boot.
|
||||
|
||||
smsc-ircc2.nopnp [HW] Don't use PNP to discover SMC devices
|
||||
@ -1821,6 +1836,30 @@ and is between 256 and 4096 characters. It is defined in the file
|
||||
thash_entries= [KNL,NET]
|
||||
Set number of hash buckets for TCP connection
|
||||
|
||||
thermal.act= [HW,ACPI]
|
||||
-1: disable all active trip points in all thermal zones
|
||||
<degrees C>: override all lowest active trip points
|
||||
|
||||
thermal.crt= [HW,ACPI]
|
||||
-1: disable all critical trip points in all thermal zones
|
||||
<degrees C>: lower all critical trip points
|
||||
|
||||
thermal.nocrt= [HW,ACPI]
|
||||
Set to disable actions on ACPI thermal zone
|
||||
critical and hot trip points.
|
||||
|
||||
thermal.off= [HW,ACPI]
|
||||
1: disable ACPI thermal control
|
||||
|
||||
thermal.psv= [HW,ACPI]
|
||||
-1: disable all passive trip points
|
||||
<degrees C>: override all passive trip points to this value
|
||||
|
||||
thermal.tzp= [HW,ACPI]
|
||||
Specify global default ACPI thermal zone polling rate
|
||||
<deci-seconds>: poll all zones at this frequency
|
||||
0: no polling (default)
|
||||
|
||||
time Show timing data prefixed to each printk message line
|
||||
[deprecated, see 'printk.time']
|
||||
|
||||
@ -1880,15 +1919,18 @@ and is between 256 and 4096 characters. It is defined in the file
|
||||
usbhid.mousepoll=
|
||||
[USBHID] The interval which mice are to be polled at.
|
||||
|
||||
vdso= [IA-32,SH]
|
||||
vdso= [X86-32,SH,x86-64]
|
||||
vdso=2: enable compat VDSO (default with COMPAT_VDSO)
|
||||
vdso=1: enable VDSO (default)
|
||||
vdso=0: disable VDSO mapping
|
||||
|
||||
vector= [IA-64,SMP]
|
||||
vector=percpu: enable percpu vector domain
|
||||
|
||||
video= [FB] Frame buffer configuration
|
||||
See Documentation/fb/modedb.txt.
|
||||
|
||||
vga= [BOOT,IA-32] Select a particular video mode
|
||||
vga= [BOOT,X86-32] Select a particular video mode
|
||||
See Documentation/i386/boot.txt and
|
||||
Documentation/svga.txt.
|
||||
Use vga=ask for menu.
|
||||
@ -1920,7 +1962,7 @@ and is between 256 and 4096 characters. It is defined in the file
|
||||
See header of drivers/scsi/wd7000.c.
|
||||
|
||||
wdt= [WDT] Watchdog
|
||||
See Documentation/watchdog/watchdog.txt.
|
||||
See Documentation/watchdog/wdt.txt.
|
||||
|
||||
xd= [HW,XT] Original XT pre-IDE (RLL encoded) disks.
|
||||
xd_geo= See header of drivers/block/xd.c.
|
||||
|
@ -859,9 +859,8 @@ payload contents" for more information.
|
||||
void unregister_key_type(struct key_type *type);
|
||||
|
||||
|
||||
Under some circumstances, it may be desirable to desirable to deal with a
|
||||
bundle of keys. The facility provides access to the keyring type for managing
|
||||
such a bundle:
|
||||
Under some circumstances, it may be desirable to deal with a bundle of keys.
|
||||
The facility provides access to the keyring type for managing such a bundle:
|
||||
|
||||
struct key_type key_type_keyring;
|
||||
|
||||
|
623
Documentation/ko_KR/HOWTO
Normal file
623
Documentation/ko_KR/HOWTO
Normal file
@ -0,0 +1,623 @@
|
||||
NOTE:
|
||||
This is a version of Documentation/HOWTO translated into Korean
|
||||
This document is maintained by Minchan Kim <minchan.kim@gmail.com>
|
||||
If you find any difference between this document and the original file or
|
||||
a problem with the translation, please contact the maintainer of this file.
|
||||
|
||||
Please also note that the purpose of this file is to be easier to
|
||||
read for non-English (read: Korean) speakers and is not intended as
|
||||
a fork. So if you have any comments or updates for this file please
|
||||
try to update the original English file first.
|
||||
|
||||
==================================
|
||||
이 문서는
|
||||
Documentation/HOWTO
|
||||
의 한글 번역입니다.
|
||||
|
||||
역자: 김민찬 <minchan.kim@gmail.com>
|
||||
감수: 이제이미 <jamee.lee@samsung.com>
|
||||
==================================
|
||||
|
||||
어떻게 리눅스 커널 개발을 하는가
|
||||
---------------------------------
|
||||
|
||||
이 문서는 커널 개발에 있어 가장 중요한 문서이다. 이 문서는
|
||||
리눅스 커널 개발자가 되는 법과 리눅스 커널 개발 커뮤니티와 일하는
|
||||
법을 담고있다. 커널 프로그래밍의 기술적인 측면과 관련된 내용들은
|
||||
포함하지 않으려고 하였지만 올바른 길로 여러분을 안내하는 데 도움이
|
||||
될 것이다.
|
||||
|
||||
이 문서에서 오래된 것을 발견하면 문서의 아래쪽에 나열된 메인트너에게
|
||||
패치를 보내달라.
|
||||
|
||||
|
||||
소개
|
||||
----
|
||||
|
||||
자, 여러분은 리눅스 커널 개발자가 되는 법을 배우고 싶은가? 아니면
|
||||
상사로부터 "이 장치를 위한 리눅스 드라이버를 작성하시오"라는 말을
|
||||
들었는가? 이 문서는 여러분이 겪게 될 과정과 커뮤니티와 일하는 법을
|
||||
조언하여 여러분의 목적을 달성하기 위해 필요한 것 모두를 알려주는
|
||||
것이다.
|
||||
|
||||
커널은 대부분 C로 작성되었고 몇몇 아키텍쳐의 의존적인 부분은
|
||||
어셈블리로 작성되었다. 커널 개발을 위해 C를 잘 이해하고 있어야 한다.
|
||||
여러분이 특정 아키텍쳐의 low-level 개발을 할 것이 아니라면
|
||||
어셈블리(특정 아키텍쳐)는 잘 알아야 할 필요는 없다.
|
||||
다음의 참고서적들은 기본에 충실한 C 교육이나 수년간의 경험에 견주지는
|
||||
못하지만 적어도 참고 용도로는 좋을 것이다
|
||||
- "The C Programming Language" by Kernighan and Ritchie [Prentice Hall]
|
||||
- "Practical C Programming" by Steve Oualline [O'Reilly]
|
||||
- "C: A Reference Manual" by Harbison and Steele [Prentice Hall]
|
||||
|
||||
커널은 GNU C와 GNU 툴체인을 사용하여 작성되었다. 이 툴들은 ISO C89 표준을
|
||||
따르는 반면 표준에 있지 않은 많은 확장기능도 가지고 있다. 커널은 표준 C
|
||||
라이브러리와는 관계없이 freestanding C 환경이어서 C 표준의 일부는
|
||||
지원되지 않는다. 임의의 long long 나누기나 floating point는 지원되지 않는다.
|
||||
때론 이런 이유로 커널이 그런 확장 기능을 가진 툴체인을 가지고 만들어졌다는
|
||||
것이 이해하기 어려울 수도 있고 게다가 불행하게도 그런 것을 정확하게 설명하는
|
||||
어떤 참고문서도 있지 않다. 정보를 얻기 위해서는 gcc info (`info gcc`)페이지를
|
||||
살펴보라.
|
||||
|
||||
여러분은 기존의 개발 커뮤니티와 일하는 법을 배우려고 하고 있다는 것을
|
||||
기억하라. 코딩, 스타일, 절차에 관한 훌륭한 표준을 가진 사람들이 모인
|
||||
다양한 그룹이 있다. 이 표준들은 오랜동안 크고 지역적으로 분산된 팀들에
|
||||
의해 가장 좋은 방법으로 일하기위하여 찾은 것을 기초로 만들어져왔다.
|
||||
그 표준들은 문서화가 잘 되어 있기 때문에 가능한한 미리 많은 표준들에
|
||||
관하여 배우려고 시도하라. 다른 사람들은 여러분이나 여러분의 회사가
|
||||
일하는 방식에 적응하는 것을 원하지는 않는다.
|
||||
|
||||
|
||||
법적 문제
|
||||
---------
|
||||
|
||||
리눅스 커널 소스 코드는 GPL로 배포(release)되었다. 소스트리의 메인
|
||||
디렉토리에 있는 라이센스에 관하여 상세하게 쓰여 있는 COPYING이라는
|
||||
파일을 봐라. 여러분이 라이센스에 관한 더 깊은 문제를 가지고 있다면
|
||||
리눅스 커널 메일링 리스트에 묻지말고 변호사와 연락하라. 메일링
|
||||
리스트들에 있는 사람들은 변호사가 아니기 때문에 법적 문제에 관하여
|
||||
그들의 말에 의지해서는 안된다.
|
||||
|
||||
GPL에 관한 잦은 질문들과 답변들은 다음을 참조하라.
|
||||
http://www.gnu.org/licenses/gpl-faq.html
|
||||
|
||||
|
||||
문서
|
||||
----
|
||||
|
||||
리눅스 커널 소스 트리는 커널 커뮤니티와 일하는 법을 배우기 위한 많은
|
||||
귀중한 문서들을 가지고 있다. 새로운 기능들이 커널에 들어가게 될 때,
|
||||
그 기능을 어떻게 사용하는지에 관한 설명을 위하여 새로운 문서 파일을
|
||||
추가하는 것을 권장한다. 커널이 유저스페이스로 노출하는 인터페이스를
|
||||
변경하게 되면 변경을 설명하는 메뉴얼 페이지들에 대한 패치나 정보를
|
||||
mtk-manpages@gmx.net의 메인트너에게 보낼 것을 권장한다.
|
||||
|
||||
다음은 커널 소스 트리에 있는 읽어야 할 파일들의 리스트이다.
|
||||
README
|
||||
이 파일은 리눅스 커널에 관하여 간단한 배경 설명과 커널을 설정하고
|
||||
빌드하기 위해 필요한 것을 설명한다. 커널에 입문하는 사람들은 여기서
|
||||
시작해야 한다.
|
||||
|
||||
Documentation/Changes
|
||||
이 파일은 커널을 성공적으로 빌드하고 실행시키기 위해 필요한 다양한
|
||||
소프트웨어 패키지들의 최소 버젼을 나열한다.
|
||||
|
||||
Documentation/CodingStyle
|
||||
이 문서는 리눅스 커널 코딩 스타일과 그렇게 한 몇몇 이유를 설명한다.
|
||||
모든 새로운 코드는 이 문서에 가이드라인들을 따라야 한다. 대부분의
|
||||
메인트너들은 이 규칙을 따르는 패치들만을 받아들일 것이고 많은 사람들이
|
||||
그 패치가 올바른 스타일일 경우만 코드를 검토할 것이다.
|
||||
|
||||
Documentation/SubmittingPatches
|
||||
Documentation/SubmittingDrivers
|
||||
이 파일들은 성공적으로 패치를 만들고 보내는 법을 다음의 내용들로
|
||||
굉장히 상세히 설명하고 있다(그러나 다음으로 한정되진 않는다).
|
||||
- Email 내용들
|
||||
- Email 양식
|
||||
- 그것을 누구에게 보낼지
|
||||
이러한 규칙들을 따르는 것이 성공을 보장하진 않는다(왜냐하면 모든
|
||||
패치들은 내용과 스타일에 관하여 면밀히 검토되기 때문이다).
|
||||
그러나 규칙을 따르지 않는다면 거의 성공하지도 못할 것이다.
|
||||
|
||||
올바른 패치들을 만드는 법에 관한 훌륭한 다른 문서들이 있다.
|
||||
"The Perfect Patch"
|
||||
http://www.zip.com.au/~akpm/linux/patches/stuff/tpp.txt
|
||||
"Linux kernel patch submission format"
|
||||
http://linux.yyz.us/patch-format.html
|
||||
|
||||
Documentation/stable_api_nonsense.txt
|
||||
이 문서는 의도적으로 커널이 변하지 않는 API를 갖지 않도록 결정한
|
||||
이유를 설명하며 다음과 같은 것들을 포함한다.
|
||||
- 서브시스템 shim-layer(호환성을 위해?)
|
||||
- 운영 체제들 간의 드라이버 이식성
|
||||
- 커널 소스 트리내에 빠른 변화를 늦추는 것(또는 빠른 변화를 막는 것)
|
||||
이 문서는 리눅스 개발 철학을 이해하는데 필수적이며 다른 운영체제에서
|
||||
리눅스로 옮겨오는 사람들에게는 매우 중요하다.
|
||||
|
||||
|
||||
Documentation/SecurityBugs
|
||||
여러분들이 리눅스 커널의 보안 문제를 발견했다고 생각한다면 이 문서에
|
||||
나온 단계에 따라서 커널 개발자들에게 알리고 그 문제를 해결할 수 있도록
|
||||
도와 달라.
|
||||
|
||||
Documentation/ManagementStyle
|
||||
이 문서는 리눅스 커널 메인트너들이 그들의 방법론의 정신을
|
||||
어떻게 공유하고 운영하는지를 설명한다. 이것은 커널 개발에 입문하는
|
||||
모든 사람들(또는 커널 개발에 작은 호기심이라도 있는 사람들)이
|
||||
읽어야 할 중요한 문서이다. 왜냐하면 이 문서는 커널 메인트너들의
|
||||
독특한 행동에 관하여 흔히 있는 오해들과 혼란들을 해소하고 있기
|
||||
때문이다.
|
||||
|
||||
Documentation/stable_kernel_rules.txt
|
||||
이 문서는 안정적인 커널 배포가 이루어지는 규칙을 설명하고 있으며
|
||||
여러분들이 이러한 배포들 중 하나에 변경을 하길 원한다면
|
||||
무엇을 해야 하는지를 설명한다.
|
||||
|
||||
Documentation/kernel-docs.txt
|
||||
커널 개발에 관계된 외부 문서의 리스트이다. 커널 내의 포함된 문서들
|
||||
중에 여러분이 찾고 싶은 문서를 발견하지 못할 경우 이 리스트를
|
||||
살펴보라.
|
||||
|
||||
Documentation/applying-patches.txt
|
||||
패치가 무엇이며 그것을 커널의 다른 개발 브랜치들에 어떻게
|
||||
적용하는지에 관하여 자세히 설명 하고 있는 좋은 입문서이다.
|
||||
|
||||
커널은 소스 코드 그 자체에서 자동적으로 만들어질 수 있는 많은 문서들을
|
||||
가지고 있다. 이것은 커널 내의 API에 대한 모든 설명, 그리고 락킹을
|
||||
올바르게 처리하는 법에 관한 규칙을 포함하고 있다. 이 문서는
|
||||
Documentation/DocBook/ 디렉토리 내에서 만들어지며 PDF, Postscript, HTML,
|
||||
그리고 man 페이지들로 다음과 같이 실행하여 만들어 진다.
|
||||
make pdfdocs
|
||||
make psdocs
|
||||
make htmldocs
|
||||
make mandocs
|
||||
각각의 명령을 메인 커널 소스 디렉토리로부터 실행한다.
|
||||
|
||||
|
||||
커널 개발자가 되는 것
|
||||
---------------------
|
||||
|
||||
여러분이 리눅스 커널 개발에 관하여 아무것도 모른다면 Linux KernelNewbies
|
||||
프로젝트를 봐야 한다.
|
||||
http://kernelnewbies.org
|
||||
그곳은 거의 모든 종류의 기본적인 커널 개발 질문들(질문하기 전에 먼저
|
||||
아카이브를 찾아봐라. 과거에 이미 답변되었을 수도 있다)을 할 수 있는 도움이
|
||||
될만한 메일링 리스트가 있다. 또한 실시간으로 질문 할수 있는 IRC 채널도
|
||||
가지고 있으며 리눅스 커널 개발을 배우는 데 유용한 문서들을 보유하고 있다.
|
||||
|
||||
웹사이트는 코드구성, 서브시스템들, 그리고 현재 프로젝트들
|
||||
(트리 내, 외부에 존재하는)에 관한 기본적인 정보들을 가지고 있다. 또한
|
||||
그곳은 커널 컴파일이나 패치를 하는 법과 같은 기본적인 것들을 설명한다.
|
||||
|
||||
여러분이 어디서 시작해야 할진 모르지만 커널 개발 커뮤니티에 참여할 수
|
||||
있는 일들을 찾길 원한다면 리눅스 커널 Janitor 프로젝트를 살펴봐라.
|
||||
http://janitor.kernelnewbies.org/
|
||||
그곳은 시작하기에 아주 딱 좋은 곳이다. 그곳은 리눅스 커널 소스 트리내에
|
||||
간단히 정리되고 수정될 수 있는 문제들에 관하여 설명한다. 여러분은 이
|
||||
프로젝트를 대표하는 개발자들과 일하면서 자신의 패치를 리눅스 커널 트리에
|
||||
반영하기 위한 기본적인 것들을 배우게 될것이며 여러분이 아직 아이디어를
|
||||
가지고 있지 않다면 다음에 무엇을 해야할지에 관한 방향을 배울 수 있을
|
||||
것이다.
|
||||
|
||||
여러분들이 이미 커널 트리에 반영하길 원하는 코드 묶음을 가지고 있지만
|
||||
올바른 포맷으로 포장하는데 도움이 필요하다면 그러한 문제를 돕기 위해
|
||||
만들어진 kernel-mentors 프로젝트가 있다. 그곳은 메일링 리스트이며
|
||||
다음에서 참조할 수 있다.
|
||||
http://selenic.com/mailman/listinfo/kernel-mentors
|
||||
|
||||
리눅스 커널 코드에 실제 변경을 하기 전에 반드시 그 코드가 어떻게
|
||||
동작하는지 이해하고 있어야 한다. 코드를 분석하기 위하여 특정한 툴의
|
||||
도움을 빌려서라도 코드를 직접 읽는 것보다 좋은 것은 없다(대부분의
|
||||
자잘한 부분들은 잘 코멘트되어 있다). 그런 툴들 중에 특히 추천할만한
|
||||
것은 Linux Cross-Reference project이며 그것은 자기 참조 방식이며
|
||||
소스코드를 인덱스된 웹 페이지들의 형태로 보여준다. 최신의 멋진 커널
|
||||
코드 저장소는 다음을 통하여 참조할 수 있다.
|
||||
http://sosdg.org/~coywolf/lxr/
|
||||
|
||||
|
||||
개발 프로세스
|
||||
-------------
|
||||
|
||||
리눅스 커널 개발 프로세스는 현재 몇몇 다른 메인 커널 "브랜치들"과
|
||||
서브시스템에 특화된 커널 브랜치들로 구성된다. 몇몇 다른 메인
|
||||
브랜치들은 다음과 같다.
|
||||
- main 2.6.x 커널 트리
|
||||
- 2.6.x.y - 안정된 커널 트리
|
||||
- 2.6.x -git 커널 패치들
|
||||
- 2.6.x -mm 커널 패치들
|
||||
- 서브시스템을 위한 커널 트리들과 패치들
|
||||
|
||||
2.6.x 커널 트리
|
||||
---------------
|
||||
|
||||
2.6.x 커널들은 Linus Torvalds가 관리하며 kernel.org의 pub/linux/kernel/v2.6/
|
||||
디렉토리에서 참조될 수 있다. 개발 프로세스는 다음과 같다.
|
||||
- 새로운 커널이 배포되자마자 2주의 시간이 주어진다. 이 기간동안
|
||||
메인트너들은 큰 diff들을 Linus에게 제출할 수 있다. 대개 이 패치들은
|
||||
몇 주 동안 -mm 커널내에 이미 있었던 것들이다. 큰 변경들을 제출하는 데
|
||||
선호되는 방법은 git(커널의 소스 관리 툴, 더 많은 정보들은 http://git.or.cz/
|
||||
에서 참조할 수 있다)를 사용하는 것이지만 순수한 패치파일의 형식으로 보내는
|
||||
것도 무관하다.
|
||||
- 2주 후에 -rc1 커널이 배포되며 지금부터는 전체 커널의 안정성에 영향을
|
||||
미칠 수 있는 새로운 기능들을 포함하지 않는 패치들만이 추가될 수 있다.
|
||||
완전히 새로운 드라이버(혹은 파일시스템)는 -rc1 이후에도 받아들여질 수 있다는
|
||||
것을 기억해라. 왜냐하면 변경이 자체내에서만 발생하고 추가된 코드가
|
||||
드라이버 외부의 다른 부분에는 영향을 주지 않으므로 그런 변경은
|
||||
퇴보(regression)를 일으킬 만한 위험을 가지고 있지 않기 때문이다. -rc1이
|
||||
배포된 이후에 git를 사용하여 패치들을 Linus에게 보낼수 있지만 패치들은
|
||||
공식적인 메일링 리스트로 보내서 검토를 받을 필요가 있다.
|
||||
- 새로운 -rc는 Linus가 현재 git tree가 테스트 하기에 충분히 안정된 상태에
|
||||
있다고 판단될 때마다 배포된다. 목표는 새로운 -rc 커널을 매주 배포하는
|
||||
것이다.
|
||||
- 이러한 프로세스는 커널이 "준비"되었다고 여겨질때까지 계속된다.
|
||||
프로세스는 대체로 6주간 지속된다.
|
||||
- 각 -rc 배포에 있는 알려진 퇴보의 목록들은 다음 URI에 남겨진다.
|
||||
http://kernelnewbies.org/known_regressions
|
||||
|
||||
커널 배포에 있어서 언급할만한 가치가 있는 리눅스 커널 메일링 리스트의
|
||||
Andrew Morton의 글이 있다.
|
||||
"커널이 언제 배포될지는 아무도 모른다. 왜냐하면 배포는 알려진
|
||||
버그의 상황에 따라 배포되는 것이지 미리 정해 놓은 시간에 따라
|
||||
배포되는 것은 아니기 때문이다."
|
||||
|
||||
2.6.x.y - 안정 커널 트리
|
||||
------------------------
|
||||
|
||||
4 자리 숫자로 이루어진 버젼의 커널들은 -stable 커널들이다. 그것들은 2.6.x
|
||||
커널에서 발견된 큰 퇴보들이나 보안 문제들 중 비교적 작고 중요한 수정들을
|
||||
포함한다.
|
||||
|
||||
이것은 가장 최근의 안정적인 커널을 원하는 사용자에게 추천되는 브랜치이며,
|
||||
개발/실험적 버젼을 테스트하는 것을 돕는데는 별로 관심이 없다.
|
||||
|
||||
어떤 2.6.x.y 커널도 사용가능하지 않다면 그때는 가장 높은 숫자의 2.6.x
|
||||
커널이 현재의 안정 커널이다.
|
||||
|
||||
2.6.x.y는 "stable" 팀<stable@kernel.org>에 의해 관리되며 거의 매번 격주로
|
||||
배포된다.
|
||||
|
||||
커널 트리 문서들 내에 Documentation/stable_kernel_rules.txt 파일은 어떤
|
||||
종류의 변경들이 -stable 트리로 들어왔는지와 배포 프로세스가 어떻게
|
||||
진행되는지를 설명한다.
|
||||
|
||||
|
||||
2.6.x -git 패치들
|
||||
------------------
|
||||
git 저장소(그러므로 -git이라는 이름이 붙음)에는 날마다 관리되는 Linus의
|
||||
커널 트리의 snapshot 들이 있다. 이 패치들은 일반적으로 날마다 배포되며
|
||||
Linus의 트리의 현재 상태를 나타낸다. 이 패치들은 정상적인지 조금도
|
||||
살펴보지 않고 자동적으로 생성된 것이므로 -rc 커널들 보다도 더 실험적이다.
|
||||
|
||||
2.6.x -mm 커널 패치들
|
||||
---------------------
|
||||
Andrew Morton에 의해 배포된 실험적인 커널 패치들이다. Andrew는 모든 다른
|
||||
서브시스템 커널 트리와 패치들을 가져와서 리눅스 커널 메일링 리스트로
|
||||
온 많은 패치들과 한데 묶는다. 이 트리는 새로운 기능들과 패치들을 위한
|
||||
장소를 제공하는 역할을 한다. 하나의 패치가 -mm에 한동안 있으면서 그 가치가
|
||||
증명되게 되면 Andrew나 서브시스템 메인트너는 그것을 메인라인에 포함시키기
|
||||
위하여 Linus에게 보낸다.
|
||||
|
||||
커널 트리에 포함하고 싶은 모든 새로운 패치들은 Linus에게 보내지기 전에
|
||||
-mm 트리에서 테스트를 하는 것을 적극 추천한다.
|
||||
|
||||
이 커널들은 안정되게 사용할 시스템에서 실행하는 것은 적합하지 않으며
|
||||
다른 브랜치들의 어떤 것들보다 위험하다.
|
||||
|
||||
여러분이 커널 개발 프로세스를 돕길 원한다면 이 커널 배포들을 사용하고
|
||||
테스트한 후 어떤 문제를 발견하거나 또는 모든 것이 잘 동작한다면 리눅스
|
||||
커널 메일링 리스트로 피드백을 해달라.
|
||||
|
||||
이 커널들은 일반적으로 모든 다른 실험적인 패치들과 배포될 당시의
|
||||
사용가능한 메인라인 -git 커널들의 몇몇 변경을 포함한다.
|
||||
|
||||
-mm 커널들은 정해진 일정대로 배포되지 않는다. 하지만 대개 몇몇 -mm 커널들은
|
||||
각 -rc 커널(1부터 3이 흔함) 사이에서 배포된다.
|
||||
|
||||
서브시스템 커널 트리들과 패치들
|
||||
-------------------------------
|
||||
많은 다른 커널 서브시스템 개발자들은 커널의 다른 부분들에서 무슨 일이
|
||||
일어나고 있는지를 볼수 있도록 그들의 개발 트리를 공개한다. 이 트리들은
|
||||
위에서 설명하였던 것 처럼 -mm 커널 배포들로 합쳐진다.
|
||||
|
||||
다음은 활용가능한 커널 트리들을 나열한다.
|
||||
git trees:
|
||||
- Kbuild development tree, Sam Ravnborg <sam@ravnborg.org>
|
||||
git.kernel.org:/pub/scm/linux/kernel/git/sam/kbuild.git
|
||||
|
||||
- ACPI development tree, Len Brown <len.brown@intel.com>
|
||||
git.kernel.org:/pub/scm/linux/kernel/git/lenb/linux-acpi-2.6.git
|
||||
|
||||
- Block development tree, Jens Axboe <axboe@suse.de>
|
||||
git.kernel.org:/pub/scm/linux/kernel/git/axboe/linux-2.6-block.git
|
||||
|
||||
- DRM development tree, Dave Airlie <airlied@linux.ie>
|
||||
git.kernel.org:/pub/scm/linux/kernel/git/airlied/drm-2.6.git
|
||||
|
||||
- ia64 development tree, Tony Luck <tony.luck@intel.com>
|
||||
git.kernel.org:/pub/scm/linux/kernel/git/aegl/linux-2.6.git
|
||||
|
||||
- infiniband, Roland Dreier <rolandd@cisco.com>
|
||||
git.kernel.org:/pub/scm/linux/kernel/git/roland/infiniband.git
|
||||
|
||||
- libata, Jeff Garzik <jgarzik@pobox.com>
|
||||
git.kernel.org:/pub/scm/linux/kernel/git/jgarzik/libata-dev.git
|
||||
|
||||
- network drivers, Jeff Garzik <jgarzik@pobox.com>
|
||||
git.kernel.org:/pub/scm/linux/kernel/git/jgarzik/netdev-2.6.git
|
||||
|
||||
- pcmcia, Dominik Brodowski <linux@dominikbrodowski.net>
|
||||
git.kernel.org:/pub/scm/linux/kernel/git/brodo/pcmcia-2.6.git
|
||||
|
||||
- SCSI, James Bottomley <James.Bottomley@SteelEye.com>
|
||||
git.kernel.org:/pub/scm/linux/kernel/git/jejb/scsi-misc-2.6.git
|
||||
|
||||
quilt trees:
|
||||
- USB, PCI, Driver Core, and I2C, Greg Kroah-Hartman <gregkh@suse.de>
|
||||
kernel.org/pub/linux/kernel/people/gregkh/gregkh-2.6/
|
||||
- x86-64, partly i386, Andi Kleen <ak@suse.de>
|
||||
ftp.firstfloor.org:/pub/ak/x86_64/quilt/
|
||||
|
||||
다른 커널 트리들은 http://kernel.org/git와 MAINTAINERS 파일에서 참조할 수
|
||||
있다.
|
||||
|
||||
버그 보고
|
||||
---------
|
||||
bugzilla.kernel.org는 리눅스 커널 개발자들이 커널의 버그를 추적하는 곳이다.
|
||||
사용자들은 발견한 모든 버그들을 보고하기 위하여 이 툴을 사용할 것을 권장한다.
|
||||
kernel bugzilla를 사용하는 자세한 방법은 다음을 참조하라.
|
||||
http://test.kernel.org/bugzilla/faq.html
|
||||
|
||||
메인 커널 소스 디렉토리에 있는 REPORTING-BUGS 파일은 커널 버그일 것 같은
|
||||
것을 보고하는 법에 관한 좋은 템플릿이고 문제를 추적하기 위해서 커널
|
||||
개발자들이 필요로 하는 정보가 무엇들인지를 상세히 설명하고 있다.
|
||||
|
||||
|
||||
버그 리포트들의 관리
|
||||
--------------------
|
||||
|
||||
여러분의 해킹 기술을 연습하는 가장 좋은 방법 중의 하나는 다른 사람들이
|
||||
보고한 버그들을 수정하는 것이다. 여러분은 커널을 더욱 안정화시키는데
|
||||
도움을 줄 뿐만이 아니라 실제있는 문제들을 수정하는 법을 배우게 되고
|
||||
그와 함께 여러분들의 기술은 향상될 것이며 다른 개발자들이 여러분의
|
||||
존재에 대해 알게 될 것이다. 버그를 수정하는 것은 개발자들 사이에서
|
||||
점수를 얻을 수 있는 가장 좋은 방법중의 하나이다. 왜냐하면 많은 사람들은
|
||||
다른 사람들의 버그들을 수정하기 위하여 시간을 낭비하지 않기 때문이다.
|
||||
|
||||
이미 보고된 버그 리포트들을 가지고 작업하기 위해서 http://bugzilla.kernel.org를
|
||||
참조하라. 여러분이 앞으로 생겨날 버그 리포트들의 조언자가 되길 원한다면
|
||||
bugme-new 메일링 리스트나(새로운 버그 리포트들만이 이곳에서 메일로 전해진다)
|
||||
bugme-janitor 메일링 리스트(bugzilla에 모든 변화들이 여기서 메일로 전해진다)
|
||||
에 등록하면 된다.
|
||||
|
||||
http://lists.osdl.org/mailman/listinfo/bugme-new
|
||||
http://lists.osdl.org/mailman/listinfo/bugme-janitors
|
||||
|
||||
|
||||
|
||||
메일링 리스트들
|
||||
---------------
|
||||
|
||||
위의 몇몇 문서들이 설명하였지만 핵심 커널 개발자들의 대다수는
|
||||
리눅스 커널 메일링 리스트에 참여하고 있다. 리스트에 등록하고 해지하는
|
||||
방법에 관한 자세한 사항은 다음에서 참조할 수 있다.
|
||||
http://vger.kernel.org/vger-lists.html#linux-kernel
|
||||
웹상의 많은 다른 곳에도 메일링 리스트의 아카이브들이 있다.
|
||||
이러한 아카이브들을 찾으려면 검색 엔진을 사용하라. 예를 들어:
|
||||
http://dir.gmane.org/gmane.linux.kernel
|
||||
여러분이 새로운 문제에 관해 리스트에 올리기 전에 말하고 싶은 주제에 대한
|
||||
것을 아카이브에서 먼저 찾기를 강력히 권장한다. 이미 상세하게 토론된 많은
|
||||
것들이 메일링 리스트의 아카이브에 기록되어 있다.
|
||||
|
||||
각각의 커널 서브시스템들의 대부분은 자신들의 개발에 관한 노력들로 이루어진
|
||||
분리된 메일링 리스트를 따로 가지고 있다. 다른 그룹들이 무슨 리스트를 가지고
|
||||
있는지는 MAINTAINERS 파일을 참조하라.
|
||||
|
||||
많은 리스트들은 kernel.org에서 호스트되고 있다. 그 정보들은 다음에서 참조될 수 있다.
|
||||
http://vger.kernel.org/vger-lists.html
|
||||
|
||||
리스트들을 사용할 때는 올바른 예절을 따를 것을 유념해라.
|
||||
대단하진 않지만 다음 URL은 리스트(혹은 모든 리스트)와 대화하는 몇몇 간단한
|
||||
가이드라인을 가지고 있다.
|
||||
http://www.albion.com/netiquette/
|
||||
|
||||
여러 사람들이 여러분의 메일에 응답한다면 CC: 즉 수신 리스트는 꽤 커지게
|
||||
될 것이다. 아무 이유없이 CC에서 어떤 사람도 제거하거나 리스트 주소로만
|
||||
회신하지 마라. 메일을 보낸 사람으로서 하나를 받고 리스트로부터 또
|
||||
하나를 받아 두번 받는 것에 익숙하여 있으니 mail-header를 조작하려고 하지
|
||||
말아라. 사람들은 그런 것을 좋아하지 않을 것이다.
|
||||
|
||||
여러분의 회신의 문맥을 원래대로 유지해야 한다. 여러분들의 회신의 윗부분에
|
||||
"John 커널해커는 작성했다...."를 유지하며 여러분들의 의견을 그 메일의 윗부분에
|
||||
작성하지 말고 각 인용한 단락들 사이에 넣어라.
|
||||
|
||||
여러분들이 패치들을 메일에 넣는다면 그것들은 Documentation/SubmittingPatches에
|
||||
나와있는 대로 명백히(plain) 읽을 수 있는 텍스트여야 한다. 커널 개발자들은
|
||||
첨부파일이나 압축된 패치들을 원하지 않는다. 그들은 여러분들의 패치의
|
||||
각 라인 단위로 코멘트를 하길 원하며 압축하거나 첨부하지 않고 보내는 것이
|
||||
그렇게 할 수 있는 유일한 방법이다. 여러분들이 사용하는 메일 프로그램이
|
||||
스페이스나 탭 문자들을 조작하지 않는지 확인하라. 가장 좋은 첫 테스트는
|
||||
메일을 자신에게 보내보고 스스로 그 패치를 적용해보라. 그것이 동작하지
|
||||
않는다면 여러분의 메일 프로그램을 고치던가 제대로 동작하는 프로그램으로
|
||||
바꾸어라.
|
||||
|
||||
무엇보다도 메일링 리스트의 다른 구독자들에게 보여주려 한다는 것을 기억하라.
|
||||
|
||||
|
||||
커뮤니티와 일하는 법
|
||||
--------------------
|
||||
|
||||
커널 커뮤니티의 목적은 가능한한 가장 좋은 커널을 제공하는 것이다. 여러분이
|
||||
받아들여질 패치를 제출하게 되면 그 패치의 기술적인 이점으로 검토될 것이다.
|
||||
그럼 여러분들은 무엇을 기대하고 있어야 하는가?
|
||||
- 비판
|
||||
- 의견
|
||||
- 변경을 위한 요구
|
||||
- 당위성을 위한 요구
|
||||
- 고요
|
||||
|
||||
기억하라. 이것들은 여러분의 패치가 커널로 들어가기 위한 과정이다. 여러분의
|
||||
패치들은 비판과 다른 의견을 받을 수 있고 그것들을 기술적인 레벨로 평가하고
|
||||
재작업하거나 또는 왜 수정하면 안되는지에 관하여 명료하고 간결한 이유를
|
||||
말할 수 있어야 한다. 여러분이 제출한 것에 어떤 응답도 있지 않다면 몇 일을
|
||||
기다려보고 다시 시도해라. 때론 너무 많은 메일들 속에 묻혀버리기도 한다.
|
||||
|
||||
여러분은 무엇을 해서는 안되는가?
|
||||
- 여러분의 패치가 아무 질문 없이 받아들여지기를 기대하는 것
|
||||
- 방어적이 되는 것
|
||||
- 의견을 무시하는 것
|
||||
- 요청된 변경을 하지 않고 패치를 다시 제출하는 것
|
||||
|
||||
가능한한 가장 좋은 기술적인 해답을 찾고 있는 커뮤니티에서는 항상
|
||||
어떤 패치가 얼마나 좋은지에 관하여 다른 의견들이 있을 수 있다. 여러분은
|
||||
협조적이어야 하고 기꺼이 여러분의 생각을 커널 내에 맞추어야 한다. 아니면
|
||||
적어도 여러분의 것이 가치있다는 것을 증명하여야 한다. 잘못된 것도 여러분이
|
||||
올바른 방향의 해결책으로 이끌어갈 의지가 있다면 받아들여질 것이라는 점을
|
||||
기억하라.
|
||||
|
||||
여러분의 첫 패치에 여러분이 수정해야하는 십여개 정도의 회신이 오는
|
||||
경우도 흔하다. 이것은 여러분의 패치가 받아들여지지 않을 것이라는 것을
|
||||
의미하는 것이 아니고 개인적으로 여러분에게 감정이 있어서 그러는 것도
|
||||
아니다. 간단히 여러분의 패치에 제기된 문제들을 수정하고 그것을 다시
|
||||
보내라.
|
||||
|
||||
|
||||
커널 커뮤니티와 기업 조직간의 차이점
|
||||
-----------------------------------------------------------------
|
||||
커널 커뮤니티는 가장 전통적인 회사의 개발 환경과는 다르다. 여기에 여러분들의
|
||||
문제를 피하기 위한 목록이 있다.
|
||||
여러분들이 제안한 변경들에 관하여 말할 때 좋은 것들 :
|
||||
- "이것은 여러 문제들을 해결합니다."
|
||||
- "이것은 2000 라인의 코드를 제거합니다."
|
||||
- "이것은 내가 말하려는 것에 관해 설명하는 패치입니다."
|
||||
- "나는 5개의 다른 아키텍쳐에서 그것을 테스트했으므로..."
|
||||
- "여기에 일련의 작은 패치들이 있으므로..."
|
||||
- "이것은 일반적인 머신에서 성능을 향상시키므로..."
|
||||
|
||||
여러분들이 말할 때 피해야 할 좋지 않은 것들 :
|
||||
- "우리는 그것을 AIX/ptx/Solaris에서 이러한 방법으로 했다. 그러므로 그것은 좋은 것임에 틀림없다..."
|
||||
- "나는 20년동안 이것을 해왔다. 그러므로..."
|
||||
- "이것은 돈을 벌기위해 나의 회사가 필요로 하는 것이다."
|
||||
- "이것은 우리의 엔터프라이즈 상품 라인을 위한 것이다."
|
||||
- "여기에 나의 생각을 말하고 있는 1000 페이지 설계 문서가 있다."
|
||||
- "나는 6달동안 이것을 했으니..."
|
||||
- "여기에 5000라인 짜리 패치가 있으니..."
|
||||
- "나는 현재 뒤죽박죽인 것을 재작성했다. 그리고 여기에..."
|
||||
- "나는 마감시한을 가지고 있으므로 이 패치는 지금 적용될 필요가 있다."
|
||||
|
||||
커널 커뮤니티가 전통적인 소프트웨어 엔지니어링 개발 환경들과
|
||||
또 다른 점은 얼굴을 보지 않고 일한다는 점이다. 이메일과 irc를 대화의
|
||||
주요수단으로 사용하는 것의 한가지 장점은 성별이나 인종의 차별이
|
||||
없다는 것이다. 리눅스 커널의 작업 환경에서는 단지 이메일 주소만
|
||||
알수 있기 때문에 여성과 소수 민족들도 모두 받아들여진다. 국제적으로
|
||||
일하게 되는 측면은 사람의 이름에 근거하여 성별을 추측할 수 없게
|
||||
하기때문에 차별을 없애는 데 도움을 준다. Andrea라는 이름을 가진 남자와
|
||||
Pat이라는 이름을 가진 여자가 있을 수도 있는 것이다. 리눅스 커널에서
|
||||
작업하며 생각을 표현해왔던 대부분의 여성들은 긍정적인 경험을 가지고
|
||||
있다.
|
||||
|
||||
언어 장벽은 영어에 익숙하지 않은 몇몇 사람들에게 문제가 될 수도 있다.
|
||||
언어의 훌륭한 구사는 메일링 리스트에서 올바르게 자신의 생각을
|
||||
표현하기 위하여 필요하다. 그래서 여러분은 이메일을 보내기 전에
|
||||
영어를 올바르게 사용하고 있는지를 체크하는 것이 바람직하다.
|
||||
|
||||
|
||||
여러분의 변경을 나누어라
|
||||
------------------------
|
||||
|
||||
리눅스 커널 커뮤니티는 한꺼번에 굉장히 큰 코드의 묶음을 쉽게
|
||||
받아들이지 않는다. 변경은 적절하게 소개되고, 검토되고, 각각의
|
||||
부분으로 작게 나누어져야 한다. 이것은 회사에서 하는 것과는 정확히
|
||||
반대되는 것이다. 여러분들의 제안은 개발 초기에 일찍이 소개되야 한다.
|
||||
그래서 여러분들은 자신이 하고 있는 것에 관하여 피드백을 받을 수 있게
|
||||
된다. 커뮤니티가 여러분들이 커뮤니티와 함께 일하고 있다는 것을
|
||||
느끼도록 만들고 커뮤니티가 여러분의 기능을 위한 쓰레기 장으로서
|
||||
사용되지 않고 있다는 것을 느끼게 하자. 그러나 메일링 리스트에 한번에
|
||||
50개의 이메일을 보내지는 말아라. 여러분들의 일련의 패치들은 항상
|
||||
더 작아야 한다.
|
||||
|
||||
패치를 나누는 이유는 다음과 같다.
|
||||
|
||||
1) 작은 패치들은 여러분의 패치들이 적용될 수 있는 확률을 높여준다.
|
||||
왜냐하면 다른 사람들은 정확성을 검증하기 위하여 많은 시간과 노력을
|
||||
들이기를 원하지 않는다. 5줄의 패치는 메인트너가 거의 몇 초간 힐끗
|
||||
보면 적용될 수 있다. 그러나 500 줄의 패치는 정확성을 검토하기 위하여
|
||||
몇시간이 걸릴 수도 있다(걸리는 시간은 패치의 크기 혹은 다른 것에
|
||||
비례하여 기하급수적으로 늘어난다).
|
||||
|
||||
패치를 작게 만드는 것은 무엇인가 잘못되었을 때 디버그하는 것을
|
||||
쉽게 만든다. 즉, 그렇게 만드는 것은 매우 큰 패치를 적용한 후에
|
||||
조사하는 것 보다 작은 패치를 적용한 후에 (그리고 몇몇의 것이
|
||||
깨졌을 때) 하나씩 패치들을 제거해가며 디버그 하기 쉽도록 만들어 준다.
|
||||
|
||||
2) 작은 패치들을 보내는 것뿐만 아니라 패치들을 제출하기전에 재작성하고
|
||||
간단하게(혹은 간단하게 재배치하여) 하는 것도 중요하다.
|
||||
|
||||
여기에 커널 개발자 Al Viro의 이야기가 있다.
|
||||
"학생의 수학 숙제를 채점하는 선생님을 생각해보라. 선생님은 학생들이
|
||||
답을 얻을때까지 겪은 시행착오를 보길 원하지 않는다. 선생님들은
|
||||
간결하고 가장 뛰어난 답을 보길 원한다. 훌륭한 학생은 이것을 알고
|
||||
마지막으로 답을 얻기 전 중간 과정들을 제출하진 않는다.
|
||||
|
||||
커널 개발도 마찬가지이다. 메인트너들과 검토하는 사람들은 문제를
|
||||
풀어나가는 과정속에 숨겨진 과정을 보길 원하진 않는다. 그들은
|
||||
간결하고 멋진 답을 보길 원한다."
|
||||
|
||||
커뮤니티와 함께 일하며 뛰어난 답을 찾고 여러분들의 완성되지 않은 일들
|
||||
사이에 균형을 유지해야 하는 어려움이 있을 수 있다. 그러므로 프로세스의
|
||||
초반에 여러분의 일을 향상시키기위한 피드백을 얻는 것 뿐만 아니라
|
||||
여러분들의 변경들을 작은 묶음으로 유지해서 심지어는 여러분의 작업의
|
||||
모든 부분이 지금은 포함될 준비가 되어있지 않지만 작은 부분은 이미
|
||||
받아들여질 수 있도록 유지하는 것이 바람직하다.
|
||||
|
||||
또한 완성되지 않았고 "나중에 수정될 것이다." 와 같은 것들을 포함하는
|
||||
패치들은 받아들여지지 않을 것이라는 점을 유념하라.
|
||||
|
||||
변경을 정당화해라
|
||||
-----------------
|
||||
|
||||
여러분들의 나누어진 패치들을 리눅스 커뮤니티가 왜 반영해야 하는지를
|
||||
알도록 하는 것은 매우 중요하다. 새로운 기능들이 필요하고 유용하다는
|
||||
것은 반드시 그에 맞는 이유가 있어야 한다.
|
||||
|
||||
|
||||
변경을 문서화해라
|
||||
-----------------
|
||||
|
||||
여러분이 패치를 보내려 할때는 여러분이 무엇을 말하려고 하는지를 충분히
|
||||
생각하여 이메일을 작성해야 한다. 이 정보는 패치를 위한 ChangeLog가 될
|
||||
것이다. 그리고 항상 그 내용을 보길 원하는 모든 사람들을 위해 보존될
|
||||
것이다. 패치는 완벽하게 다음과 같은 내용들을 포함하여 설명해야 한다.
|
||||
- 변경이 왜 필요한지
|
||||
- 패치에 관한 전체 설계 어프로치
|
||||
- 구현 상세들
|
||||
- 테스트 결과들
|
||||
|
||||
이것이 무엇인지 더 자세한 것을 알고 싶다면 다음 문서의 ChangeLog 항을 봐라.
|
||||
"The Perfect Patch"
|
||||
http://www.zip.com.au/~akpm/linux/patches/stuff/tpp.txt
|
||||
|
||||
|
||||
|
||||
|
||||
이 모든 것을 하는 것은 매우 어려운 일이다. 완벽히 소화하는 데는 적어도 몇년이
|
||||
걸릴 수도 있다. 많은 인내와 결의가 필요한 계속되는 개선의 과정이다. 그러나
|
||||
가능한한 포기하지 말라. 많은 사람들은 이전부터 해왔던 것이고 그 사람들도
|
||||
정확하게 여러분들이 지금 서 있는 그 곳부터 시작했었다.
|
||||
|
||||
|
||||
|
||||
|
||||
----------
|
||||
"개발 프로세스"(http://linux.tar.bz/articles/2.6-development_process) 섹션을
|
||||
작성하는데 있어 참고할 문서를 사용하도록 허락해준 Paolo Ciarrocchi에게
|
||||
감사한다. 여러분들이 말해야 할 것과 말해서는 안되는 것의 목록 중 일부를 제공해준
|
||||
Randy Dunlap과 Gerrit Huizenga에게 감사한다. 또한 검토와 의견 그리고
|
||||
공헌을 아끼지 않은 Pat Mochel, Hanna Linder, Randy Dunlap, Kay Sievers,
|
||||
Vojtech Pavlik, Jan Kara, Josh Boyer, Kees Cook, Andrew Morton, Andi Kleen,
|
||||
Vadim Lobanov, Jesper Juhl, Adrian Bunk, Keri Harris, Frans Pop,
|
||||
David A. Wheeler, Junio Hamano, Michael Kerrisk, and Alex Shepard에게도 감사를 전한다.
|
||||
그들의 도움이 없었다면 이 문서는 존재하지 않았을 것이다.
|
||||
|
||||
|
||||
|
||||
메인트너: Greg Kroah-Hartman <greg@kroah.com>
|
@ -27,7 +27,6 @@ in detail, and briefly here:
|
||||
- kobjects a simple object.
|
||||
- kset a set of objects of a certain type.
|
||||
- ktype a set of helpers for objects of a common type.
|
||||
- subsystem a controlling object for a number of ksets.
|
||||
|
||||
|
||||
The kobject infrastructure maintains a close relationship with the
|
||||
@ -54,13 +53,14 @@ embedded in larger data structures and replace fields they duplicate.
|
||||
1.2 Definition
|
||||
|
||||
struct kobject {
|
||||
char name[KOBJ_NAME_LEN];
|
||||
atomic_t refcount;
|
||||
const char * k_name;
|
||||
struct kref kref;
|
||||
struct list_head entry;
|
||||
struct kobject * parent;
|
||||
struct kset * kset;
|
||||
struct kobj_type * ktype;
|
||||
struct dentry * dentry;
|
||||
struct sysfs_dirent * sd;
|
||||
wait_queue_head_t poll;
|
||||
};
|
||||
|
||||
void kobject_init(struct kobject *);
|
||||
@ -137,8 +137,7 @@ If a kobject does not have a parent when it is registered, its parent
|
||||
becomes its dominant kset.
|
||||
|
||||
If a kobject does not have a parent nor a dominant kset, its directory
|
||||
is created at the top-level of the sysfs partition. This should only
|
||||
happen for kobjects that are embedded in a struct subsystem.
|
||||
is created at the top-level of the sysfs partition.
|
||||
|
||||
|
||||
|
||||
@ -150,10 +149,10 @@ A kset is a set of kobjects that are embedded in the same type.
|
||||
|
||||
|
||||
struct kset {
|
||||
struct subsystem * subsys;
|
||||
struct kobj_type * ktype;
|
||||
struct list_head list;
|
||||
struct kobject kobj;
|
||||
struct kset_uevent_ops * uevent_ops;
|
||||
};
|
||||
|
||||
|
||||
@ -169,8 +168,7 @@ struct kobject * kset_find_obj(struct kset *, char *);
|
||||
|
||||
|
||||
The type that the kobjects are embedded in is described by the ktype
|
||||
pointer. The subsystem that the kobject belongs to is pointed to by the
|
||||
subsys pointer.
|
||||
pointer.
|
||||
|
||||
A kset contains a kobject itself, meaning that it may be registered in
|
||||
the kobject hierarchy and exported via sysfs. More importantly, the
|
||||
@ -209,6 +207,41 @@ the hierarchy.
|
||||
kset_find_obj() may be used to locate a kobject with a particular
|
||||
name. The kobject, if found, is returned.
|
||||
|
||||
There are also some helper functions whose names point to the formerly
|
||||
existing "struct subsystem", whose functions have been taken over by
|
||||
ksets.
|
||||
|
||||
|
||||
decl_subsys(name,type,uevent_ops)
|
||||
|
||||
Declares a kset named '<name>_subsys' of type <type> with
|
||||
uevent_ops <uevent_ops>. For example,
|
||||
|
||||
decl_subsys(devices, &ktype_device, &device_uevent_ops);
|
||||
|
||||
is equivalent to doing:
|
||||
|
||||
struct kset devices_subsys = {
|
||||
.ktype = &ktype_device,
|
||||
.uevent_ops = &device_uevent_ops,
|
||||
};
|
||||
kobject_set_name(&devices_subsys, name);
|
||||
|
||||
The objects that are registered with a subsystem that use the
|
||||
subsystem's default list must have their kset ptr set properly. These
|
||||
objects may have embedded kobjects or ksets. The
|
||||
following helper makes setting the kset easier:
|
||||
|
||||
|
||||
kobj_set_kset_s(obj,subsys)
|
||||
|
||||
- Assumes that obj->kobj exists, and is a struct kobject.
|
||||
- Sets the kset of that kobject to the kset <subsys>.
|
||||
|
||||
int subsystem_register(struct kset *s);
|
||||
void subsystem_unregister(struct kset *s);
|
||||
|
||||
These are just wrappers around the respective kset_* functions.
|
||||
|
||||
2.3 sysfs
|
||||
|
||||
@ -254,114 +287,3 @@ Instances of struct kobj_type are not registered; only referenced by
|
||||
the kset. A kobj_type may be referenced by an arbitrary number of
|
||||
ksets, as there may be disparate sets of identical objects.
|
||||
|
||||
|
||||
|
||||
4. subsystems
|
||||
|
||||
4.1 Description
|
||||
|
||||
A subsystem represents a significant entity of code that maintains an
|
||||
arbitrary number of sets of objects of various types. Since the number
|
||||
of ksets and the type of objects they contain are variable, a
|
||||
generic representation of a subsystem is minimal.
|
||||
|
||||
|
||||
struct subsystem {
|
||||
struct kset kset;
|
||||
struct rw_semaphore rwsem;
|
||||
};
|
||||
|
||||
int subsystem_register(struct subsystem *);
|
||||
void subsystem_unregister(struct subsystem *);
|
||||
|
||||
struct subsystem * subsys_get(struct subsystem * s);
|
||||
void subsys_put(struct subsystem * s);
|
||||
|
||||
|
||||
A subsystem contains an embedded kset so:
|
||||
|
||||
- It can be represented in the object hierarchy via the kset's
|
||||
embedded kobject.
|
||||
|
||||
- It can maintain a default list of objects of one type.
|
||||
|
||||
Additional ksets may attach to the subsystem simply by referencing the
|
||||
subsystem before they are registered. (This one-way reference means
|
||||
that there is no way to determine the ksets that are attached to the
|
||||
subsystem.)
|
||||
|
||||
All ksets that are attached to a subsystem share the subsystem's R/W
|
||||
semaphore.
|
||||
|
||||
|
||||
4.2 subsystem Programming Interface.
|
||||
|
||||
The subsystem programming interface is simple and does not offer the
|
||||
flexibility that the kset and kobject programming interfaces do. They
|
||||
may be registered and unregistered, as well as reference counted. Each
|
||||
call forwards the calls to their embedded ksets (which forward the
|
||||
calls to their embedded kobjects).
|
||||
|
||||
|
||||
4.3 Helpers
|
||||
|
||||
A number of macros are available to make dealing with subsystems and
|
||||
their embedded objects easier.
|
||||
|
||||
|
||||
decl_subsys(name,type)
|
||||
|
||||
Declares a subsystem named '<name>_subsys', with an embedded kset of
|
||||
type <type>. For example,
|
||||
|
||||
decl_subsys(devices,&ktype_devices);
|
||||
|
||||
is equivalent to doing:
|
||||
|
||||
struct subsystem device_subsys = {
|
||||
.kset = {
|
||||
.kobj = {
|
||||
.name = "devices",
|
||||
},
|
||||
.ktype = &ktype_devices,
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
The objects that are registered with a subsystem that use the
|
||||
subsystem's default list must have their kset ptr set properly. These
|
||||
objects may have embedded kobjects, ksets, or other subsystems. The
|
||||
following helpers make setting the kset easier:
|
||||
|
||||
|
||||
kobj_set_kset_s(obj,subsys)
|
||||
|
||||
- Assumes that obj->kobj exists, and is a struct kobject.
|
||||
- Sets the kset of that kobject to the subsystem's embedded kset.
|
||||
|
||||
|
||||
kset_set_kset_s(obj,subsys)
|
||||
|
||||
- Assumes that obj->kset exists, and is a struct kset.
|
||||
- Sets the kset of the embedded kobject to the subsystem's
|
||||
embedded kset.
|
||||
|
||||
subsys_set_kset(obj,subsys)
|
||||
|
||||
- Assumes obj->subsys exists, and is a struct subsystem.
|
||||
- Sets obj->subsys.kset.kobj.kset to the subsystem's embedded kset.
|
||||
|
||||
|
||||
4.4 sysfs
|
||||
|
||||
subsystems are represented in sysfs via their embedded kobjects. They
|
||||
follow the same rules as previously mentioned with no exceptions. They
|
||||
typically receive a top-level directory in sysfs, except when their
|
||||
embedded kobject is part of another kset, or the parent of the
|
||||
embedded kobject is explicitly set.
|
||||
|
||||
Note that the subsystem's embedded kset must be 'attached' to the
|
||||
subsystem itself in order to use its rwsem. This is done after
|
||||
kset_add() has been called. (Not before, because kset_add() uses its
|
||||
subsystem for a default parent if it doesn't already have one).
|
||||
|
||||
|
@ -247,12 +247,6 @@ control to Kprobes.) If the probed function is declared asmlinkage,
|
||||
fastcall, or anything else that affects how args are passed, the
|
||||
handler's declaration must match.
|
||||
|
||||
NOTE: A macro JPROBE_ENTRY is provided to handle architecture-specific
|
||||
aliasing of jp->entry. In the interest of portability, it is advised
|
||||
to use:
|
||||
|
||||
jp->entry = JPROBE_ENTRY(handler);
|
||||
|
||||
register_jprobe() returns 0 on success, or a negative errno otherwise.
|
||||
|
||||
4.3 register_kretprobe
|
||||
@ -518,7 +512,7 @@ long jdo_fork(unsigned long clone_flags, unsigned long stack_start,
|
||||
}
|
||||
|
||||
static struct jprobe my_jprobe = {
|
||||
.entry = JPROBE_ENTRY(jdo_fork)
|
||||
.entry = jdo_fork
|
||||
};
|
||||
|
||||
static int __init jprobe_init(void)
|
||||
|
28
Documentation/lguest/Makefile
Normal file
28
Documentation/lguest/Makefile
Normal file
@ -0,0 +1,28 @@
|
||||
# This creates the demonstration utility "lguest" which runs a Linux guest.
|
||||
|
||||
# For those people that have a separate object dir, look there for .config
|
||||
KBUILD_OUTPUT := ../..
|
||||
ifdef O
|
||||
ifeq ("$(origin O)", "command line")
|
||||
KBUILD_OUTPUT := $(O)
|
||||
endif
|
||||
endif
|
||||
# We rely on CONFIG_PAGE_OFFSET to know where to put lguest binary.
|
||||
include $(KBUILD_OUTPUT)/.config
|
||||
LGUEST_GUEST_TOP := ($(CONFIG_PAGE_OFFSET) - 0x08000000)
|
||||
|
||||
CFLAGS:=-Wall -Wmissing-declarations -Wmissing-prototypes -O3 -Wl,-T,lguest.lds
|
||||
LDLIBS:=-lz
|
||||
# Removing this works for some versions of ld.so (eg. Ubuntu Feisty) and
|
||||
# not others (eg. FC7).
|
||||
LDFLAGS+=-static
|
||||
all: lguest.lds lguest
|
||||
|
||||
# The linker script on x86 is so complex the only way of creating one
|
||||
# which will link our binary in the right place is to mangle the
|
||||
# default one.
|
||||
lguest.lds:
|
||||
$(LD) --verbose | awk '/^==========/ { PRINT=1; next; } /SIZEOF_HEADERS/ { gsub(/0x[0-9A-F]*/, "$(LGUEST_GUEST_TOP)") } { if (PRINT) print $$0; }' > $@
|
||||
|
||||
clean:
|
||||
rm -f lguest.lds lguest
|
58
Documentation/lguest/extract
Normal file
58
Documentation/lguest/extract
Normal file
@ -0,0 +1,58 @@
|
||||
#! /bin/sh
|
||||
|
||||
set -e
|
||||
|
||||
PREFIX=$1
|
||||
shift
|
||||
|
||||
trap 'rm -r $TMPDIR' 0
|
||||
TMPDIR=`mktemp -d`
|
||||
|
||||
exec 3>/dev/null
|
||||
for f; do
|
||||
while IFS="
|
||||
" read -r LINE; do
|
||||
case "$LINE" in
|
||||
*$PREFIX:[0-9]*:\**)
|
||||
NUM=`echo "$LINE" | sed "s/.*$PREFIX:\([0-9]*\).*/\1/"`
|
||||
if [ -f $TMPDIR/$NUM ]; then
|
||||
echo "$TMPDIR/$NUM already exits prior to $f"
|
||||
exit 1
|
||||
fi
|
||||
exec 3>>$TMPDIR/$NUM
|
||||
echo $f | sed 's,\.\./,,g' > $TMPDIR/.$NUM
|
||||
/bin/echo "$LINE" | sed -e "s/$PREFIX:[0-9]*//" -e "s/:\*/*/" >&3
|
||||
;;
|
||||
*$PREFIX:[0-9]*)
|
||||
NUM=`echo "$LINE" | sed "s/.*$PREFIX:\([0-9]*\).*/\1/"`
|
||||
if [ -f $TMPDIR/$NUM ]; then
|
||||
echo "$TMPDIR/$NUM already exits prior to $f"
|
||||
exit 1
|
||||
fi
|
||||
exec 3>>$TMPDIR/$NUM
|
||||
echo $f | sed 's,\.\./,,g' > $TMPDIR/.$NUM
|
||||
/bin/echo "$LINE" | sed "s/$PREFIX:[0-9]*//" >&3
|
||||
;;
|
||||
*:\**)
|
||||
/bin/echo "$LINE" | sed -e "s/:\*/*/" -e "s,/\*\*/,," >&3
|
||||
echo >&3
|
||||
exec 3>/dev/null
|
||||
;;
|
||||
*)
|
||||
/bin/echo "$LINE" >&3
|
||||
;;
|
||||
esac
|
||||
done < $f
|
||||
echo >&3
|
||||
exec 3>/dev/null
|
||||
done
|
||||
|
||||
LASTFILE=""
|
||||
for f in $TMPDIR/*; do
|
||||
if [ "$LASTFILE" != $(cat $TMPDIR/.$(basename $f) ) ]; then
|
||||
LASTFILE=$(cat $TMPDIR/.$(basename $f) )
|
||||
echo "[ $LASTFILE ]"
|
||||
fi
|
||||
cat $f
|
||||
done
|
||||
|
1524
Documentation/lguest/lguest.c
Normal file
1524
Documentation/lguest/lguest.c
Normal file
File diff suppressed because it is too large
Load Diff
129
Documentation/lguest/lguest.txt
Normal file
129
Documentation/lguest/lguest.txt
Normal file
@ -0,0 +1,129 @@
|
||||
Rusty's Remarkably Unreliable Guide to Lguest
|
||||
- or, A Young Coder's Illustrated Hypervisor
|
||||
http://lguest.ozlabs.org
|
||||
|
||||
Lguest is designed to be a minimal hypervisor for the Linux kernel, for
|
||||
Linux developers and users to experiment with virtualization with the
|
||||
minimum of complexity. Nonetheless, it should have sufficient
|
||||
features to make it useful for specific tasks, and, of course, you are
|
||||
encouraged to fork and enhance it.
|
||||
|
||||
Features:
|
||||
|
||||
- Kernel module which runs in a normal kernel.
|
||||
- Simple I/O model for communication.
|
||||
- Simple program to create new guests.
|
||||
- Logo contains cute puppies: http://lguest.ozlabs.org
|
||||
|
||||
Developer features:
|
||||
|
||||
- Fun to hack on.
|
||||
- No ABI: being tied to a specific kernel anyway, you can change anything.
|
||||
- Many opportunities for improvement or feature implementation.
|
||||
|
||||
Running Lguest:
|
||||
|
||||
- Lguest runs the same kernel as guest and host. You can configure
|
||||
them differently, but usually it's easiest not to.
|
||||
|
||||
You will need to configure your kernel with the following options:
|
||||
|
||||
CONFIG_HIGHMEM64G=n ("High Memory Support" "64GB")[1]
|
||||
CONFIG_TUN=y/m ("Universal TUN/TAP device driver support")
|
||||
CONFIG_EXPERIMENTAL=y ("Prompt for development and/or incomplete code/drivers")
|
||||
CONFIG_PARAVIRT=y ("Paravirtualization support (EXPERIMENTAL)")
|
||||
CONFIG_LGUEST=y/m ("Linux hypervisor example code")
|
||||
|
||||
and I recommend:
|
||||
CONFIG_HZ=100 ("Timer frequency")[2]
|
||||
|
||||
- A tool called "lguest" is available in this directory: type "make"
|
||||
to build it. If you didn't build your kernel in-tree, use "make
|
||||
O=<builddir>".
|
||||
|
||||
- Create or find a root disk image. There are several useful ones
|
||||
around, such as the xm-test tiny root image at
|
||||
http://xm-test.xensource.com/ramdisks/initrd-1.1-i386.img
|
||||
|
||||
For more serious work, I usually use a distribution ISO image and
|
||||
install it under qemu, then make multiple copies:
|
||||
|
||||
dd if=/dev/zero of=rootfile bs=1M count=2048
|
||||
qemu -cdrom image.iso -hda rootfile -net user -net nic -boot d
|
||||
|
||||
- "modprobe lg" if you built it as a module.
|
||||
|
||||
- Run an lguest as root:
|
||||
|
||||
Documentation/lguest/lguest 64m vmlinux --tunnet=192.168.19.1 --block=rootfile root=/dev/lgba
|
||||
|
||||
Explanation:
|
||||
64m: the amount of memory to use.
|
||||
|
||||
vmlinux: the kernel image found in the top of your build directory. You
|
||||
can also use a standard bzImage.
|
||||
|
||||
--tunnet=192.168.19.1: configures a "tap" device for networking with this
|
||||
IP address.
|
||||
|
||||
--block=rootfile: a file or block device which becomes /dev/lgba
|
||||
inside the guest.
|
||||
|
||||
root=/dev/lgba: this (and anything else on the command line) are
|
||||
kernel boot parameters.
|
||||
|
||||
- Configuring networking. I usually have the host masquerade, using
|
||||
"iptables -t nat -A POSTROUTING -o eth0 -j MASQUERADE" and "echo 1 >
|
||||
/proc/sys/net/ipv4/ip_forward". In this example, I would configure
|
||||
eth0 inside the guest at 192.168.19.2.
|
||||
|
||||
Another method is to bridge the tap device to an external interface
|
||||
using --tunnet=bridge:<bridgename>, and perhaps run dhcp on the guest
|
||||
to obtain an IP address. The bridge needs to be configured first:
|
||||
this option simply adds the tap interface to it.
|
||||
|
||||
A simple example on my system:
|
||||
|
||||
ifconfig eth0 0.0.0.0
|
||||
brctl addbr lg0
|
||||
ifconfig lg0 up
|
||||
brctl addif lg0 eth0
|
||||
dhclient lg0
|
||||
|
||||
Then use --tunnet=bridge:lg0 when launching the guest.
|
||||
|
||||
See http://linux-net.osdl.org/index.php/Bridge for general information
|
||||
on how to get bridging working.
|
||||
|
||||
- You can also create an inter-guest network using
|
||||
"--sharenet=<filename>": any two guests using the same file are on
|
||||
the same network. This file is created if it does not exist.
|
||||
|
||||
Lguest I/O model:
|
||||
|
||||
Lguest uses a simplified DMA model plus shared memory for I/O. Guests
|
||||
can communicate with each other if they share underlying memory
|
||||
(usually by the lguest program mmaping the same file), but they can
|
||||
use any non-shared memory to communicate with the lguest process.
|
||||
|
||||
Guests can register DMA buffers at any key (must be a valid physical
|
||||
address) using the LHCALL_BIND_DMA(key, dmabufs, num<<8|irq)
|
||||
hypercall. "dmabufs" is the physical address of an array of "num"
|
||||
"struct lguest_dma": each contains a used_len, and an array of
|
||||
physical addresses and lengths. When a transfer occurs, the
|
||||
"used_len" field of one of the buffers which has used_len 0 will be
|
||||
set to the length transferred and the irq will fire.
|
||||
|
||||
Using an irq value of 0 unbinds the dma buffers.
|
||||
|
||||
To send DMA, the LHCALL_SEND_DMA(key, dma_physaddr) hypercall is used,
|
||||
and the bytes used is written to the used_len field. This can be 0 if
|
||||
no one else has bound a DMA buffer to that key or some other error.
|
||||
DMA buffers bound by the same guest are ignored.
|
||||
|
||||
Cheers!
|
||||
Rusty Russell rusty@rustcorp.com.au.
|
||||
|
||||
[1] These are on various places on the TODO list, waiting for you to
|
||||
get annoyed enough at the limitation to fix it.
|
||||
[2] Lguest is not yet tickless when idle. See [1].
|
120
Documentation/lockstat.txt
Normal file
120
Documentation/lockstat.txt
Normal file
@ -0,0 +1,120 @@
|
||||
|
||||
LOCK STATISTICS
|
||||
|
||||
- WHAT
|
||||
|
||||
As the name suggests, it provides statistics on locks.
|
||||
|
||||
- WHY
|
||||
|
||||
Because things like lock contention can severely impact performance.
|
||||
|
||||
- HOW
|
||||
|
||||
Lockdep already has hooks in the lock functions and maps lock instances to
|
||||
lock classes. We build on that. The graph below shows the relation between
|
||||
the lock functions and the various hooks therein.
|
||||
|
||||
__acquire
|
||||
|
|
||||
lock _____
|
||||
| \
|
||||
| __contended
|
||||
| |
|
||||
| <wait>
|
||||
| _______/
|
||||
|/
|
||||
|
|
||||
__acquired
|
||||
|
|
||||
.
|
||||
<hold>
|
||||
.
|
||||
|
|
||||
__release
|
||||
|
|
||||
unlock
|
||||
|
||||
lock, unlock - the regular lock functions
|
||||
__* - the hooks
|
||||
<> - states
|
||||
|
||||
With these hooks we provide the following statistics:
|
||||
|
||||
con-bounces - number of lock contentions that involved x-cpu data
|
||||
contentions - number of lock acquisitions that had to wait
|
||||
wait time min - shortest (non-0) time we ever had to wait for a lock
|
||||
max - longest time we ever had to wait for a lock
|
||||
total - total time we spent waiting on this lock
|
||||
acq-bounces - number of lock acquisitions that involved x-cpu data
|
||||
acquisitions - number of times we took the lock
|
||||
hold time min - shortest (non-0) time we ever held the lock
|
||||
max - longest time we ever held the lock
|
||||
total - total time this lock was held
|
||||
|
||||
From these numbers various other statistics can be derived, such as:
|
||||
|
||||
hold time average = hold time total / acquisitions
|
||||
|
||||
These numbers are gathered per lock class, per read/write state (when
|
||||
applicable).
|
||||
|
||||
It also tracks 4 contention points per class. A contention point is a call site
|
||||
that had to wait on lock acquisition.
|
||||
|
||||
- USAGE
|
||||
|
||||
Look at the current lock statistics:
|
||||
|
||||
( line numbers not part of actual output, done for clarity in the explanation
|
||||
below )
|
||||
|
||||
# less /proc/lock_stat
|
||||
|
||||
01 lock_stat version 0.2
|
||||
02 -----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
||||
03 class name con-bounces contentions waittime-min waittime-max waittime-total acq-bounces acquisitions holdtime-min holdtime-max holdtime-total
|
||||
04 -----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
||||
05
|
||||
06 &inode->i_data.tree_lock-W: 15 21657 0.18 1093295.30 11547131054.85 58 10415 0.16 87.51 6387.60
|
||||
07 &inode->i_data.tree_lock-R: 0 0 0.00 0.00 0.00 23302 231198 0.25 8.45 98023.38
|
||||
08 --------------------------
|
||||
09 &inode->i_data.tree_lock 0 [<ffffffff8027c08f>] add_to_page_cache+0x5f/0x190
|
||||
10
|
||||
11 ...............................................................................................................................................................................................
|
||||
12
|
||||
13 dcache_lock: 1037 1161 0.38 45.32 774.51 6611 243371 0.15 306.48 77387.24
|
||||
14 -----------
|
||||
15 dcache_lock 180 [<ffffffff802c0d7e>] sys_getcwd+0x11e/0x230
|
||||
16 dcache_lock 165 [<ffffffff802c002a>] d_alloc+0x15a/0x210
|
||||
17 dcache_lock 33 [<ffffffff8035818d>] _atomic_dec_and_lock+0x4d/0x70
|
||||
18 dcache_lock 1 [<ffffffff802beef8>] shrink_dcache_parent+0x18/0x130
|
||||
|
||||
This excerpt shows the first two lock class statistics. Line 01 shows the
|
||||
output version - each time the format changes this will be updated. Lines 02-04
|
||||
show the header with column descriptions. Lines 05-10 and 13-18 show the actual
|
||||
statistics. These statistics come in two parts; the actual stats separated by a
|
||||
short separator (line 08, 14) from the contention points.
|
||||
|
||||
The first lock (05-10) is a read/write lock, and shows two lines above the
|
||||
short separator. The contention points don't match the column descriptors,
|
||||
they have two: contentions and [<IP>] symbol.
|
||||
|
||||
|
||||
View the top contending locks:
|
||||
|
||||
# grep : /proc/lock_stat | head
|
||||
&inode->i_data.tree_lock-W: 15 21657 0.18 1093295.30 11547131054.85 58 10415 0.16 87.51 6387.60
|
||||
&inode->i_data.tree_lock-R: 0 0 0.00 0.00 0.00 23302 231198 0.25 8.45 98023.38
|
||||
dcache_lock: 1037 1161 0.38 45.32 774.51 6611 243371 0.15 306.48 77387.24
|
||||
&inode->i_mutex: 161 286 18446744073709 62882.54 1244614.55 3653 20598 18446744073709 62318.60 1693822.74
|
||||
&zone->lru_lock: 94 94 0.53 7.33 92.10 4366 32690 0.29 59.81 16350.06
|
||||
&inode->i_data.i_mmap_lock: 79 79 0.40 3.77 53.03 11779 87755 0.28 116.93 29898.44
|
||||
&q->__queue_lock: 48 50 0.52 31.62 86.31 774 13131 0.17 113.08 12277.52
|
||||
&rq->rq_lock_key: 43 47 0.74 68.50 170.63 3706 33929 0.22 107.99 17460.62
|
||||
&rq->rq_lock_key#2: 39 46 0.75 6.68 49.03 2979 32292 0.17 125.17 17137.63
|
||||
tasklist_lock-W: 15 15 1.45 10.87 32.70 1201 7390 0.58 62.55 13648.47
|
||||
|
||||
Clear the statistics:
|
||||
|
||||
# echo 0 > /proc/lock_stat
|
322
Documentation/memory-hotplug.txt
Normal file
322
Documentation/memory-hotplug.txt
Normal file
@ -0,0 +1,322 @@
|
||||
==============
|
||||
Memory Hotplug
|
||||
==============
|
||||
|
||||
Last Updated: Jul 28 2007
|
||||
|
||||
This document is about memory hotplug including how-to-use and current status.
|
||||
Because Memory Hotplug is still under development, contents of this text will
|
||||
be changed often.
|
||||
|
||||
1. Introduction
|
||||
1.1 purpose of memory hotplug
|
||||
1.2. Phases of memory hotplug
|
||||
1.3. Unit of Memory online/offline operation
|
||||
2. Kernel Configuration
|
||||
3. sysfs files for memory hotplug
|
||||
4. Physical memory hot-add phase
|
||||
4.1 Hardware(Firmware) Support
|
||||
4.2 Notify memory hot-add event by hand
|
||||
5. Logical Memory hot-add phase
|
||||
5.1. State of memory
|
||||
5.2. How to online memory
|
||||
6. Logical memory remove
|
||||
6.1 Memory offline and ZONE_MOVABLE
|
||||
6.2. How to offline memory
|
||||
7. Physical memory remove
|
||||
8. Future Work List
|
||||
|
||||
Note(1): x86_64 has a special implementation for memory hotplug.
|
||||
This text does not describe it.
|
||||
Note(2): This text assumes that sysfs is mounted at /sys.
|
||||
|
||||
|
||||
---------------
|
||||
1. Introduction
|
||||
---------------
|
||||
|
||||
1.1 purpose of memory hotplug
|
||||
------------
|
||||
Memory Hotplug allows users to increase/decrease the amount of memory.
|
||||
Generally, there are two purposes.
|
||||
|
||||
(A) For changing the amount of memory.
|
||||
This is to allow a feature like capacity on demand.
|
||||
(B) For installing/removing DIMMs or NUMA-nodes physically.
|
||||
This is to exchange DIMMs/NUMA-nodes, reduce power consumption, etc.
|
||||
|
||||
(A) is required by highly virtualized environments and (B) is required by
|
||||
hardware which supports memory power management.
|
||||
|
||||
Linux memory hotplug is designed for both purposes.
|
||||
|
||||
|
||||
1.2. Phases of memory hotplug
|
||||
---------------
|
||||
There are 2 phases in Memory Hotplug.
|
||||
1) Physical Memory Hotplug phase
|
||||
2) Logical Memory Hotplug phase.
|
||||
|
||||
The first phase is to communicate with hardware/firmware and make/erase
|
||||
environment for hotplugged memory. Basically, this phase is necessary
|
||||
for the purpose (B), but this is good phase for communication between
|
||||
highly virtualized environments too.
|
||||
|
||||
When memory is hotplugged, the kernel recognizes new memory, makes new memory
|
||||
management tables, and makes sysfs files for new memory's operation.
|
||||
|
||||
If firmware supports notification of connection of new memory to OS,
|
||||
this phase is triggered automatically. ACPI can notify this event. If not,
|
||||
"probe" operation by system administration is used instead.
|
||||
(see Section 4.).
|
||||
|
||||
Logical Memory Hotplug phase is to change memory state into
|
||||
available/unavailable for users. Amount of memory from user's view is
|
||||
changed by this phase. The kernel makes all memory in it as free pages
|
||||
when a memory range is available.
|
||||
|
||||
In this document, this phase is described as online/offline.
|
||||
|
||||
Logical Memory Hotplug phase is triggered by write of sysfs file by system
|
||||
administrator. For the hot-add case, it must be executed after Physical Hotplug
|
||||
phase by hand.
|
||||
(However, if you write udev's hotplug scripts for memory hotplug, these
|
||||
phases can be executed in a seamless way.)
|
||||
|
||||
|
||||
1.3. Unit of Memory online/offline operation
|
||||
------------
|
||||
Memory hotplug uses SPARSEMEM memory model. SPARSEMEM divides the whole memory
|
||||
into chunks of the same size. The chunk is called a "section". The size of
|
||||
a section is architecture dependent. For example, power uses 16MiB, ia64 uses
|
||||
1GiB. The unit of online/offline operation is "one section". (see Section 3.)
|
||||
|
||||
To determine the size of sections, please read this file:
|
||||
|
||||
/sys/devices/system/memory/block_size_bytes
|
||||
|
||||
This file shows the size of sections in bytes.
|
||||
|
||||
-----------------------
|
||||
2. Kernel Configuration
|
||||
-----------------------
|
||||
To use memory hotplug feature, kernel must be compiled with following
|
||||
config options.
|
||||
|
||||
- For all memory hotplug
|
||||
Memory model -> Sparse Memory (CONFIG_SPARSEMEM)
|
||||
Allow for memory hot-add (CONFIG_MEMORY_HOTPLUG)
|
||||
|
||||
- To enable memory removal, the following are also necessary
|
||||
Allow for memory hot remove (CONFIG_MEMORY_HOTREMOVE)
|
||||
Page Migration (CONFIG_MIGRATION)
|
||||
|
||||
- For ACPI memory hotplug, the following are also necessary
|
||||
Memory hotplug (under ACPI Support menu) (CONFIG_ACPI_HOTPLUG_MEMORY)
|
||||
This option can be kernel module.
|
||||
|
||||
- As a related configuration, if your box has a feature of NUMA-node hotplug
|
||||
via ACPI, then this option is necessary too.
|
||||
ACPI0004,PNP0A05 and PNP0A06 Container Driver (under ACPI Support menu)
|
||||
(CONFIG_ACPI_CONTAINER).
|
||||
This option can be kernel module too.
|
||||
|
||||
--------------------------------
|
||||
3. sysfs files for memory hotplug
|
||||
--------------------------------
|
||||
All sections have their device information under /sys/devices/system/memory as
|
||||
|
||||
/sys/devices/system/memory/memoryXXX
|
||||
(XXX is section id.)
|
||||
|
||||
Now, XXX is defined as start_address_of_section / section_size.
|
||||
|
||||
For example, assume 1GiB section size. A device for a memory starting at
|
||||
0x100000000 is /sys/devices/system/memory/memory4
|
||||
(0x100000000 / 1GiB = 4)
|
||||
This device covers address range [0x100000000 ... 0x140000000)
|
||||
|
||||
Under each section, you can see 3 files.
|
||||
|
||||
/sys/devices/system/memory/memoryXXX/phys_index
|
||||
/sys/devices/system/memory/memoryXXX/phys_device
|
||||
/sys/devices/system/memory/memoryXXX/state
|
||||
|
||||
'phys_index' : read-only and contains section id, same as XXX.
|
||||
'state' : read-write
|
||||
at read: contains online/offline state of memory.
|
||||
at write: user can specify "online", "offline" command
|
||||
'phys_device': read-only: designed to show the name of physical memory device.
|
||||
This is not well implemented now.
|
||||
|
||||
NOTE:
|
||||
These directories/files appear after physical memory hotplug phase.
|
||||
|
||||
|
||||
--------------------------------
|
||||
4. Physical memory hot-add phase
|
||||
--------------------------------
|
||||
|
||||
4.1 Hardware(Firmware) Support
|
||||
------------
|
||||
On x86_64/ia64 platform, memory hotplug by ACPI is supported.
|
||||
|
||||
In general, the firmware (ACPI) which supports memory hotplug defines
|
||||
memory class object of _HID "PNP0C80". When a notify is asserted to PNP0C80,
|
||||
Linux's ACPI handler does hot-add memory to the system and calls a hotplug udev
|
||||
script. This will be done automatically.
|
||||
|
||||
But scripts for memory hotplug are not contained in generic udev package(now).
|
||||
You may have to write it by yourself or online/offline memory by hand.
|
||||
Please see "How to online memory", "How to offline memory" in this text.
|
||||
|
||||
If firmware supports NUMA-node hotplug, and defines an object _HID "ACPI0004",
|
||||
"PNP0A05", or "PNP0A06", notification is asserted to it, and ACPI handler
|
||||
calls hotplug code for all of objects which are defined in it.
|
||||
If memory device is found, memory hotplug code will be called.
|
||||
|
||||
|
||||
4.2 Notify memory hot-add event by hand
|
||||
------------
|
||||
In some environments, especially virtualized environment, firmware will not
|
||||
notify memory hotplug event to the kernel. For such environment, "probe"
|
||||
interface is supported. This interface depends on CONFIG_ARCH_MEMORY_PROBE.
|
||||
|
||||
Now, CONFIG_ARCH_MEMORY_PROBE is supported only by powerpc but it does not
|
||||
contain highly architecture-specific code. Please add config if you need "probe"
|
||||
interface.
|
||||
|
||||
Probe interface is located at
|
||||
/sys/devices/system/memory/probe
|
||||
|
||||
You can tell the physical address of new memory to the kernel by
|
||||
|
||||
% echo start_address_of_new_memory > /sys/devices/system/memory/probe
|
||||
|
||||
Then, [start_address_of_new_memory, start_address_of_new_memory + section_size)
|
||||
memory range is hot-added. In this case, hotplug script is not called (in
|
||||
current implementation). You'll have to online memory by yourself.
|
||||
Please see "How to online memory" in this text.
|
||||
|
||||
|
||||
|
||||
------------------------------
|
||||
5. Logical Memory hot-add phase
|
||||
------------------------------
|
||||
|
||||
5.1. State of memory
|
||||
------------
|
||||
To see (online/offline) state of memory section, read 'state' file.
|
||||
|
||||
% cat /sys/devices/system/memory/memoryXXX/state
|
||||
|
||||
|
||||
If the memory section is online, you'll read "online".
|
||||
If the memory section is offline, you'll read "offline".
|
||||
|
||||
|
||||
5.2. How to online memory
|
||||
------------
|
||||
Even if the memory is hot-added, it is not at ready-to-use state.
|
||||
For using newly added memory, you have to "online" the memory section.
|
||||
|
||||
For onlining, you have to write "online" to the section's state file as:
|
||||
|
||||
% echo online > /sys/devices/system/memory/memoryXXX/state
|
||||
|
||||
After this, section memoryXXX's state will be 'online' and the amount of
|
||||
available memory will be increased.
|
||||
|
||||
Currently, newly added memory is added as ZONE_NORMAL (for powerpc, ZONE_DMA).
|
||||
This may be changed in future.
|
||||
|
||||
|
||||
|
||||
------------------------
|
||||
6. Logical memory remove
|
||||
------------------------
|
||||
|
||||
6.1 Memory offline and ZONE_MOVABLE
|
||||
------------
|
||||
Memory offlining is more complicated than memory online. Because memory offline
|
||||
has to make the whole memory section be unused, memory offline can fail if
|
||||
the section includes memory which cannot be freed.
|
||||
|
||||
In general, memory offline can use 2 techniques.
|
||||
|
||||
(1) reclaim and free all memory in the section.
|
||||
(2) migrate all pages in the section.
|
||||
|
||||
In the current implementation, Linux's memory offline uses method (2), freeing
|
||||
all pages in the section by page migration. But not all pages are
|
||||
migratable. Under current Linux, migratable pages are anonymous pages and
|
||||
page caches. For offlining a section by migration, the kernel has to guarantee
|
||||
that the section contains only migratable pages.
|
||||
|
||||
Now, a boot option for making a section which consists of migratable pages is
|
||||
supported. By specifying "kernelcore=" or "movablecore=" boot option, you can
|
||||
create ZONE_MOVABLE...a zone which is just used for movable pages.
|
||||
(See also Documentation/kernel-parameters.txt)
|
||||
|
||||
Assume the system has "TOTAL" amount of memory at boot time, this boot option
|
||||
creates ZONE_MOVABLE as following.
|
||||
|
||||
1) When kernelcore=YYYY boot option is used,
|
||||
Size of memory not for movable pages (not for offline) is YYYY.
|
||||
Size of memory for movable pages (for offline) is TOTAL-YYYY.
|
||||
|
||||
2) When movablecore=ZZZZ boot option is used,
|
||||
Size of memory not for movable pages (not for offline) is TOTAL - ZZZZ.
|
||||
Size of memory for movable pages (for offline) is ZZZZ.
|
||||
|
||||
|
||||
Note) Unfortunately, there is no information to show which section belongs
|
||||
to ZONE_MOVABLE. This is TBD.
|
||||
|
||||
|
||||
6.2. How to offline memory
|
||||
------------
|
||||
You can offline a section by using the same sysfs interface that was used in
|
||||
memory onlining.
|
||||
|
||||
% echo offline > /sys/devices/system/memory/memoryXXX/state
|
||||
|
||||
If offline succeeds, the state of the memory section is changed to be "offline".
|
||||
If it fails, some error code (like -EBUSY) will be returned by the kernel.
|
||||
Even if a section does not belong to ZONE_MOVABLE, you can try to offline it.
|
||||
If it doesn't contain 'unmovable' memory, you'll get success.
|
||||
|
||||
A section under ZONE_MOVABLE is considered to be able to be offlined easily.
|
||||
But under some busy state, it may return -EBUSY. Even if a memory section
|
||||
cannot be offlined due to -EBUSY, you can retry offlining it and may be able to
|
||||
offline it (or not).
|
||||
(For example, a page is referred to by some kernel internal call and released
|
||||
soon.)
|
||||
|
||||
Consideration:
|
||||
Memory hotplug's design direction is to make the possibility of memory offlining
|
||||
higher and to guarantee unplugging memory under any situation. But it needs
|
||||
more work. Returning -EBUSY under some situation may be good because the user
|
||||
can decide to retry more or not by himself. Currently, memory offlining code
|
||||
does some amount of retry with 120 seconds timeout.
|
||||
|
||||
-------------------------
|
||||
7. Physical memory remove
|
||||
-------------------------
|
||||
Need more implementation yet....
|
||||
- Notification completion of remove works by OS to firmware.
|
||||
- Guard from remove if not yet.
|
||||
|
||||
--------------
|
||||
8. Future Work
|
||||
--------------
|
||||
- allowing memory hot-add to ZONE_MOVABLE. maybe we need some switch like
|
||||
sysctl or new control file.
|
||||
- showing memory section and physical device relationship.
|
||||
- showing memory section and node relationship (maybe good for NUMA)
|
||||
- showing memory section is under ZONE_MOVABLE or not
|
||||
- test and make it better memory offlining.
|
||||
- support HugeTLB page migration and offlining.
|
||||
- memmap removing at memory offline.
|
||||
- physical remove memory.
|
||||
|
@ -96,6 +96,9 @@ routing.txt
|
||||
- the new routing mechanism
|
||||
shaper.txt
|
||||
- info on the module that can shape/limit transmitted traffic.
|
||||
sk98lin.txt
|
||||
- Marvell Yukon Chipset / SysKonnect SK-98xx compliant Gigabit
|
||||
Ethernet Adapter family driver info
|
||||
skfp.txt
|
||||
- SysKonnect FDDI (SK-5xxx, Compaq Netelligent) driver info.
|
||||
smc9.txt
|
||||
|
@ -1,766 +0,0 @@
|
||||
HISTORY:
|
||||
February 16/2002 -- revision 0.2.1:
|
||||
COR typo corrected
|
||||
February 10/2002 -- revision 0.2:
|
||||
some spell checking ;->
|
||||
January 12/2002 -- revision 0.1
|
||||
This is still work in progress so may change.
|
||||
To keep up to date please watch this space.
|
||||
|
||||
Introduction to NAPI
|
||||
====================
|
||||
|
||||
NAPI is a proven (www.cyberus.ca/~hadi/usenix-paper.tgz) technique
|
||||
to improve network performance on Linux. For more details please
|
||||
read that paper.
|
||||
NAPI provides an "inherent mitigation" which is bound by system capacity
|
||||
as can be seen from the following data collected by Robert on Gigabit
|
||||
ethernet (e1000):
|
||||
|
||||
Psize Ipps Tput Rxint Txint Done Ndone
|
||||
---------------------------------------------------------------
|
||||
60 890000 409362 17 27622 7 6823
|
||||
128 758150 464364 21 9301 10 7738
|
||||
256 445632 774646 42 15507 21 12906
|
||||
512 232666 994445 241292 19147 241192 1062
|
||||
1024 119061 1000003 872519 19258 872511 0
|
||||
1440 85193 1000003 946576 19505 946569 0
|
||||
|
||||
|
||||
Legend:
|
||||
"Ipps" stands for input packets per second.
|
||||
"Tput" == packets out of total 1M that made it out.
|
||||
"txint" == transmit completion interrupts seen
|
||||
"Done" == The number of times that the poll() managed to pull all
|
||||
packets out of the rx ring. Note from this that the lower the
|
||||
load the more we could clean up the rxring
|
||||
"Ndone" == is the converse of "Done". Note again, that the higher
|
||||
the load the more times we couldn't clean up the rxring.
|
||||
|
||||
Observe that:
|
||||
when the NIC receives 890Kpackets/sec only 17 rx interrupts are generated.
|
||||
The system can't handle the processing at 1 interrupt/packet at that load level.
|
||||
At lower rates on the other hand, rx interrupts go up and therefore the
|
||||
interrupt/packet ratio goes up (as observable from that table). So there is
|
||||
possibility that under low enough input, you get one poll call for each
|
||||
input packet caused by a single interrupt each time. And if the system
|
||||
can't handle interrupt per packet ratio of 1, then it will just have to
|
||||
chug along ....
|
||||
|
||||
|
||||
0) Prerequisites:
|
||||
==================
|
||||
A driver MAY continue using the old 2.4 technique for interfacing
|
||||
to the network stack and not benefit from the NAPI changes.
|
||||
NAPI additions to the kernel do not break backward compatibility.
|
||||
NAPI, however, requires the following features to be available:
|
||||
|
||||
A) DMA ring or enough RAM to store packets in software devices.
|
||||
|
||||
B) Ability to turn off interrupts or maybe events that send packets up
|
||||
the stack.
|
||||
|
||||
NAPI processes packet events in what is known as dev->poll() method.
|
||||
Typically, only packet receive events are processed in dev->poll().
|
||||
The rest of the events MAY be processed by the regular interrupt handler
|
||||
to reduce processing latency (justified also because there are not that
|
||||
many of them).
|
||||
Note, however, NAPI does not enforce that dev->poll() only processes
|
||||
receive events.
|
||||
Tests with the tulip driver indicated slightly increased latency if
|
||||
all of the interrupt handler is moved to dev->poll(). Also MII handling
|
||||
gets a little trickier.
|
||||
The example used in this document is to move the receive processing only
|
||||
to dev->poll(); this is shown with the patch for the tulip driver.
|
||||
For an example of code that moves all the interrupt driver to
|
||||
dev->poll() look at the ported e1000 code.
|
||||
|
||||
There are caveats that might force you to go with moving everything to
|
||||
dev->poll(). Different NICs work differently depending on their status/event
|
||||
acknowledgement setup.
|
||||
There are two types of event register ACK mechanisms.
|
||||
I) what is known as Clear-on-read (COR).
|
||||
when you read the status/event register, it clears everything!
|
||||
The natsemi and sunbmac NICs are known to do this.
|
||||
In this case your only choice is to move all to dev->poll()
|
||||
|
||||
II) Clear-on-write (COW)
|
||||
i) you clear the status by writing a 1 in the bit-location you want.
|
||||
These are the majority of the NICs and work the best with NAPI.
|
||||
Put only receive events in dev->poll(); leave the rest in
|
||||
the old interrupt handler.
|
||||
ii) whatever you write in the status register clears every thing ;->
|
||||
Cant seem to find any supported by Linux which do this. If
|
||||
someone knows such a chip email us please.
|
||||
Move all to dev->poll()
|
||||
|
||||
C) Ability to detect new work correctly.
|
||||
NAPI works by shutting down event interrupts when there's work and
|
||||
turning them on when there's none.
|
||||
New packets might show up in the small window while interrupts were being
|
||||
re-enabled (refer to appendix 2). A packet might sneak in during the period
|
||||
we are enabling interrupts. We only get to know about such a packet when the
|
||||
next new packet arrives and generates an interrupt.
|
||||
Essentially, there is a small window of opportunity for a race condition
|
||||
which for clarity we'll refer to as the "rotting packet".
|
||||
|
||||
This is a very important topic and appendix 2 is dedicated for more
|
||||
discussion.
|
||||
|
||||
Locking rules and environmental guarantees
|
||||
==========================================
|
||||
|
||||
-Guarantee: Only one CPU at any time can call dev->poll(); this is because
|
||||
only one CPU can pick the initial interrupt and hence the initial
|
||||
netif_rx_schedule(dev);
|
||||
- The core layer invokes devices to send packets in a round robin format.
|
||||
This implies receive is totally lockless because of the guarantee that only
|
||||
one CPU is executing it.
|
||||
- contention can only be the result of some other CPU accessing the rx
|
||||
ring. This happens only in close() and suspend() (when these methods
|
||||
try to clean the rx ring);
|
||||
****guarantee: driver authors need not worry about this; synchronization
|
||||
is taken care for them by the top net layer.
|
||||
-local interrupts are enabled (if you dont move all to dev->poll()). For
|
||||
example link/MII and txcomplete continue functioning just same old way.
|
||||
This improves the latency of processing these events. It is also assumed that
|
||||
the receive interrupt is the largest cause of noise. Note this might not
|
||||
always be true.
|
||||
[according to Manfred Spraul, the winbond insists on sending one
|
||||
txmitcomplete interrupt for each packet (although this can be mitigated)].
|
||||
For these broken drivers, move all to dev->poll().
|
||||
|
||||
For the rest of this text, we'll assume that dev->poll() only
|
||||
processes receive events.
|
||||
|
||||
new methods introduced by NAPI
|
||||
=============================
|
||||
|
||||
a) netif_rx_schedule(dev)
|
||||
Called by an IRQ handler to schedule a poll for device
|
||||
|
||||
b) netif_rx_schedule_prep(dev)
|
||||
puts the device in a state which allows for it to be added to the
|
||||
CPU polling list if it is up and running. You can look at this as
|
||||
the first half of netif_rx_schedule(dev) above; the second half
|
||||
being c) below.
|
||||
|
||||
c) __netif_rx_schedule(dev)
|
||||
Add device to the poll list for this CPU; assuming that _prep above
|
||||
has already been called and returned 1.
|
||||
|
||||
d) netif_rx_reschedule(dev, undo)
|
||||
Called to reschedule polling for device specifically for some
|
||||
deficient hardware. Read Appendix 2 for more details.
|
||||
|
||||
e) netif_rx_complete(dev)
|
||||
|
||||
Remove interface from the CPU poll list: it must be in the poll list
|
||||
on current cpu. This primitive is called by dev->poll(), when
|
||||
it completes its work. The device cannot be out of poll list at this
|
||||
call, if it is then clearly it is a BUG(). You'll know ;->
|
||||
|
||||
All of the above methods are used below, so keep reading for clarity.
|
||||
|
||||
Device driver changes to be made when porting NAPI
|
||||
==================================================
|
||||
|
||||
Below we describe what kind of changes are required for NAPI to work.
|
||||
|
||||
1) introduction of dev->poll() method
|
||||
=====================================
|
||||
|
||||
This is the method that is invoked by the network core when it requests
|
||||
for new packets from the driver. A driver is allowed to send up to
|
||||
dev->quota packets by the current CPU before yielding to the network
|
||||
subsystem (so other devices can also get opportunity to send to the stack).
|
||||
|
||||
dev->poll() prototype looks as follows:
|
||||
int my_poll(struct net_device *dev, int *budget)
|
||||
|
||||
budget is the remaining number of packets the network subsystem on the
|
||||
current CPU can send up the stack before yielding to other system tasks.
|
||||
*Each driver is responsible for decrementing budget by the total number of
|
||||
packets sent.
|
||||
Total number of packets cannot exceed dev->quota.
|
||||
|
||||
dev->poll() method is invoked by the top layer, the driver just sends if it
|
||||
can to the stack the packet quantity requested.
|
||||
|
||||
more on dev->poll() below after the interrupt changes are explained.
|
||||
|
||||
2) registering dev->poll() method
|
||||
===================================
|
||||
|
||||
dev->poll should be set in the dev->probe() method.
|
||||
e.g:
|
||||
dev->open = my_open;
|
||||
.
|
||||
.
|
||||
/* two new additions */
|
||||
/* first register my poll method */
|
||||
dev->poll = my_poll;
|
||||
/* next register my weight/quanta; can be overridden in /proc */
|
||||
dev->weight = 16;
|
||||
.
|
||||
.
|
||||
dev->stop = my_close;
|
||||
|
||||
|
||||
|
||||
3) scheduling dev->poll()
|
||||
=============================
|
||||
This involves modifying the interrupt handler and the code
|
||||
path which takes the packet off the NIC and sends them to the
|
||||
stack.
|
||||
|
||||
it's important at this point to introduce the classical D Becker
|
||||
interrupt processor:
|
||||
|
||||
------------------
|
||||
static irqreturn_t
|
||||
netdevice_interrupt(int irq, void *dev_id, struct pt_regs *regs)
|
||||
{
|
||||
|
||||
struct net_device *dev = (struct net_device *)dev_instance;
|
||||
struct my_private *tp = (struct my_private *)dev->priv;
|
||||
|
||||
int work_count = my_work_count;
|
||||
status = read_interrupt_status_reg();
|
||||
if (status == 0)
|
||||
return IRQ_NONE; /* Shared IRQ: not us */
|
||||
if (status == 0xffff)
|
||||
return IRQ_HANDLED; /* Hot unplug */
|
||||
if (status & error)
|
||||
do_some_error_handling();
|
||||
|
||||
do {
|
||||
acknowledge_ints_ASAP();
|
||||
|
||||
if (status & link_interrupt) {
|
||||
spin_lock(&tp->link_lock);
|
||||
do_some_link_stat_stuff();
|
||||
spin_unlock(&tp->link_lock);
|
||||
}
|
||||
|
||||
if (status & rx_interrupt) {
|
||||
receive_packets(dev);
|
||||
}
|
||||
|
||||
if (status & rx_nobufs) {
|
||||
make_rx_buffs_avail();
|
||||
}
|
||||
|
||||
if (status & tx_related) {
|
||||
spin_lock(&tp->lock);
|
||||
tx_ring_free(dev);
|
||||
if (tx_died)
|
||||
restart_tx();
|
||||
spin_unlock(&tp->lock);
|
||||
}
|
||||
|
||||
status = read_interrupt_status_reg();
|
||||
|
||||
} while (!(status & error) || more_work_to_be_done);
|
||||
return IRQ_HANDLED;
|
||||
}
|
||||
|
||||
----------------------------------------------------------------------
|
||||
|
||||
We now change this to what is shown below to NAPI-enable it:
|
||||
|
||||
----------------------------------------------------------------------
|
||||
static irqreturn_t
|
||||
netdevice_interrupt(int irq, void *dev_id, struct pt_regs *regs)
|
||||
{
|
||||
struct net_device *dev = (struct net_device *)dev_instance;
|
||||
struct my_private *tp = (struct my_private *)dev->priv;
|
||||
|
||||
status = read_interrupt_status_reg();
|
||||
if (status == 0)
|
||||
return IRQ_NONE; /* Shared IRQ: not us */
|
||||
if (status == 0xffff)
|
||||
return IRQ_HANDLED; /* Hot unplug */
|
||||
if (status & error)
|
||||
do_some_error_handling();
|
||||
|
||||
do {
|
||||
/************************ start note *********************************/
|
||||
acknowledge_ints_ASAP(); // dont ack rx and rxnobuff here
|
||||
/************************ end note *********************************/
|
||||
|
||||
if (status & link_interrupt) {
|
||||
spin_lock(&tp->link_lock);
|
||||
do_some_link_stat_stuff();
|
||||
spin_unlock(&tp->link_lock);
|
||||
}
|
||||
/************************ start note *********************************/
|
||||
if (status & rx_interrupt || (status & rx_nobuffs)) {
|
||||
if (netif_rx_schedule_prep(dev)) {
|
||||
|
||||
/* disable interrupts caused
|
||||
* by arriving packets */
|
||||
disable_rx_and_rxnobuff_ints();
|
||||
/* tell system we have work to be done. */
|
||||
__netif_rx_schedule(dev);
|
||||
} else {
|
||||
printk("driver bug! interrupt while in poll\n");
|
||||
/* FIX by disabling interrupts */
|
||||
disable_rx_and_rxnobuff_ints();
|
||||
}
|
||||
}
|
||||
/************************ end note note *********************************/
|
||||
|
||||
if (status & tx_related) {
|
||||
spin_lock(&tp->lock);
|
||||
tx_ring_free(dev);
|
||||
|
||||
if (tx_died)
|
||||
restart_tx();
|
||||
spin_unlock(&tp->lock);
|
||||
}
|
||||
|
||||
status = read_interrupt_status_reg();
|
||||
|
||||
/************************ start note *********************************/
|
||||
} while (!(status & error) || more_work_to_be_done(status));
|
||||
/************************ end note note *********************************/
|
||||
return IRQ_HANDLED;
|
||||
}
|
||||
|
||||
---------------------------------------------------------------------
|
||||
|
||||
|
||||
We note several things from above:
|
||||
|
||||
I) Any interrupt source which is caused by arriving packets is now
|
||||
turned off when it occurs. Depending on the hardware, there could be
|
||||
several reasons that arriving packets would cause interrupts; these are the
|
||||
interrupt sources we wish to avoid. The two common ones are a) a packet
|
||||
arriving (rxint) b) a packet arriving and finding no DMA buffers available
|
||||
(rxnobuff) .
|
||||
This means also acknowledge_ints_ASAP() will not clear the status
|
||||
register for those two items above; clearing is done in the place where
|
||||
proper work is done within NAPI; at the poll() and refill_rx_ring()
|
||||
discussed further below.
|
||||
netif_rx_schedule_prep() returns 1 if device is in running state and
|
||||
gets successfully added to the core poll list. If we get a zero value
|
||||
we can _almost_ assume we are already added to the list (instead of not running.
|
||||
Logic based on the fact that you shouldn't get interrupt if not running)
|
||||
We rectify this by disabling rx and rxnobuf interrupts.
|
||||
|
||||
II) that receive_packets(dev) and make_rx_buffs_avail() may have disappeared.
|
||||
These functionalities are still around actually......
|
||||
|
||||
In fact, receive_packets(dev) is very close to my_poll() and
|
||||
make_rx_buffs_avail() is invoked from my_poll()
|
||||
|
||||
4) converting receive_packets() to dev->poll()
|
||||
===============================================
|
||||
|
||||
We need to convert the classical D Becker receive_packets(dev) to my_poll()
|
||||
|
||||
First the typical receive_packets() below:
|
||||
-------------------------------------------------------------------
|
||||
|
||||
/* this is called by interrupt handler */
|
||||
static void receive_packets (struct net_device *dev)
|
||||
{
|
||||
|
||||
struct my_private *tp = (struct my_private *)dev->priv;
|
||||
rx_ring = tp->rx_ring;
|
||||
cur_rx = tp->cur_rx;
|
||||
int entry = cur_rx % RX_RING_SIZE;
|
||||
int received = 0;
|
||||
int rx_work_limit = tp->dirty_rx + RX_RING_SIZE - tp->cur_rx;
|
||||
|
||||
while (rx_ring_not_empty) {
|
||||
u32 rx_status;
|
||||
unsigned int rx_size;
|
||||
unsigned int pkt_size;
|
||||
struct sk_buff *skb;
|
||||
/* read size+status of next frame from DMA ring buffer */
|
||||
/* the number 16 and 4 are just examples */
|
||||
rx_status = le32_to_cpu (*(u32 *) (rx_ring + ring_offset));
|
||||
rx_size = rx_status >> 16;
|
||||
pkt_size = rx_size - 4;
|
||||
|
||||
/* process errors */
|
||||
if ((rx_size > (MAX_ETH_FRAME_SIZE+4)) ||
|
||||
(!(rx_status & RxStatusOK))) {
|
||||
netdrv_rx_err (rx_status, dev, tp, ioaddr);
|
||||
return;
|
||||
}
|
||||
|
||||
if (--rx_work_limit < 0)
|
||||
break;
|
||||
|
||||
/* grab a skb */
|
||||
skb = dev_alloc_skb (pkt_size + 2);
|
||||
if (skb) {
|
||||
.
|
||||
.
|
||||
netif_rx (skb);
|
||||
.
|
||||
.
|
||||
} else { /* OOM */
|
||||
/*seems very driver specific ... some just pass
|
||||
whatever is on the ring already. */
|
||||
}
|
||||
|
||||
/* move to the next skb on the ring */
|
||||
entry = (++tp->cur_rx) % RX_RING_SIZE;
|
||||
received++ ;
|
||||
|
||||
}
|
||||
|
||||
/* store current ring pointer state */
|
||||
tp->cur_rx = cur_rx;
|
||||
|
||||
/* Refill the Rx ring buffers if they are needed */
|
||||
refill_rx_ring();
|
||||
.
|
||||
.
|
||||
|
||||
}
|
||||
-------------------------------------------------------------------
|
||||
We change it to a new one below; note the additional parameter in
|
||||
the call.
|
||||
|
||||
-------------------------------------------------------------------
|
||||
|
||||
/* this is called by the network core */
|
||||
static int my_poll (struct net_device *dev, int *budget)
|
||||
{
|
||||
|
||||
struct my_private *tp = (struct my_private *)dev->priv;
|
||||
rx_ring = tp->rx_ring;
|
||||
cur_rx = tp->cur_rx;
|
||||
int entry = cur_rx % RX_BUF_LEN;
|
||||
/* maximum packets to send to the stack */
|
||||
/************************ note note *********************************/
|
||||
int rx_work_limit = dev->quota;
|
||||
|
||||
/************************ end note note *********************************/
|
||||
do { // outer beginning loop starts here
|
||||
|
||||
clear_rx_status_register_bit();
|
||||
|
||||
while (rx_ring_not_empty) {
|
||||
u32 rx_status;
|
||||
unsigned int rx_size;
|
||||
unsigned int pkt_size;
|
||||
struct sk_buff *skb;
|
||||
/* read size+status of next frame from DMA ring buffer */
|
||||
/* the number 16 and 4 are just examples */
|
||||
rx_status = le32_to_cpu (*(u32 *) (rx_ring + ring_offset));
|
||||
rx_size = rx_status >> 16;
|
||||
pkt_size = rx_size - 4;
|
||||
|
||||
/* process errors */
|
||||
if ((rx_size > (MAX_ETH_FRAME_SIZE+4)) ||
|
||||
(!(rx_status & RxStatusOK))) {
|
||||
netdrv_rx_err (rx_status, dev, tp, ioaddr);
|
||||
return 1;
|
||||
}
|
||||
|
||||
/************************ note note *********************************/
|
||||
if (--rx_work_limit < 0) { /* we got packets, but no quota */
|
||||
/* store current ring pointer state */
|
||||
tp->cur_rx = cur_rx;
|
||||
|
||||
/* Refill the Rx ring buffers if they are needed */
|
||||
refill_rx_ring(dev);
|
||||
goto not_done;
|
||||
}
|
||||
/********************** end note **********************************/
|
||||
|
||||
/* grab a skb */
|
||||
skb = dev_alloc_skb (pkt_size + 2);
|
||||
if (skb) {
|
||||
.
|
||||
.
|
||||
/************************ note note *********************************/
|
||||
netif_receive_skb (skb);
|
||||
/********************** end note **********************************/
|
||||
.
|
||||
.
|
||||
} else { /* OOM */
|
||||
/*seems very driver specific ... common is just pass
|
||||
whatever is on the ring already. */
|
||||
}
|
||||
|
||||
/* move to the next skb on the ring */
|
||||
entry = (++tp->cur_rx) % RX_RING_SIZE;
|
||||
received++ ;
|
||||
|
||||
}
|
||||
|
||||
/* store current ring pointer state */
|
||||
tp->cur_rx = cur_rx;
|
||||
|
||||
/* Refill the Rx ring buffers if they are needed */
|
||||
refill_rx_ring(dev);
|
||||
|
||||
/* no packets on ring; but new ones can arrive since we last
|
||||
checked */
|
||||
status = read_interrupt_status_reg();
|
||||
if (rx status is not set) {
|
||||
/* If something arrives in this narrow window,
|
||||
an interrupt will be generated */
|
||||
goto done;
|
||||
}
|
||||
/* done! at least that's what it looks like ;->
|
||||
if new packets came in after our last check on status bits
|
||||
they'll be caught by the while check and we go back and clear them
|
||||
since we havent exceeded our quota */
|
||||
} while (rx_status_is_set);
|
||||
|
||||
done:
|
||||
|
||||
/************************ note note *********************************/
|
||||
dev->quota -= received;
|
||||
*budget -= received;
|
||||
|
||||
/* If RX ring is not full we are out of memory. */
|
||||
if (tp->rx_buffers[tp->dirty_rx % RX_RING_SIZE].skb == NULL)
|
||||
goto oom;
|
||||
|
||||
/* we are happy/done, no more packets on ring; put us back
|
||||
to where we can start processing interrupts again */
|
||||
netif_rx_complete(dev);
|
||||
enable_rx_and_rxnobuf_ints();
|
||||
|
||||
/* The last op happens after poll completion. Which means the following:
|
||||
* 1. it can race with disabling irqs in irq handler (which are done to
|
||||
* schedule polls)
|
||||
* 2. it can race with dis/enabling irqs in other poll threads
|
||||
* 3. if an irq raised after the beginning of the outer beginning
|
||||
* loop (marked in the code above), it will be immediately
|
||||
* triggered here.
|
||||
*
|
||||
* Summarizing: the logic may result in some redundant irqs both
|
||||
* due to races in masking and due to too late acking of already
|
||||
* processed irqs. The good news: no events are ever lost.
|
||||
*/
|
||||
|
||||
return 0; /* done */
|
||||
|
||||
not_done:
|
||||
if (tp->cur_rx - tp->dirty_rx > RX_RING_SIZE/2 ||
|
||||
tp->rx_buffers[tp->dirty_rx % RX_RING_SIZE].skb == NULL)
|
||||
refill_rx_ring(dev);
|
||||
|
||||
if (!received) {
|
||||
printk("received==0\n");
|
||||
received = 1;
|
||||
}
|
||||
dev->quota -= received;
|
||||
*budget -= received;
|
||||
return 1; /* not_done */
|
||||
|
||||
oom:
|
||||
/* Start timer, stop polling, but do not enable rx interrupts. */
|
||||
start_poll_timer(dev);
|
||||
return 0; /* we'll take it from here so tell core "done"*/
|
||||
|
||||
/************************ End note note *********************************/
|
||||
}
|
||||
-------------------------------------------------------------------
|
||||
|
||||
From above we note that:
|
||||
0) rx_work_limit = dev->quota
|
||||
1) refill_rx_ring() is in charge of clearing the bit for rxnobuff when
|
||||
it does the work.
|
||||
2) We have a done and not_done state.
|
||||
3) instead of netif_rx() we call netif_receive_skb() to pass the skb.
|
||||
4) we have a new way of handling oom condition
|
||||
5) A new outer do { } while loop has been added. This serves the purpose of
|
||||
ensuring that if a new packet has come in, after we are all set and done,
|
||||
and we have not exceeded our quota that we continue sending packets up.
|
||||
|
||||
|
||||
-----------------------------------------------------------
|
||||
Poll timer code will need to do the following:
|
||||
|
||||
a)
|
||||
|
||||
if (tp->cur_rx - tp->dirty_rx > RX_RING_SIZE/2 ||
|
||||
tp->rx_buffers[tp->dirty_rx % RX_RING_SIZE].skb == NULL)
|
||||
refill_rx_ring(dev);
|
||||
|
||||
/* If RX ring is not full we are still out of memory.
|
||||
Restart the timer again. Else we re-add ourselves
|
||||
to the master poll list.
|
||||
*/
|
||||
|
||||
if (tp->rx_buffers[tp->dirty_rx % RX_RING_SIZE].skb == NULL)
|
||||
restart_timer();
|
||||
|
||||
else netif_rx_schedule(dev); /* we are back on the poll list */
|
||||
|
||||
5) dev->close() and dev->suspend() issues
|
||||
==========================================
|
||||
The driver writer needn't worry about this; the top net layer takes
|
||||
care of it.
|
||||
|
||||
6) Adding new Stats to /proc
|
||||
=============================
|
||||
In order to debug some of the new features, we introduce new stats
|
||||
that need to be collected.
|
||||
TODO: Fill this later.
|
||||
|
||||
APPENDIX 1: discussion on using ethernet HW FC
|
||||
==============================================
|
||||
Most chips with FC only send a pause packet when they run out of Rx buffers.
|
||||
Since packets are pulled off the DMA ring by a softirq in NAPI,
|
||||
if the system is slow in grabbing them and we have a high input
|
||||
rate (faster than the system's capacity to remove packets), then theoretically
|
||||
there will only be one rx interrupt for all packets during a given packetstorm.
|
||||
Under low load, we might have a single interrupt per packet.
|
||||
FC should be programmed to apply in the case when the system can't pull out
|
||||
packets fast enough, i.e. send a pause only when you run out of rx buffers.
|
||||
Note FC in itself is a good solution but we have found it to not be
|
||||
much of a commodity feature (both in NICs and switches) and hence falls
|
||||
under the same category as using NIC based mitigation. Also, experiments
|
||||
indicate that it's much harder to resolve the resource allocation
|
||||
issue (aka lazy receiving that NAPI offers) and hence quantify its usefulness
|
||||
proved harder. In any case, FC works even better with NAPI but is not
|
||||
necessary.
|
||||
|
||||
|
||||
APPENDIX 2: the "rotting packet" race-window avoidance scheme
|
||||
=============================================================
|
||||
|
||||
There are two types of associations seen here
|
||||
|
||||
1) status/int which honors level triggered IRQ
|
||||
|
||||
If a status bit for receive or rxnobuff is set and the corresponding
|
||||
interrupt-enable bit is not on, then no interrupts will be generated. However,
|
||||
as soon as the "interrupt-enable" bit is unmasked, an immediate interrupt is
|
||||
generated. [assuming the status bit was not turned off].
|
||||
Generally the concept of level triggered IRQs in association with a status and
|
||||
interrupt-enable CSR register set is used to avoid the race.
|
||||
|
||||
If we take the example of the tulip:
|
||||
"pending work" is indicated by the status bit(CSR5 in tulip).
|
||||
the corresponding interrupt bit (CSR7 in tulip) might be turned off (but
|
||||
the CSR5 will continue to be turned on with new packet arrivals even if
|
||||
we clear it the first time)
|
||||
Very important is the fact that if we turn the interrupt bit on when
|
||||
status is set, an immediate irq is triggered.
|
||||
|
||||
If we cleared the rx ring and proclaimed there was "no more work
|
||||
to be done" and then went on to do a few other things; then when we enable
|
||||
interrupts, there is a possibility that a new packet might sneak in during
|
||||
this phase. It helps to look at the pseudo code for the tulip poll
|
||||
routine:
|
||||
|
||||
--------------------------
|
||||
do {
|
||||
ACK;
|
||||
while (ring_is_not_empty()) {
|
||||
work-work-work
|
||||
if quota is exceeded: exit, no touching irq status/mask
|
||||
}
|
||||
/* No packets, but new can arrive while we are doing this*/
|
||||
CSR5 := read
|
||||
if (CSR5 is not set) {
|
||||
/* If something arrives in this narrow window here,
|
||||
* where the comments are ;-> irq will be generated */
|
||||
unmask irqs;
|
||||
exit poll;
|
||||
}
|
||||
} while (rx_status_is_set);
|
||||
------------------------
|
||||
|
||||
CSR5 bit of interest is only the rx status.
|
||||
If you look at the last if statement:
|
||||
you just finished grabbing all the packets from the rx ring .. you check if
|
||||
status bit says there are more packets just in ... it says none; you then
|
||||
enable rx interrupts again; if a new packet just came in during this check,
|
||||
we are counting that CSR5 will be set in that small window of opportunity
|
||||
and that by re-enabling interrupts, we would actually trigger an interrupt
|
||||
to register the new packet for processing.
|
||||
|
||||
[The above description may be very verbose, if you have better wording
|
||||
that will make this more understandable, please suggest it.]
|
||||
|
||||
2) non-capable hardware
|
||||
|
||||
These do not generally respect level triggered IRQs. Normally,
|
||||
irqs may be lost while being masked and the only way to leave poll is to do
|
||||
a double check for new input after netif_rx_complete() is invoked
|
||||
and re-enable polling (after seeing this new input).
|
||||
|
||||
Sample code:
|
||||
|
||||
---------
|
||||
.
|
||||
.
|
||||
restart_poll:
|
||||
while (ring_is_not_empty()) {
|
||||
work-work-work
|
||||
if quota is exceeded: exit, not touching irq status/mask
|
||||
}
|
||||
.
|
||||
.
|
||||
.
|
||||
enable_rx_interrupts()
|
||||
netif_rx_complete(dev);
|
||||
if (ring_has_new_packet() && netif_rx_reschedule(dev, received)) {
|
||||
disable_rx_and_rxnobufs()
|
||||
goto restart_poll
|
||||
}
|
||||
---------
|
||||
|
||||
Basically netif_rx_complete() removes us from the poll list, but because a
|
||||
new packet which will never be caught due to the possibility of a race
|
||||
might come in, we attempt to re-add ourselves to the poll list.
|
||||
|
||||
|
||||
|
||||
|
||||
APPENDIX 3: Scheduling issues.
|
||||
==============================
|
||||
As seen NAPI moves processing to softirq level. Linux uses the ksoftirqd as the
|
||||
general solution to schedule softirq's to run before next interrupt and by putting
|
||||
them under scheduler control. Also this prevents consecutive softirq's from
|
||||
monopolizing the CPU. This also has the effect that the priority of ksoftirq needs
|
||||
to be considered when running very CPU-intensive applications and networking to
|
||||
get the proper balance of softirq/user balance. Increasing ksoftirq priority to 0
|
||||
(eventually more) is reported to cure problems with low network performance at high
|
||||
CPU load.
|
||||
|
||||
Most used processes in a GIGE router:
|
||||
USER PID %CPU %MEM SIZE RSS TTY STAT START TIME COMMAND
|
||||
root 3 0.2 0.0 0 0 ? RWN Aug 15 602:00 (ksoftirqd_CPU0)
|
||||
root 232 0.0 7.9 41400 40884 ? S Aug 15 74:12 gated
|
||||
|
||||
--------------------------------------------------------------------
|
||||
|
||||
relevant sites:
|
||||
==================
|
||||
ftp://robur.slu.se/pub/Linux/net-development/NAPI/
|
||||
|
||||
|
||||
--------------------------------------------------------------------
|
||||
TODO: Write net-skeleton.c driver.
|
||||
-------------------------------------------------------------
|
||||
|
||||
Authors:
|
||||
========
|
||||
Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
|
||||
Jamal Hadi Salim <hadi@cyberus.ca>
|
||||
Robert Olsson <Robert.Olsson@data.slu.se>
|
||||
|
||||
Acknowledgements:
|
||||
================
|
||||
People who made this document better:
|
||||
|
||||
Lennert Buytenhek <buytenh@gnu.org>
|
||||
Andrew Morton <akpm@zip.com.au>
|
||||
Manfred Spraul <manfred@colorfullife.com>
|
||||
Donald Becker <becker@scyld.com>
|
||||
Jeff Garzik <jgarzik@pobox.com>
|
@ -38,8 +38,13 @@ Socket options
|
||||
DCCP_SOCKOPT_SERVICE sets the service. The specification mandates use of
|
||||
service codes (RFC 4340, sec. 8.1.2); if this socket option is not set,
|
||||
the socket will fall back to 0 (which means that no meaningful service code
|
||||
is present). Connecting sockets set at most one service option; for
|
||||
listening sockets, multiple service codes can be specified.
|
||||
is present). On active sockets this is set before connect(); specifying more
|
||||
than one code has no effect (all subsequent service codes are ignored). The
|
||||
case is different for passive sockets, where multiple service codes (up to 32)
|
||||
can be set before calling bind().
|
||||
|
||||
DCCP_SOCKOPT_GET_CUR_MPS is read-only and retrieves the current maximum packet
|
||||
size (application payload size) in bytes, see RFC 4340, section 14.
|
||||
|
||||
DCCP_SOCKOPT_SEND_CSCOV and DCCP_SOCKOPT_RECV_CSCOV are used for setting the
|
||||
partial checksum coverage (RFC 4340, sec. 9.2). The default is that checksums
|
||||
@ -50,12 +55,13 @@ be enabled at the receiver, too with suitable choice of CsCov.
|
||||
DCCP_SOCKOPT_SEND_CSCOV sets the sender checksum coverage. Values in the
|
||||
range 0..15 are acceptable. The default setting is 0 (full coverage),
|
||||
values between 1..15 indicate partial coverage.
|
||||
DCCP_SOCKOPT_SEND_CSCOV is for the receiver and has a different meaning: it
|
||||
DCCP_SOCKOPT_RECV_CSCOV is for the receiver and has a different meaning: it
|
||||
sets a threshold, where again values 0..15 are acceptable. The default
|
||||
of 0 means that all packets with a partial coverage will be discarded.
|
||||
Values in the range 1..15 indicate that packets with minimally such a
|
||||
coverage value are also acceptable. The higher the number, the more
|
||||
restrictive this setting (see [RFC 4340, sec. 9.2.1]).
|
||||
restrictive this setting (see [RFC 4340, sec. 9.2.1]). Partial coverage
|
||||
settings are inherited to the child socket after accept().
|
||||
|
||||
The following two options apply to CCID 3 exclusively and are getsockopt()-only.
|
||||
In either case, a TFRC info struct (defined in <linux/tfrc.h>) is returned.
|
||||
@ -112,9 +118,14 @@ tx_qlen = 5
|
||||
The size of the transmit buffer in packets. A value of 0 corresponds
|
||||
to an unbounded transmit buffer.
|
||||
|
||||
sync_ratelimit = 125 ms
|
||||
The timeout between subsequent DCCP-Sync packets sent in response to
|
||||
sequence-invalid packets on the same socket (RFC 4340, 7.5.4). The unit
|
||||
of this parameter is milliseconds; a value of 0 disables rate-limiting.
|
||||
|
||||
Notes
|
||||
=====
|
||||
|
||||
DCCP does not travel through NAT successfully at present on many boxes. This is
|
||||
because the checksum covers the psuedo-header as per TCP and UDP. Linux NAT
|
||||
because the checksum covers the pseudo-header as per TCP and UDP. Linux NAT
|
||||
support for DCCP has been added.
|
||||
|
@ -1,52 +0,0 @@
|
||||
The Digi International RightSwitch SE-X (dgrs) Device Driver
|
||||
|
||||
This is a Linux driver for the Digi International RightSwitch SE-X
|
||||
EISA and PCI boards. These are 4 (EISA) or 6 (PCI) port Ethernet
|
||||
switches and a NIC combined into a single board. This driver can
|
||||
be compiled into the kernel statically or as a loadable module.
|
||||
|
||||
There is also a companion management tool, called "xrightswitch".
|
||||
The management tool lets you watch the performance graphically,
|
||||
as well as set the SNMP agent IP and IPX addresses, IEEE Spanning
|
||||
Tree, and Aging time. These can also be set from the command line
|
||||
when the driver is loaded. The driver command line options are:
|
||||
|
||||
debug=NNN Debug printing level
|
||||
dma=0/1 Disable/Enable DMA on PCI card
|
||||
spantree=0/1 Disable/Enable IEEE spanning tree
|
||||
hashexpire=NNN Change address aging time (default 300 seconds)
|
||||
ipaddr=A,B,C,D Set SNMP agent IP address i.e. 199,86,8,221
|
||||
iptrap=A,B,C,D Set SNMP agent IP trap address i.e. 199,86,8,221
|
||||
ipxnet=NNN Set SNMP agent IPX network number
|
||||
nicmode=0/1 Disable/Enable multiple NIC mode
|
||||
|
||||
There is also a tool for setting up input and output packet filters
|
||||
on each port, called "dgrsfilt".
|
||||
|
||||
Both the management tool and the filtering tool are available
|
||||
separately from the following FTP site:
|
||||
|
||||
ftp://ftp.dgii.com/drivers/rightswitch/linux/
|
||||
|
||||
When nicmode=1, the board and driver operate as 4 or 6 individual
|
||||
NIC ports (eth0...eth5) instead of as a switch. All switching
|
||||
functions are disabled. In the future, the board firmware may include
|
||||
a routing cache when in this mode.
|
||||
|
||||
Copyright 1995-1996 Digi International Inc.
|
||||
|
||||
This software may be used and distributed according to the terms
|
||||
of the GNU General Public License, incorporated herein by reference.
|
||||
|
||||
For information on purchasing a RightSwitch SE-4 or SE-6
|
||||
board, please contact Digi's sales department at 1-612-912-3444
|
||||
or 1-800-DIGIBRD. Outside the U.S., please check our Web page at:
|
||||
|
||||
http://www.dgii.com
|
||||
|
||||
for sales offices worldwide. Tech support is also available through
|
||||
the channels listed on the Web site, although as long as I am
|
||||
employed on networking products at Digi I will be happy to provide
|
||||
any bug fixes that may be needed.
|
||||
|
||||
-Rick Richardson, rick@dgii.com
|
@ -180,13 +180,20 @@ tcp_fin_timeout - INTEGER
|
||||
to live longer. Cf. tcp_max_orphans.
|
||||
|
||||
tcp_frto - INTEGER
|
||||
Enables F-RTO, an enhanced recovery algorithm for TCP retransmission
|
||||
Enables Forward RTO-Recovery (F-RTO) defined in RFC4138.
|
||||
F-RTO is an enhanced recovery algorithm for TCP retransmission
|
||||
timeouts. It is particularly beneficial in wireless environments
|
||||
where packet loss is typically due to random radio interference
|
||||
rather than intermediate router congestion. If set to 1, basic
|
||||
version is enabled. 2 enables SACK enhanced F-RTO, which is
|
||||
EXPERIMENTAL. The basic version can be used also when SACK is
|
||||
enabled for a flow through tcp_sack sysctl.
|
||||
rather than intermediate router congestion. FRTO is sender-side
|
||||
only modification. Therefore it does not require any support from
|
||||
the peer, but in a typical case, however, where wireless link is
|
||||
the local access link and most of the data flows downlink, the
|
||||
faraway servers should have FRTO enabled to take advantage of it.
|
||||
If set to 1, basic version is enabled. 2 enables SACK enhanced
|
||||
F-RTO if flow uses SACK. The basic version can be used also when
|
||||
SACK is in use though scenario(s) with it exists where FRTO
|
||||
interacts badly with the packet counting of the SACK enabled TCP
|
||||
flow.
|
||||
|
||||
tcp_frto_response - INTEGER
|
||||
When F-RTO has detected that a TCP retransmission timeout was
|
||||
|
@ -13,15 +13,35 @@ The radiotap format is discussed in
|
||||
./Documentation/networking/radiotap-headers.txt.
|
||||
|
||||
Although 13 radiotap argument types are currently defined, most only make sense
|
||||
to appear on received packets. Currently three kinds of argument are used by
|
||||
the injection code, although it knows to skip any other arguments that are
|
||||
present (facilitating replay of captured radiotap headers directly):
|
||||
to appear on received packets. The following information is parsed from the
|
||||
radiotap headers and used to control injection:
|
||||
|
||||
- IEEE80211_RADIOTAP_RATE - u8 arg in 500kbps units (0x02 --> 1Mbps)
|
||||
* IEEE80211_RADIOTAP_RATE
|
||||
|
||||
- IEEE80211_RADIOTAP_ANTENNA - u8 arg, 0x00 = ant1, 0x01 = ant2
|
||||
rate in 500kbps units, automatic if invalid or not present
|
||||
|
||||
- IEEE80211_RADIOTAP_DBM_TX_POWER - u8 arg, dBm
|
||||
|
||||
* IEEE80211_RADIOTAP_ANTENNA
|
||||
|
||||
antenna to use, automatic if not present
|
||||
|
||||
|
||||
* IEEE80211_RADIOTAP_DBM_TX_POWER
|
||||
|
||||
transmit power in dBm, automatic if not present
|
||||
|
||||
|
||||
* IEEE80211_RADIOTAP_FLAGS
|
||||
|
||||
IEEE80211_RADIOTAP_F_FCS: FCS will be removed and recalculated
|
||||
IEEE80211_RADIOTAP_F_WEP: frame will be encrypted if key available
|
||||
IEEE80211_RADIOTAP_F_FRAG: frame will be fragmented if longer than the
|
||||
current fragmentation threshold. Note that
|
||||
this flag is only reliable when software
|
||||
fragmentation is enabled)
|
||||
|
||||
The injection code can also skip all other currently defined radiotap fields
|
||||
facilitating replay of captured radiotap headers directly.
|
||||
|
||||
Here is an example valid radiotap header defining these three parameters
|
||||
|
||||
|
@ -58,9 +58,13 @@ software, so it's a straight round-robin qdisc. It uses the same syntax and
|
||||
classification priomap that sch_prio uses, so it should be intuitive to
|
||||
configure for people who've used sch_prio.
|
||||
|
||||
The PRIO qdisc naturally plugs into a multiqueue device. If PRIO has been
|
||||
built with NET_SCH_PRIO_MQ, then upon load, it will make sure the number of
|
||||
bands requested is equal to the number of queues on the hardware. If they
|
||||
In order to utilize the multiqueue features of the qdiscs, the network
|
||||
device layer needs to enable multiple queue support. This can be done by
|
||||
selecting NETDEVICES_MULTIQUEUE under Drivers.
|
||||
|
||||
The PRIO qdisc naturally plugs into a multiqueue device. If
|
||||
NETDEVICES_MULTIQUEUE is selected, then on qdisc load, the number of
|
||||
bands requested is compared to the number of queues on the hardware. If they
|
||||
are equal, it sets a one-to-one mapping up between the queues and bands. If
|
||||
they're not equal, it will not load the qdisc. This is the same behavior
|
||||
for RR. Once the association is made, any skb that is classified will have
|
||||
|
@ -3,6 +3,10 @@ started by Ingo Molnar <mingo@redhat.com>, 2001.09.17
|
||||
2.6 port and netpoll api by Matt Mackall <mpm@selenic.com>, Sep 9 2003
|
||||
|
||||
Please send bug reports to Matt Mackall <mpm@selenic.com>
|
||||
and Satyam Sharma <satyam.sharma@gmail.com>
|
||||
|
||||
Introduction:
|
||||
=============
|
||||
|
||||
This module logs kernel printk messages over UDP allowing debugging of
|
||||
problems where disk logging fails and serial consoles are impractical.
|
||||
@ -13,6 +17,9 @@ the specified interface as soon as possible. While this doesn't allow
|
||||
capture of early kernel panics, it does capture most of the boot
|
||||
process.
|
||||
|
||||
Sender and receiver configuration:
|
||||
==================================
|
||||
|
||||
It takes a string configuration parameter "netconsole" in the
|
||||
following format:
|
||||
|
||||
@ -34,21 +41,113 @@ Examples:
|
||||
|
||||
insmod netconsole netconsole=@/,@10.0.0.2/
|
||||
|
||||
It also supports logging to multiple remote agents by specifying
|
||||
parameters for the multiple agents separated by semicolons and the
|
||||
complete string enclosed in "quotes", thusly:
|
||||
|
||||
modprobe netconsole netconsole="@/,@10.0.0.2/;@/eth1,6892@10.0.0.3/"
|
||||
|
||||
Built-in netconsole starts immediately after the TCP stack is
|
||||
initialized and attempts to bring up the supplied dev at the supplied
|
||||
address.
|
||||
|
||||
The remote host can run either 'netcat -u -l -p <port>' or syslogd.
|
||||
|
||||
Dynamic reconfiguration:
|
||||
========================
|
||||
|
||||
Dynamic reconfigurability is a useful addition to netconsole that enables
|
||||
remote logging targets to be dynamically added, removed, or have their
|
||||
parameters reconfigured at runtime from a configfs-based userspace interface.
|
||||
[ Note that the parameters of netconsole targets that were specified/created
|
||||
from the boot/module option are not exposed via this interface, and hence
|
||||
cannot be modified dynamically. ]
|
||||
|
||||
To include this feature, select CONFIG_NETCONSOLE_DYNAMIC when building the
|
||||
netconsole module (or kernel, if netconsole is built-in).
|
||||
|
||||
Some examples follow (where configfs is mounted at the /sys/kernel/config
|
||||
mountpoint).
|
||||
|
||||
To add a remote logging target (target names can be arbitrary):
|
||||
|
||||
cd /sys/kernel/config/netconsole/
|
||||
mkdir target1
|
||||
|
||||
Note that newly created targets have default parameter values (as mentioned
|
||||
above) and are disabled by default -- they must first be enabled by writing
|
||||
"1" to the "enabled" attribute (usually after setting parameters accordingly)
|
||||
as described below.
|
||||
|
||||
To remove a target:
|
||||
|
||||
rmdir /sys/kernel/config/netconsole/othertarget/
|
||||
|
||||
The interface exposes these parameters of a netconsole target to userspace:
|
||||
|
||||
enabled Is this target currently enabled? (read-write)
|
||||
dev_name Local network interface name (read-write)
|
||||
local_port Source UDP port to use (read-write)
|
||||
remote_port Remote agent's UDP port (read-write)
|
||||
local_ip Source IP address to use (read-write)
|
||||
remote_ip Remote agent's IP address (read-write)
|
||||
local_mac Local interface's MAC address (read-only)
|
||||
remote_mac Remote agent's MAC address (read-write)
|
||||
|
||||
The "enabled" attribute is also used to control whether the parameters of
|
||||
a target can be updated or not -- you can modify the parameters of only
|
||||
disabled targets (i.e. if "enabled" is 0).
|
||||
|
||||
To update a target's parameters:
|
||||
|
||||
cat enabled # check if enabled is 1
|
||||
echo 0 > enabled # disable the target (if required)
|
||||
echo eth2 > dev_name # set local interface
|
||||
echo 10.0.0.4 > remote_ip # update some parameter
|
||||
echo cb:a9:87:65:43:21 > remote_mac # update more parameters
|
||||
echo 1 > enabled # enable target again
|
||||
|
||||
You can also update the local interface dynamically. This is especially
|
||||
useful if you want to use interfaces that have newly come up (and may not
|
||||
have existed when netconsole was loaded / initialized).
|
||||
|
||||
Miscellaneous notes:
|
||||
====================
|
||||
|
||||
WARNING: the default target ethernet setting uses the broadcast
|
||||
ethernet address to send packets, which can cause increased load on
|
||||
other systems on the same ethernet segment.
|
||||
|
||||
TIP: some LAN switches may be configured to suppress ethernet broadcasts
|
||||
so it is advised to explicitly specify the remote agents' MAC addresses
|
||||
from the config parameters passed to netconsole.
|
||||
|
||||
TIP: to find out the MAC address of, say, 10.0.0.2, you may try using:
|
||||
|
||||
ping -c 1 10.0.0.2 ; /sbin/arp -n | grep 10.0.0.2
|
||||
|
||||
TIP: in case the remote logging agent is on a separate LAN subnet than
|
||||
the sender, it is suggested to try specifying the MAC address of the
|
||||
default gateway (you may use /sbin/route -n to find it out) as the
|
||||
remote MAC address instead.
|
||||
|
||||
NOTE: the network device (eth1 in the above case) can run any kind
|
||||
of other network traffic, netconsole is not intrusive. Netconsole
|
||||
might cause slight delays in other traffic if the volume of kernel
|
||||
messages is high, but should have no other impact.
|
||||
|
||||
NOTE: if you find that the remote logging agent is not receiving or
|
||||
printing all messages from the sender, it is likely that you have set
|
||||
the "console_loglevel" parameter (on the sender) to only send high
|
||||
priority messages to the console. You can change this at runtime using:
|
||||
|
||||
dmesg -n 8
|
||||
|
||||
or by specifying "debug" on the kernel command line at boot, to send
|
||||
all kernel messages to the console. A specific value for this parameter
|
||||
can also be set using the "loglevel" kernel boot option. See the
|
||||
dmesg(8) man page and Documentation/kernel-parameters.txt for details.
|
||||
|
||||
Netconsole was designed to be as instantaneous as possible, to
|
||||
enable the logging of even the most critical kernel bugs. It works
|
||||
from IRQ contexts as well, and does not enable interrupts while
|
||||
|
@ -73,7 +73,8 @@ dev->hard_start_xmit:
|
||||
has to lock by itself when needed. It is recommended to use a try lock
|
||||
for this and return NETDEV_TX_LOCKED when the spin lock fails.
|
||||
The locking there should also properly protect against
|
||||
set_multicast_list.
|
||||
set_multicast_list. Note that the use of NETIF_F_LLTX is deprecated.
|
||||
Don't use it for new drivers.
|
||||
|
||||
Context: Process with BHs disabled or BH (timer),
|
||||
will be called with interrupts disabled by netconsole.
|
||||
@ -95,9 +96,13 @@ dev->set_multicast_list:
|
||||
Synchronization: netif_tx_lock spinlock.
|
||||
Context: BHs disabled
|
||||
|
||||
dev->poll:
|
||||
Synchronization: __LINK_STATE_RX_SCHED bit in dev->state. See
|
||||
dev_close code and comments in net/core/dev.c for more info.
|
||||
struct napi_struct synchronization rules
|
||||
========================================
|
||||
napi->poll:
|
||||
Synchronization: NAPI_STATE_SCHED bit in napi->state. Device
|
||||
driver's dev->close method will invoke napi_disable() on
|
||||
all NAPI instances which will do a sleeping poll on the
|
||||
NAPI_STATE_SCHED napi->state bit, waiting for all pending
|
||||
NAPI activity to cease.
|
||||
Context: softirq
|
||||
will be called with interrupts disabled by netconsole.
|
||||
|
||||
|
568
Documentation/networking/sk98lin.txt
Normal file
568
Documentation/networking/sk98lin.txt
Normal file
@ -0,0 +1,568 @@
|
||||
(C)Copyright 1999-2004 Marvell(R).
|
||||
All rights reserved
|
||||
===========================================================================
|
||||
|
||||
sk98lin.txt created 13-Feb-2004
|
||||
|
||||
Readme File for sk98lin v6.23
|
||||
Marvell Yukon/SysKonnect SK-98xx Gigabit Ethernet Adapter family driver for LINUX
|
||||
|
||||
This file contains
|
||||
1 Overview
|
||||
2 Required Files
|
||||
3 Installation
|
||||
3.1 Driver Installation
|
||||
3.2 Inclusion of adapter at system start
|
||||
4 Driver Parameters
|
||||
4.1 Per-Port Parameters
|
||||
4.2 Adapter Parameters
|
||||
5 Large Frame Support
|
||||
6 VLAN and Link Aggregation Support (IEEE 802.1, 802.1q, 802.3ad)
|
||||
7 Troubleshooting
|
||||
|
||||
===========================================================================
|
||||
|
||||
|
||||
1 Overview
|
||||
===========
|
||||
|
||||
The sk98lin driver supports the Marvell Yukon and SysKonnect
|
||||
SK-98xx/SK-95xx compliant Gigabit Ethernet Adapter on Linux. It has
|
||||
been tested with Linux on Intel/x86 machines.
|
||||
***
|
||||
|
||||
|
||||
2 Required Files
|
||||
=================
|
||||
|
||||
The linux kernel source.
|
||||
No additional files required.
|
||||
***
|
||||
|
||||
|
||||
3 Installation
|
||||
===============
|
||||
|
||||
It is recommended to download the latest version of the driver from the
|
||||
SysKonnect web site www.syskonnect.com. If you have downloaded the latest
|
||||
driver, the Linux kernel has to be patched before the driver can be
|
||||
installed. For details on how to patch a Linux kernel, refer to the
|
||||
patch.txt file.
|
||||
|
||||
3.1 Driver Installation
|
||||
------------------------
|
||||
|
||||
The following steps describe the actions that are required to install
|
||||
the driver and to start it manually. These steps should be carried
|
||||
out for the initial driver setup. Once confirmed to be ok, they can
|
||||
be included in the system start.
|
||||
|
||||
NOTE 1: To perform the following tasks you need 'root' access.
|
||||
|
||||
NOTE 2: In case of problems, please read the section "Troubleshooting"
|
||||
below.
|
||||
|
||||
The driver can either be integrated into the kernel or it can be compiled
|
||||
as a module. Select the appropriate option during the kernel
|
||||
configuration.
|
||||
|
||||
Compile/use the driver as a module
|
||||
----------------------------------
|
||||
To compile the driver, go to the directory /usr/src/linux and
|
||||
execute the command "make menuconfig" or "make xconfig" and proceed as
|
||||
follows:
|
||||
|
||||
To integrate the driver permanently into the kernel, proceed as follows:
|
||||
|
||||
1. Select the menu "Network device support" and then "Ethernet(1000Mbit)"
|
||||
2. Mark "Marvell Yukon Chipset / SysKonnect SK-98xx family support"
|
||||
with (*)
|
||||
3. Build a new kernel when the configuration of the above options is
|
||||
finished.
|
||||
4. Install the new kernel.
|
||||
5. Reboot your system.
|
||||
|
||||
To use the driver as a module, proceed as follows:
|
||||
|
||||
1. Enable 'loadable module support' in the kernel.
|
||||
2. For automatic driver start, enable the 'Kernel module loader'.
|
||||
3. Select the menu "Network device support" and then "Ethernet(1000Mbit)"
|
||||
4. Mark "Marvell Yukon Chipset / SysKonnect SK-98xx family support"
|
||||
with (M)
|
||||
5. Execute the command "make modules".
|
||||
6. Execute the command "make modules_install".
|
||||
The appropriate modules will be installed.
|
||||
7. Reboot your system.
|
||||
|
||||
|
||||
Load the module manually
|
||||
------------------------
|
||||
To load the module manually, proceed as follows:
|
||||
|
||||
1. Enter "modprobe sk98lin".
|
||||
2. If a Marvell Yukon or SysKonnect SK-98xx adapter is installed in
|
||||
your computer and you have a /proc file system, execute the command:
|
||||
"ls /proc/net/sk98lin/"
|
||||
This should produce an output containing a line with the following
|
||||
format:
|
||||
eth0 eth1 ...
|
||||
which indicates that your adapter has been found and initialized.
|
||||
|
||||
NOTE 1: If you have more than one Marvell Yukon or SysKonnect SK-98xx
|
||||
adapter installed, the adapters will be listed as 'eth0',
|
||||
'eth1', 'eth2', etc.
|
||||
For each adapter, repeat steps 3 and 4 below.
|
||||
|
||||
NOTE 2: If you have other Ethernet adapters installed, your Marvell
|
||||
Yukon or SysKonnect SK-98xx adapter will be mapped to the
|
||||
next available number, e.g. 'eth1'. The mapping is executed
|
||||
automatically.
|
||||
The module installation message (displayed either in a system
|
||||
log file or on the console) prints a line for each adapter
|
||||
found containing the corresponding 'ethX'.
|
||||
|
||||
3. Select an IP address and assign it to the respective adapter by
|
||||
entering:
|
||||
ifconfig eth0 <ip-address>
|
||||
With this command, the adapter is connected to the Ethernet.
|
||||
|
||||
SK-98xx Gigabit Ethernet Server Adapters: The yellow LED on the adapter
|
||||
is now active, the link status LED of the primary port is active and
|
||||
the link status LED of the secondary port (on dual port adapters) is
|
||||
blinking (if the ports are connected to a switch or hub).
|
||||
SK-98xx V2.0 Gigabit Ethernet Adapters: The link status LED is active.
|
||||
In addition, you will receive a status message on the console stating
|
||||
"ethX: network connection up using port Y" and showing the selected
|
||||
connection parameters (X stands for the ethernet device number
|
||||
(0, 1, 2, etc.), Y stands for the port name (A or B)).
|
||||
|
||||
NOTE: If you are in doubt about IP addresses, ask your network
|
||||
administrator for assistance.
|
||||
|
||||
4. Your adapter should now be fully operational.
|
||||
Use 'ping <otherstation>' to verify the connection to other computers
|
||||
on your network.
|
||||
5. To check the adapter configuration view /proc/net/sk98lin/[devicename].
|
||||
For example by executing:
|
||||
"cat /proc/net/sk98lin/eth0"
|
||||
|
||||
Unload the module
|
||||
-----------------
|
||||
To stop and unload the driver modules, proceed as follows:
|
||||
|
||||
1. Execute the command "ifconfig eth0 down".
|
||||
2. Execute the command "rmmod sk98lin".
|
||||
|
||||
3.2 Inclusion of adapter at system start
|
||||
-----------------------------------------
|
||||
|
||||
Since a large number of different Linux distributions are
|
||||
available, we are unable to describe a general installation procedure
|
||||
for the driver module.
|
||||
Because the driver is now integrated in the kernel, installation should
|
||||
be easy, using the standard mechanism of your distribution.
|
||||
Refer to the distribution's manual for installation of ethernet adapters.
|
||||
|
||||
***
|
||||
|
||||
4 Driver Parameters
|
||||
====================
|
||||
|
||||
Parameters can be set at the command line after the module has been
|
||||
loaded with the command 'modprobe'.
|
||||
In some distributions, the configuration tools are able to pass parameters
|
||||
to the driver module.
|
||||
|
||||
If you use the kernel module loader, you can set driver parameters
|
||||
in the file /etc/modprobe.conf (or /etc/modules.conf in 2.4 or earlier).
|
||||
To set the driver parameters in this file, proceed as follows:
|
||||
|
||||
1. Insert a line of the form:
|
||||
options sk98lin ...
|
||||
For "...", the same syntax is required as described for the command
|
||||
line parameters of modprobe below.
|
||||
2. To activate the new parameters, either reboot your computer
|
||||
or
|
||||
unload and reload the driver.
|
||||
The syntax of the driver parameters is:
|
||||
|
||||
modprobe sk98lin parameter=value1[,value2[,value3...]]
|
||||
|
||||
where value1 refers to the first adapter, value2 to the second etc.
|
||||
|
||||
NOTE: All parameters are case sensitive. Write them exactly as shown
|
||||
below.
|
||||
|
||||
Example:
|
||||
Suppose you have two adapters. You want to set auto-negotiation
|
||||
on the first adapter to ON and on the second adapter to OFF.
|
||||
You also want to set DuplexCapabilities on the first adapter
|
||||
to FULL, and on the second adapter to HALF.
|
||||
Then, you must enter:
|
||||
|
||||
modprobe sk98lin AutoNeg_A=On,Off DupCap_A=Full,Half
|
||||
|
||||
NOTE: The number of adapters that can be configured this way is
|
||||
limited in the driver (file skge.c, constant SK_MAX_CARD_PARAM).
|
||||
The current limit is 16. If you happen to install
|
||||
more adapters, adjust this and recompile.
|
||||
|
||||
|
||||
4.1 Per-Port Parameters
|
||||
------------------------
|
||||
|
||||
These settings are available for each port on the adapter.
|
||||
In the following description, '?' stands for the port for
|
||||
which you set the parameter (A or B).
|
||||
|
||||
Speed
|
||||
-----
|
||||
Parameter: Speed_?
|
||||
Values: 10, 100, 1000, Auto
|
||||
Default: Auto
|
||||
|
||||
This parameter is used to set the speed capabilities. It is only valid
|
||||
for the SK-98xx V2.0 copper adapters.
|
||||
Usually, the speed is negotiated between the two ports during link
|
||||
establishment. If this fails, a port can be forced to a specific setting
|
||||
with this parameter.
|
||||
|
||||
Auto-Negotiation
|
||||
----------------
|
||||
Parameter: AutoNeg_?
|
||||
Values: On, Off, Sense
|
||||
Default: On
|
||||
|
||||
The "Sense"-mode automatically detects whether the link partner supports
|
||||
auto-negotiation or not.
|
||||
|
||||
Duplex Capabilities
|
||||
-------------------
|
||||
Parameter: DupCap_?
|
||||
Values: Half, Full, Both
|
||||
Default: Both
|
||||
|
||||
This parameter is only relevant if auto-negotiation for this port is
|
||||
not set to "Sense". If auto-negotiation is set to "On", all three values
|
||||
are possible. If it is set to "Off", only "Full" and "Half" are allowed.
|
||||
This parameter is useful if your link partner does not support all
|
||||
possible combinations.
|
||||
|
||||
Flow Control
|
||||
------------
|
||||
Parameter: FlowCtrl_?
|
||||
Values: Sym, SymOrRem, LocSend, None
|
||||
Default: SymOrRem
|
||||
|
||||
This parameter can be used to set the flow control capabilities the
|
||||
port reports during auto-negotiation. It can be set for each port
|
||||
individually.
|
||||
Possible modes:
|
||||
-- Sym = Symmetric: both link partners are allowed to send
|
||||
PAUSE frames
|
||||
-- SymOrRem = SymmetricOrRemote: both or only remote partner
|
||||
are allowed to send PAUSE frames
|
||||
-- LocSend = LocalSend: only local link partner is allowed
|
||||
to send PAUSE frames
|
||||
-- None = no link partner is allowed to send PAUSE frames
|
||||
|
||||
NOTE: This parameter is ignored if auto-negotiation is set to "Off".
|
||||
|
||||
Role in Master-Slave-Negotiation (1000Base-T only)
|
||||
--------------------------------------------------
|
||||
Parameter: Role_?
|
||||
Values: Auto, Master, Slave
|
||||
Default: Auto
|
||||
|
||||
This parameter is only valid for the SK-9821 and SK-9822 adapters.
|
||||
For two 1000Base-T ports to communicate, one must take the role of the
|
||||
master (providing timing information), while the other must be the
|
||||
slave. Usually, this is negotiated between the two ports during link
|
||||
establishment. If this fails, a port can be forced to a specific setting
|
||||
with this parameter.
|
||||
|
||||
|
||||
4.2 Adapter Parameters
|
||||
-----------------------
|
||||
|
||||
Connection Type (SK-98xx V2.0 copper adapters only)
|
||||
---------------
|
||||
Parameter: ConType
|
||||
Values: Auto, 100FD, 100HD, 10FD, 10HD
|
||||
Default: Auto
|
||||
|
||||
The parameter 'ConType' is a combination of all five per-port parameters
|
||||
within one single parameter. This simplifies the configuration of both ports
|
||||
of an adapter card! The different values of this variable reflect the most
|
||||
meaningful combinations of port parameters.
|
||||
|
||||
The following table shows the values of 'ConType' and the corresponding
|
||||
combinations of the per-port parameters:
|
||||
|
||||
ConType | DupCap AutoNeg FlowCtrl Role Speed
|
||||
----------+------------------------------------------------------
|
||||
Auto | Both On SymOrRem Auto Auto
|
||||
100FD | Full Off None Auto (ignored) 100
|
||||
100HD | Half Off None Auto (ignored) 100
|
||||
10FD | Full Off None Auto (ignored) 10
|
||||
10HD | Half Off None Auto (ignored) 10
|
||||
|
||||
Stating any other port parameter together with this 'ConType' variable
|
||||
will result in a merged configuration of those settings. This is due to
|
||||
the fact that the per-port parameters (e.g. Speed_?) have a higher
|
||||
priority than the combined variable 'ConType'.
|
||||
|
||||
NOTE: This parameter is always used on both ports of the adapter card.
|
||||
|
||||
Interrupt Moderation
|
||||
--------------------
|
||||
Parameter: Moderation
|
||||
Values: None, Static, Dynamic
|
||||
Default: None
|
||||
|
||||
Interrupt moderation is employed to limit the maximum number of interrupts
|
||||
the driver has to serve. That is, one or more interrupts (which indicate any
|
||||
transmit or receive packet to be processed) are queued until the driver
|
||||
processes them. The time at which queued interrupts are served is determined by the
|
||||
'IntsPerSec' parameter, which is explained later below.
|
||||
|
||||
Possible modes:
|
||||
|
||||
-- None - No interrupt moderation is applied on the adapter card.
|
||||
Therefore, each transmit or receive interrupt is served immediately
|
||||
as soon as it appears on the interrupt line of the adapter card.
|
||||
|
||||
-- Static - Interrupt moderation is applied on the adapter card.
|
||||
All transmit and receive interrupts are queued until a complete
|
||||
moderation interval ends. If such a moderation interval ends, all
|
||||
queued interrupts are processed in one big bunch without any delay.
|
||||
The term 'static' reflects the fact that interrupt moderation is
|
||||
always enabled, regardless of how much network load is currently
|
||||
passing via a particular interface. In addition, the duration of
|
||||
the moderation interval has a fixed length that never changes while
|
||||
the driver is operational.
|
||||
|
||||
-- Dynamic - Interrupt moderation might be applied on the adapter card,
|
||||
depending on the load of the system. If the driver detects that the
|
||||
system load is too high, the driver tries to shield the system against
|
||||
too much network load by enabling interrupt moderation. If - at a later
|
||||
time - the CPU utilization decreases again (or if the network load is
|
||||
negligible) the interrupt moderation will automatically be disabled.
|
||||
|
||||
Interrupt moderation should be used when the driver has to handle one or more
|
||||
interfaces with a high network load, which - as a consequence - leads also to a
|
||||
high CPU utilization. When moderation is applied in such high network load
|
||||
situations, CPU load might be reduced by 20-30%.
|
||||
|
||||
NOTE: The drawback of using interrupt moderation is an increase of the round-
|
||||
trip-time (RTT), due to the queueing and serving of interrupts at dedicated
|
||||
moderation times.
|
||||
|
||||
Interrupts per second
|
||||
---------------------
|
||||
Parameter: IntsPerSec
|
||||
Values: 30...40000 (interrupts per second)
|
||||
Default: 2000
|
||||
|
||||
This parameter is only used if either static or dynamic interrupt moderation
|
||||
is used on a network adapter card. Using this parameter if no moderation is
|
||||
applied has no effect.
|
||||
|
||||
This parameter determines the length of any interrupt moderation interval.
|
||||
Assuming that static interrupt moderation is to be used, an 'IntsPerSec'
|
||||
parameter value of 2000 will lead to an interrupt moderation interval of
|
||||
500 microseconds.
|
||||
|
||||
NOTE: The duration of the moderation interval is to be chosen with care.
|
||||
At first glance, selecting a very long duration (e.g. only 100 interrupts per
|
||||
second) seems to be meaningful, but the increase of packet-processing delay
|
||||
is tremendous. On the other hand, selecting a very short moderation time might
|
||||
cancel out the benefit of any moderation being applied.
|
||||
|
||||
|
||||
Preferred Port
|
||||
--------------
|
||||
Parameter: PrefPort
|
||||
Values: A, B
|
||||
Default: A
|
||||
|
||||
This is used to force the preferred port to A or B (on dual-port network
|
||||
adapters). The preferred port is the one that is used if both are detected
|
||||
as fully functional.
|
||||
|
||||
RLMT Mode (Redundant Link Management Technology)
|
||||
------------------------------------------------
|
||||
Parameter: RlmtMode
|
||||
Values: CheckLinkState, CheckLocalPort, CheckSeg, DualNet
|
||||
Default: CheckLinkState
|
||||
|
||||
RLMT monitors the status of the port. If the link of the active port
|
||||
fails, RLMT switches immediately to the standby link. The virtual link is
|
||||
maintained as long as at least one 'physical' link is up.
|
||||
|
||||
Possible modes:
|
||||
|
||||
-- CheckLinkState - Check link state only: RLMT uses the link state
|
||||
reported by the adapter hardware for each individual port to
|
||||
determine whether a port can be used for all network traffic or
|
||||
not.
|
||||
|
||||
-- CheckLocalPort - In this mode, RLMT monitors the network path
|
||||
between the two ports of an adapter by regularly exchanging packets
|
||||
between them. This mode requires a network configuration in which
|
||||
the two ports are able to "see" each other (i.e. there must not be
|
||||
any router between the ports).
|
||||
|
||||
-- CheckSeg - Check local port and segmentation: This mode supports the
|
||||
same functions as the CheckLocalPort mode and additionally checks
|
||||
network segmentation between the ports. Therefore, this mode is only
|
||||
to be used if Gigabit Ethernet switches are installed on the network
|
||||
that have been configured to use the Spanning Tree protocol.
|
||||
|
||||
-- DualNet - In this mode, ports A and B are used as separate devices.
|
||||
If you have a dual port adapter, port A will be configured as eth0
|
||||
and port B as eth1. Both ports can be used independently with
|
||||
distinct IP addresses. The preferred port setting is not used.
|
||||
RLMT is turned off.
|
||||
|
||||
NOTE: RLMT modes CheckLocalPort (CLP) and CheckSeg (CLPSS) are designed to operate in configurations
|
||||
where a network path between the ports on one adapter exists.
|
||||
Moreover, they are not designed to work where adapters are connected
|
||||
back-to-back.
|
||||
***
|
||||
|
||||
|
||||
5 Large Frame Support
|
||||
======================
|
||||
|
||||
The driver supports large frames (also called jumbo frames). Using large
|
||||
frames can result in an improved throughput if transferring large amounts
|
||||
of data.
|
||||
To enable large frames, set the MTU (maximum transfer unit) of the
|
||||
interface to the desired value (up to 9000) by executing the following
|
||||
command:
|
||||
ifconfig eth0 mtu 9000
|
||||
This will only work if you have two adapters connected back-to-back
|
||||
or if you use a switch that supports large frames. When using a switch,
|
||||
it should be configured to allow large frames and auto-negotiation should
|
||||
be set to OFF. The setting must be configured on all adapters that can be
|
||||
reached by the large frames. If one adapter is not set to receive large
|
||||
frames, it will simply drop them.
|
||||
|
||||
You can switch back to the standard ethernet frame size by executing the
|
||||
following command:
|
||||
ifconfig eth0 mtu 1500
|
||||
|
||||
To permanently configure this setting, add a script with the 'ifconfig'
|
||||
line to the system startup sequence (named something like "S99sk98lin"
|
||||
in /etc/rc.d/rc2.d).
|
||||
***
|
||||
|
||||
|
||||
6 VLAN and Link Aggregation Support (IEEE 802.1, 802.1q, 802.3ad)
|
||||
==================================================================
|
||||
|
||||
The Marvell Yukon/SysKonnect Linux drivers are able to support VLAN and
|
||||
Link Aggregation according to IEEE standards 802.1, 802.1q, and 802.3ad.
|
||||
These features are only available after installation of open source
|
||||
modules available on the Internet:
|
||||
For VLAN go to: http://www.candelatech.com/~greear/vlan.html
|
||||
For Link Aggregation go to: http://www.st.rim.or.jp/~yumo
|
||||
|
||||
NOTE: SysKonnect GmbH does not offer any support for these open source
|
||||
modules and does not take the responsibility for any kind of
|
||||
failures or problems arising in connection with these modules.
|
||||
|
||||
NOTE: Configuring Link Aggregation on a SysKonnect dual link adapter may
|
||||
cause problems when unloading the driver.
|
||||
|
||||
|
||||
7 Troubleshooting
|
||||
==================
|
||||
|
||||
If any problems occur during the installation process, check the
|
||||
following list:
|
||||
|
||||
|
||||
Problem: The SK-98xx adapter cannot be found by the driver.
|
||||
Solution: In /proc/pci search for the following entry:
|
||||
'Ethernet controller: SysKonnect SK-98xx ...'
|
||||
If this entry exists, the SK-98xx or SK-98xx V2.0 adapter has
|
||||
been found by the system and should be operational.
|
||||
If this entry does not exist or if the file '/proc/pci' is not
|
||||
found, there may be a hardware problem or the PCI support may
|
||||
not be enabled in your kernel.
|
||||
The adapter can be checked using the diagnostics program which
|
||||
is available on the SysKonnect web site:
|
||||
www.syskonnect.com
|
||||
|
||||
Some COMPAQ machines have problems dealing with PCI under Linux.
|
||||
This problem is described in the 'PCI howto' document
|
||||
(included in some distributions or available from the
|
||||
web, e.g. at 'www.linux.org').
|
||||
|
||||
|
||||
Problem: Programs such as 'ifconfig' or 'route' cannot be found or the
|
||||
error message 'Operation not permitted' is displayed.
|
||||
Reason: You are not logged in as user 'root'.
|
||||
Solution: Logout and login as 'root' or change to 'root' via 'su'.
|
||||
|
||||
|
||||
Problem: Upon use of the command 'ping <address>' the message
|
||||
"ping: sendto: Network is unreachable" is displayed.
|
||||
Reason: Your route is not set correctly.
|
||||
Solution: If you are using RedHat, you probably forgot to set up the
|
||||
route in the 'network configuration'.
|
||||
Check the existing routes with the 'route' command and check
|
||||
if an entry for 'eth0' exists, and if so, if it is set correctly.
|
||||
|
||||
|
||||
Problem: The driver can be started, the adapter is connected to the
|
||||
network, but you cannot receive or transmit any packets;
|
||||
e.g. 'ping' does not work.
|
||||
Reason: There is an incorrect route in your routing table.
|
||||
Solution: Check the routing table with the command 'route' and read the
|
||||
manual help pages dealing with routes (enter 'man route').
|
||||
|
||||
NOTE: Although the 2.2.x kernel versions generate the routing entry
|
||||
automatically, problems of this kind may occur here as well. We've
|
||||
come across a situation in which the driver started correctly at
|
||||
system start, but after the driver has been removed and reloaded,
|
||||
the route of the adapter's network pointed to the 'dummy0' device
|
||||
and had to be corrected manually.
|
||||
|
||||
|
||||
Problem: Your computer should act as a router between multiple
|
||||
IP subnetworks (using multiple adapters), but computers in
|
||||
other subnetworks cannot be reached.
|
||||
Reason: Either the router's kernel is not configured for IP forwarding
|
||||
or the routing table and gateway configuration of at least one
|
||||
computer is not working.
|
||||
|
||||
Problem: Upon driver start, the following error message is displayed:
|
||||
"eth0: -- ERROR --
|
||||
Class: internal Software error
|
||||
Nr: 0xcc
|
||||
Msg: SkGeInitPort() cannot init running ports"
|
||||
Reason: You are using a driver compiled for single processor machines
|
||||
on a multiprocessor machine with SMP (Symmetric MultiProcessor)
|
||||
kernel.
|
||||
Solution: Configure your kernel appropriately and recompile the kernel or
|
||||
the modules.
|
||||
|
||||
|
||||
|
||||
If your problem is not listed here, please contact SysKonnect's technical
|
||||
support for help (linux@syskonnect.de).
|
||||
When contacting our technical support, please ensure that the following
|
||||
information is available:
|
||||
- System Manufacturer and HW Information (CPU, Memory... )
|
||||
- PCI-Boards in your system
|
||||
- Distribution
|
||||
- Kernel version
|
||||
- Driver version
|
||||
***
|
||||
|
||||
|
||||
|
||||
***End of Readme File***
|
@ -155,6 +155,8 @@ Suppose, however, that the firmware file is located on a filesystem accessible
|
||||
only through another device that hasn't been resumed yet. In that case,
|
||||
request_firmware() will fail regardless of whether or not the freezing of tasks
|
||||
is used. Consequently, the problem is not really related to the freezing of
|
||||
tasks, since it generally exists anyway. [The solution to this particular
|
||||
problem is to keep the firmware in memory after it's loaded for the first time
|
||||
and upload if from memory to the device whenever necessary.]
|
||||
tasks, since it generally exists anyway.
|
||||
|
||||
A driver must have all firmware it may need in RAM before suspend() is called.
|
||||
If keeping them is not practical, for example due to their size, they must be
|
||||
requested early enough using the suspend notifier API described in notifiers.txt.
|
||||
|
50
Documentation/power/notifiers.txt
Normal file
50
Documentation/power/notifiers.txt
Normal file
@ -0,0 +1,50 @@
|
||||
Suspend notifiers
|
||||
(C) 2007 Rafael J. Wysocki <rjw@sisk.pl>, GPL
|
||||
|
||||
There are some operations that device drivers may want to carry out in their
|
||||
.suspend() routines, but shouldn't, because they can cause the hibernation or
|
||||
suspend to fail. For example, a driver may want to allocate a substantial amount
|
||||
of memory (like 50 MB) in .suspend(), but that shouldn't be done after the
|
||||
swsusp's memory shrinker has run.
|
||||
|
||||
Also, there may be some operations that subsystems want to carry out before a
|
||||
hibernation/suspend or after a restore/resume, requiring the system to be fully
|
||||
functional, so the drivers' .suspend() and .resume() routines are not suitable
|
||||
for this purpose. For example, device drivers may want to upload firmware to
|
||||
their devices after a restore from a hibernation image, but they cannot do it by
|
||||
calling request_firmware() from their .resume() routines (user land processes
|
||||
are frozen at this point). The solution may be to load the firmware into
|
||||
memory before processes are frozen and upload it from there in the .resume()
|
||||
routine. Of course, a hibernation notifier may be used for this purpose.
|
||||
|
||||
The subsystems that have such needs can register suspend notifiers that will be
|
||||
called upon the following events by the suspend core:
|
||||
|
||||
PM_HIBERNATION_PREPARE The system is going to hibernate or suspend, tasks will
|
||||
be frozen immediately.
|
||||
|
||||
PM_POST_HIBERNATION The system memory state has been restored from a
|
||||
hibernation image or an error occurred during the
|
||||
hibernation. Device drivers' .resume() callbacks have
|
||||
been executed and tasks have been thawed.
|
||||
|
||||
PM_SUSPEND_PREPARE The system is preparing for a suspend.
|
||||
|
||||
PM_POST_SUSPEND The system has just resumed or an error occurred during
|
||||
the suspend. Device drivers' .resume() callbacks have
|
||||
been executed and tasks have been thawed.
|
||||
|
||||
It is generally assumed that whatever the notifiers do for
|
||||
PM_HIBERNATION_PREPARE, should be undone for PM_POST_HIBERNATION. Analogously,
|
||||
operations performed for PM_SUSPEND_PREPARE should be reversed for
|
||||
PM_POST_SUSPEND. Additionally, all of the notifiers are called for
|
||||
PM_POST_HIBERNATION if one of them fails for PM_HIBERNATION_PREPARE, and
|
||||
all of the notifiers are called for PM_POST_SUSPEND if one of them fails for
|
||||
PM_SUSPEND_PREPARE.
|
||||
|
||||
The hibernation and suspend notifiers are called with pm_mutex held. They are
|
||||
defined in the usual way, but their last argument is meaningless (it is always
|
||||
NULL). To register and/or unregister a suspend notifier use the functions
|
||||
register_pm_notifier() and unregister_pm_notifier(), respectively, defined in
|
||||
include/linux/suspend.h . If you don't need to unregister the notifier, you can
|
||||
also use the pm_notifier() macro defined in include/linux/suspend.h .
|
@ -50,7 +50,7 @@ Table of Contents
|
||||
g) Freescale SOC SEC Security Engines
|
||||
h) Board Control and Status (BCSR)
|
||||
i) Freescale QUICC Engine module (QE)
|
||||
j) Flash chip nodes
|
||||
j) CFI or JEDEC memory-mapped NOR flash
|
||||
k) Global Utilities Block
|
||||
|
||||
VII - Specifying interrupt information for devices
|
||||
@ -1250,6 +1250,12 @@ platforms are moved over to use the flattened-device-tree model.
|
||||
network device. This is used by the bootwrapper to interpret
|
||||
MAC addresses passed by the firmware when no information other
|
||||
than indices is available to associate an address with a device.
|
||||
- phy-connection-type : a string naming the controller/PHY interface type,
|
||||
i.e., "mii" (default), "rmii", "gmii", "rgmii", "rgmii-id", "sgmii",
|
||||
"tbi", or "rtbi". This property is only really needed if the connection
|
||||
is of type "rgmii-id", as all other connection types are detected by
|
||||
hardware.
|
||||
|
||||
|
||||
Example:
|
||||
|
||||
@ -1504,7 +1510,10 @@ platforms are moved over to use the flattened-device-tree model.
|
||||
|
||||
i) Freescale QUICC Engine module (QE)
|
||||
This represents qe module that is installed on PowerQUICC II Pro.
|
||||
Hopefully it will merge backward compatibility with CPM/CPM2.
|
||||
|
||||
NOTE: This is an interim binding; it should be updated to fit
|
||||
in with the CPM binding later in this document.
|
||||
|
||||
Basically, it is a bus of devices, that could act more or less
|
||||
as a complete entity (UCC, USB etc ). All of them should be siblings on
|
||||
the "root" qe node, using the common properties from there.
|
||||
@ -1542,7 +1551,7 @@ platforms are moved over to use the flattened-device-tree model.
|
||||
Required properties:
|
||||
- device_type : should be "spi".
|
||||
- compatible : should be "fsl_spi".
|
||||
- mode : the SPI operation mode, it can be "cpu" or "qe".
|
||||
- mode : the SPI operation mode, it can be "cpu" or "cpu-qe".
|
||||
- reg : Offset and length of the register set for the device
|
||||
- interrupts : <a b> where a is the interrupt number and b is a
|
||||
field that represents an encoding of the sense and level
|
||||
@ -1751,45 +1760,69 @@ platforms are moved over to use the flattened-device-tree model.
|
||||
};
|
||||
};
|
||||
|
||||
j) Flash chip nodes
|
||||
j) CFI or JEDEC memory-mapped NOR flash
|
||||
|
||||
Flash chips (Memory Technology Devices) are often used for solid state
|
||||
file systems on embedded devices.
|
||||
|
||||
Required properties:
|
||||
- compatible : should contain the specific model of flash chip(s)
|
||||
used, if known, followed by either "cfi-flash" or "jedec-flash"
|
||||
- reg : Address range of the flash chip
|
||||
- bank-width : Width (in bytes) of the flash bank. Equal to the
|
||||
device width times the number of interleaved chips.
|
||||
- device-width : (optional) Width of a single flash chip. If
|
||||
omitted, assumed to be equal to 'bank-width'.
|
||||
- #address-cells, #size-cells : Must be present if the flash has
|
||||
sub-nodes representing partitions (see below). In this case
|
||||
both #address-cells and #size-cells must be equal to 1.
|
||||
|
||||
- device_type : has to be "rom"
|
||||
- compatible : Should specify what this flash device is compatible with.
|
||||
Currently, this is most likely to be "direct-mapped" (which
|
||||
corresponds to the MTD physmap mapping driver).
|
||||
- reg : Offset and length of the register set (or memory mapping) for
|
||||
the device.
|
||||
- bank-width : Width of the flash data bus in bytes. Required
|
||||
for the NOR flashes (compatible == "direct-mapped" and others) ONLY.
|
||||
For JEDEC compatible devices, the following additional properties
|
||||
are defined:
|
||||
|
||||
Recommended properties :
|
||||
- vendor-id : Contains the flash chip's vendor id (1 byte).
|
||||
- device-id : Contains the flash chip's device id (1 byte).
|
||||
|
||||
- partitions : Several pairs of 32-bit values where the first value is
|
||||
partition's offset from the start of the device and the second one is
|
||||
partition size in bytes with LSB used to signify a read only
|
||||
partition (so, the partition size should always be an even number).
|
||||
- partition-names : The list of concatenated zero terminated strings
|
||||
representing the partition names.
|
||||
- probe-type : The type of probe which should be done for the chip
|
||||
(JEDEC vs CFI actually). Valid ONLY for NOR flashes.
|
||||
In addition to the information on the flash bank itself, the
|
||||
device tree may optionally contain additional information
|
||||
describing partitions of the flash address space. This can be
|
||||
used on platforms which have strong conventions about which
|
||||
portions of the flash are used for what purposes, but which don't
|
||||
use an on-flash partition table such as RedBoot.
|
||||
|
||||
Example:
|
||||
Each partition is represented as a sub-node of the flash device.
|
||||
Each node's name represents the name of the corresponding
|
||||
partition of the flash device.
|
||||
|
||||
flash@ff000000 {
|
||||
device_type = "rom";
|
||||
compatible = "direct-mapped";
|
||||
probe-type = "CFI";
|
||||
reg = <ff000000 01000000>;
|
||||
bank-width = <4>;
|
||||
partitions = <00000000 00f80000
|
||||
00f80000 00080001>;
|
||||
partition-names = "fs\0firmware";
|
||||
};
|
||||
Flash partitions
|
||||
- reg : The partition's offset and size within the flash bank.
|
||||
- label : (optional) The label / name for this flash partition.
|
||||
If omitted, the label is taken from the node name (excluding
|
||||
the unit address).
|
||||
- read-only : (optional) This parameter, if present, is a hint to
|
||||
Linux that this flash partition should only be mounted
|
||||
read-only. This is usually used for flash partitions
|
||||
containing early-boot firmware images or data which should not
|
||||
be clobbered.
|
||||
|
||||
Example:
|
||||
|
||||
flash@ff000000 {
|
||||
compatible = "amd,am29lv128ml", "cfi-flash";
|
||||
reg = <ff000000 01000000>;
|
||||
bank-width = <4>;
|
||||
device-width = <1>;
|
||||
#address-cells = <1>;
|
||||
#size-cells = <1>;
|
||||
fs@0 {
|
||||
label = "fs";
|
||||
reg = <0 f80000>;
|
||||
};
|
||||
firmware@f80000 {
|
||||
label ="firmware";
|
||||
reg = <f80000 80000>;
|
||||
read-only;
|
||||
};
|
||||
};
|
||||
|
||||
k) Global Utilities Block
|
||||
|
||||
@ -1818,6 +1851,397 @@ platforms are moved over to use the flattened-device-tree model.
|
||||
fsl,has-rstcr;
|
||||
};
|
||||
|
||||
l) Freescale Communications Processor Module
|
||||
|
||||
NOTE: This is an interim binding, and will likely change slightly,
|
||||
as more devices are supported. The QE bindings especially are
|
||||
incomplete.
|
||||
|
||||
i) Root CPM node
|
||||
|
||||
Properties:
|
||||
- compatible : "fsl,cpm1", "fsl,cpm2", or "fsl,qe".
|
||||
- reg : A 48-byte region beginning with CPCR.
|
||||
|
||||
Example:
|
||||
cpm@119c0 {
|
||||
#address-cells = <1>;
|
||||
#size-cells = <1>;
|
||||
#interrupt-cells = <2>;
|
||||
compatible = "fsl,mpc8272-cpm", "fsl,cpm2";
|
||||
reg = <119c0 30>;
|
||||
};
|
||||
|
||||
ii) Properties common to multiple CPM/QE devices
|
||||
|
||||
- fsl,cpm-command : This value is ORed with the opcode and command flag
|
||||
to specify the device on which a CPM command operates.
|
||||
|
||||
- fsl,cpm-brg : Indicates which baud rate generator the device
|
||||
is associated with. If absent, an unused BRG
|
||||
should be dynamically allocated. If zero, the
|
||||
device uses an external clock rather than a BRG.
|
||||
|
||||
- reg : Unless otherwise specified, the first resource represents the
|
||||
scc/fcc/ucc registers, and the second represents the device's
|
||||
parameter RAM region (if it has one).
|
||||
|
||||
iii) Serial
|
||||
|
||||
Currently defined compatibles:
|
||||
- fsl,cpm1-smc-uart
|
||||
- fsl,cpm2-smc-uart
|
||||
- fsl,cpm1-scc-uart
|
||||
- fsl,cpm2-scc-uart
|
||||
- fsl,qe-uart
|
||||
|
||||
Example:
|
||||
|
||||
serial@11a00 {
|
||||
device_type = "serial";
|
||||
compatible = "fsl,mpc8272-scc-uart",
|
||||
"fsl,cpm2-scc-uart";
|
||||
reg = <11a00 20 8000 100>;
|
||||
interrupts = <28 8>;
|
||||
interrupt-parent = <&PIC>;
|
||||
fsl,cpm-brg = <1>;
|
||||
fsl,cpm-command = <00800000>;
|
||||
};
|
||||
|
||||
iii) Network
|
||||
|
||||
Currently defined compatibles:
|
||||
- fsl,cpm1-scc-enet
|
||||
- fsl,cpm2-scc-enet
|
||||
- fsl,cpm1-fec-enet
|
||||
- fsl,cpm2-fcc-enet (third resource is GFEMR)
|
||||
- fsl,qe-enet
|
||||
|
||||
Example:
|
||||
|
||||
ethernet@11300 {
|
||||
device_type = "network";
|
||||
compatible = "fsl,mpc8272-fcc-enet",
|
||||
"fsl,cpm2-fcc-enet";
|
||||
reg = <11300 20 8400 100 11390 1>;
|
||||
local-mac-address = [ 00 00 00 00 00 00 ];
|
||||
interrupts = <20 8>;
|
||||
interrupt-parent = <&PIC>;
|
||||
phy-handle = <&PHY0>;
|
||||
linux,network-index = <0>;
|
||||
fsl,cpm-command = <12000300>;
|
||||
};
|
||||
|
||||
iv) MDIO
|
||||
|
||||
Currently defined compatibles:
|
||||
fsl,pq1-fec-mdio (reg is same as first resource of FEC device)
|
||||
fsl,cpm2-mdio-bitbang (reg is port C registers)
|
||||
|
||||
Properties for fsl,cpm2-mdio-bitbang:
|
||||
fsl,mdio-pin : pin of port C controlling mdio data
|
||||
fsl,mdc-pin : pin of port C controlling mdio clock
|
||||
|
||||
Example:
|
||||
|
||||
mdio@10d40 {
|
||||
device_type = "mdio";
|
||||
compatible = "fsl,mpc8272ads-mdio-bitbang",
|
||||
"fsl,mpc8272-mdio-bitbang",
|
||||
"fsl,cpm2-mdio-bitbang";
|
||||
reg = <10d40 14>;
|
||||
#address-cells = <1>;
|
||||
#size-cells = <0>;
|
||||
fsl,mdio-pin = <12>;
|
||||
fsl,mdc-pin = <13>;
|
||||
};
|
||||
|
||||
v) Baud Rate Generators
|
||||
|
||||
Currently defined compatibles:
|
||||
fsl,cpm-brg
|
||||
fsl,cpm1-brg
|
||||
fsl,cpm2-brg
|
||||
|
||||
Properties:
|
||||
- reg : There may be an arbitrary number of reg resources; BRG
|
||||
numbers are assigned to these in order.
|
||||
- clock-frequency : Specifies the base frequency driving
|
||||
the BRG.
|
||||
|
||||
Example:
|
||||
|
||||
brg@119f0 {
|
||||
compatible = "fsl,mpc8272-brg",
|
||||
"fsl,cpm2-brg",
|
||||
"fsl,cpm-brg";
|
||||
reg = <119f0 10 115f0 10>;
|
||||
clock-frequency = <d#25000000>;
|
||||
};
|
||||
|
||||
vi) Interrupt Controllers
|
||||
|
||||
Currently defined compatibles:
|
||||
- fsl,cpm1-pic
|
||||
- only one interrupt cell
|
||||
- fsl,pq1-pic
|
||||
- fsl,cpm2-pic
|
||||
- second interrupt cell is level/sense:
|
||||
- 2 is falling edge
|
||||
- 8 is active low
|
||||
|
||||
Example:
|
||||
|
||||
interrupt-controller@10c00 {
|
||||
#interrupt-cells = <2>;
|
||||
interrupt-controller;
|
||||
reg = <10c00 80>;
|
||||
compatible = "mpc8272-pic", "fsl,cpm2-pic";
|
||||
};
|
||||
|
||||
vii) USB (Universal Serial Bus Controller)
|
||||
|
||||
Properties:
|
||||
- compatible : "fsl,cpm1-usb", "fsl,cpm2-usb", "fsl,qe-usb"
|
||||
|
||||
Example:
|
||||
usb@11bc0 {
|
||||
#address-cells = <1>;
|
||||
#size-cells = <0>;
|
||||
compatible = "fsl,cpm2-usb";
|
||||
reg = <11b60 18 8b00 100>;
|
||||
interrupts = <b 8>;
|
||||
interrupt-parent = <&PIC>;
|
||||
fsl,cpm-command = <2e600000>;
|
||||
};
|
||||
|
||||
viii) Multi-User RAM (MURAM)
|
||||
|
||||
The multi-user/dual-ported RAM is expressed as a bus under the CPM node.
|
||||
|
||||
Ranges must be set up subject to the following restrictions:
|
||||
|
||||
- Children's reg nodes must be offsets from the start of all muram, even
|
||||
if the user-data area does not begin at zero.
|
||||
- If multiple range entries are used, the difference between the parent
|
||||
address and the child address must be the same in all, so that a single
|
||||
mapping can cover them all while maintaining the ability to determine
|
||||
CPM-side offsets with pointer subtraction. It is recommended that
|
||||
multiple range entries not be used.
|
||||
- A child address of zero must be translatable, even if no reg resources
|
||||
contain it.
|
||||
|
||||
A child "data" node must exist, compatible with "fsl,cpm-muram-data", to
|
||||
indicate the portion of muram that is usable by the OS for arbitrary
|
||||
purposes. The data node may have an arbitrary number of reg resources,
|
||||
all of which contribute to the allocatable muram pool.
|
||||
|
||||
Example, based on mpc8272:
|
||||
|
||||
muram@0 {
|
||||
#address-cells = <1>;
|
||||
#size-cells = <1>;
|
||||
ranges = <0 0 10000>;
|
||||
|
||||
data@0 {
|
||||
compatible = "fsl,cpm-muram-data";
|
||||
reg = <0 2000 9800 800>;
|
||||
};
|
||||
};
|
||||
|
||||
m) Chipselect/Local Bus
|
||||
|
||||
Properties:
|
||||
- name : Should be localbus
|
||||
- #address-cells : Should be either two or three. The first cell is the
|
||||
chipselect number, and the remaining cells are the
|
||||
offset into the chipselect.
|
||||
- #size-cells : Either one or two, depending on how large each chipselect
|
||||
can be.
|
||||
- ranges : Each range corresponds to a single chipselect, and covers
|
||||
the entire access window as configured.
|
||||
|
||||
Example:
|
||||
localbus@f0010100 {
|
||||
compatible = "fsl,mpc8272ads-localbus",
|
||||
"fsl,mpc8272-localbus",
|
||||
"fsl,pq2-localbus";
|
||||
#address-cells = <2>;
|
||||
#size-cells = <1>;
|
||||
reg = <f0010100 40>;
|
||||
|
||||
ranges = <0 0 fe000000 02000000
|
||||
1 0 f4500000 00008000>;
|
||||
|
||||
flash@0,0 {
|
||||
compatible = "jedec-flash";
|
||||
reg = <0 0 2000000>;
|
||||
bank-width = <4>;
|
||||
device-width = <1>;
|
||||
};
|
||||
|
||||
board-control@1,0 {
|
||||
reg = <1 0 20>;
|
||||
compatible = "fsl,mpc8272ads-bcsr";
|
||||
};
|
||||
};
|
||||
|
||||
|
||||
n) 4xx/Axon EMAC ethernet nodes
|
||||
|
||||
The EMAC ethernet controller in IBM and AMCC 4xx chips, and also
|
||||
the Axon bridge. To operate, this needs to interact with a
|
||||
special McMAL DMA controller, and sometimes an RGMII or ZMII
|
||||
interface. In addition to the nodes and properties described
|
||||
below, the node for the OPB bus on which the EMAC sits must have a
|
||||
correct clock-frequency property.
|
||||
|
||||
i) The EMAC node itself
|
||||
|
||||
Required properties:
|
||||
- device_type : "network"
|
||||
|
||||
- compatible : compatible list, contains 2 entries, first is
|
||||
"ibm,emac-CHIP" where CHIP is the host ASIC (440gx,
|
||||
405gp, Axon) and second is either "ibm,emac" or
|
||||
"ibm,emac4". For Axon, thus, we have: "ibm,emac-axon",
|
||||
"ibm,emac4"
|
||||
- interrupts : <interrupt mapping for EMAC IRQ and WOL IRQ>
|
||||
- interrupt-parent : optional, if needed for interrupt mapping
|
||||
- reg : <registers mapping>
|
||||
- local-mac-address : 6 bytes, MAC address
|
||||
- mal-device : phandle of the associated McMAL node
|
||||
- mal-tx-channel : 1 cell, index of the tx channel on McMAL associated
|
||||
with this EMAC
|
||||
- mal-rx-channel : 1 cell, index of the rx channel on McMAL associated
|
||||
with this EMAC
|
||||
- cell-index : 1 cell, hardware index of the EMAC cell on a given
|
||||
ASIC (typically 0x0 and 0x1 for EMAC0 and EMAC1 on
|
||||
each Axon chip)
|
||||
- max-frame-size : 1 cell, maximum frame size supported in bytes
|
||||
- rx-fifo-size : 1 cell, Rx fifo size in bytes for 10 and 100 Mb/sec
|
||||
operations.
|
||||
For Axon, 2048
|
||||
- tx-fifo-size : 1 cell, Tx fifo size in bytes for 10 and 100 Mb/sec
|
||||
operations.
|
||||
For Axon, 2048.
|
||||
- fifo-entry-size : 1 cell, size of a fifo entry (used to calculate
|
||||
thresholds).
|
||||
For Axon, 0x00000010
|
||||
- mal-burst-size : 1 cell, MAL burst size (used to calculate thresholds)
|
||||
in bytes.
|
||||
For Axon, 0x00000100 (I think ...)
|
||||
- phy-mode : string, mode of operations of the PHY interface.
|
||||
Supported values are: "mii", "rmii", "smii", "rgmii",
|
||||
"tbi", "gmii", "rtbi", "sgmii".
|
||||
For Axon on CAB, it is "rgmii"
|
||||
- mdio-device : 1 cell, required iff using shared MDIO registers
|
||||
(440EP). phandle of the EMAC to use to drive the
|
||||
MDIO lines for the PHY used by this EMAC.
|
||||
- zmii-device : 1 cell, required iff connected to a ZMII. phandle of
|
||||
the ZMII device node
|
||||
- zmii-channel : 1 cell, required iff connected to a ZMII. Which ZMII
|
||||
channel or 0xffffffff if ZMII is only used for MDIO.
|
||||
- rgmii-device : 1 cell, required iff connected to an RGMII. phandle
|
||||
of the RGMII device node.
|
||||
For Axon: phandle of plb5/plb4/opb/rgmii
|
||||
- rgmii-channel : 1 cell, required iff connected to an RGMII. Which
|
||||
RGMII channel is used by this EMAC.
|
||||
For Axon: present, whatever value is appropriate for each
|
||||
EMAC, that is the content of the current (bogus) "phy-port"
|
||||
property.
|
||||
|
||||
Recommended properties:
|
||||
- linux,network-index : This is the intended "index" of this
|
||||
network device. This is used by the bootwrapper to interpret
|
||||
MAC addresses passed by the firmware when no information other
|
||||
than indices is available to associate an address with a device.
|
||||
|
||||
Optional properties:
|
||||
- phy-address : 1 cell, optional, MDIO address of the PHY. If absent,
|
||||
a search is performed.
|
||||
- phy-map : 1 cell, optional, bitmap of addresses to probe the PHY
|
||||
for, used if phy-address is absent. bit 0x00000001 is
|
||||
MDIO address 0.
|
||||
For Axon it can be absent, though my current driver
|
||||
doesn't handle phy-address yet so for now, keep
|
||||
0x00ffffff in it.
|
||||
- rx-fifo-size-gige : 1 cell, Rx fifo size in bytes for 1000 Mb/sec
|
||||
operations (if absent the value is the same as
|
||||
rx-fifo-size). For Axon, either absent or 2048.
|
||||
- tx-fifo-size-gige : 1 cell, Tx fifo size in bytes for 1000 Mb/sec
|
||||
operations (if absent the value is the same as
|
||||
tx-fifo-size). For Axon, either absent or 2048.
|
||||
- tah-device : 1 cell, optional. If connected to a TAH engine for
|
||||
offload, phandle of the TAH device node.
|
||||
- tah-channel : 1 cell, optional. If appropriate, channel used on the
|
||||
TAH engine.
|
||||
|
||||
Example:
|
||||
|
||||
EMAC0: ethernet@40000800 {
|
||||
linux,network-index = <0>;
|
||||
device_type = "network";
|
||||
compatible = "ibm,emac-440gp", "ibm,emac";
|
||||
interrupt-parent = <&UIC1>;
|
||||
interrupts = <1c 4 1d 4>;
|
||||
reg = <40000800 70>;
|
||||
local-mac-address = [00 04 AC E3 1B 1E];
|
||||
mal-device = <&MAL0>;
|
||||
mal-tx-channel = <0 1>;
|
||||
mal-rx-channel = <0>;
|
||||
cell-index = <0>;
|
||||
max-frame-size = <5dc>;
|
||||
rx-fifo-size = <1000>;
|
||||
tx-fifo-size = <800>;
|
||||
phy-mode = "rmii";
|
||||
phy-map = <00000001>;
|
||||
zmii-device = <&ZMII0>;
|
||||
zmii-channel = <0>;
|
||||
};
|
||||
|
||||
ii) McMAL node
|
||||
|
||||
Required properties:
|
||||
- device_type : "dma-controller"
|
||||
- compatible : compatible list, containing 2 entries, first is
|
||||
"ibm,mcmal-CHIP" where CHIP is the host ASIC (like
|
||||
emac) and the second is either "ibm,mcmal" or
|
||||
"ibm,mcmal2".
|
||||
For Axon, "ibm,mcmal-axon","ibm,mcmal2"
|
||||
- interrupts : <interrupt mapping for the MAL interrupts sources:
|
||||
5 sources: tx_eob, rx_eob, serr, txde, rxde>.
|
||||
For Axon: This is _different_ from the current
|
||||
firmware. We use the "delayed" interrupts for txeob
|
||||
and rxeob. Thus we end up with mapping those 5 MPIC
|
||||
interrupts, all level positive sensitive: 10, 11, 32,
|
||||
33, 34 (in decimal)
|
||||
- dcr-reg : < DCR registers range >
|
||||
- dcr-parent : if needed for dcr-reg
|
||||
- num-tx-chans : 1 cell, number of Tx channels
|
||||
- num-rx-chans : 1 cell, number of Rx channels
|
||||
|
||||
iii) ZMII node
|
||||
|
||||
Required properties:
|
||||
- compatible : compatible list, containing 2 entries, first is
|
||||
"ibm,zmii-CHIP" where CHIP is the host ASIC (like
|
||||
EMAC) and the second is "ibm,zmii".
|
||||
For Axon, there is no ZMII node.
|
||||
- reg : <registers mapping>
|
||||
|
||||
iv) RGMII node
|
||||
|
||||
Required properties:
|
||||
- compatible : compatible list, containing 2 entries, first is
|
||||
"ibm,rgmii-CHIP" where CHIP is the host ASIC (like
|
||||
EMAC) and the second is "ibm,rgmii".
|
||||
For Axon, "ibm,rgmii-axon","ibm,rgmii"
|
||||
- reg : <registers mapping>
|
||||
- revision : as provided by the RGMII new version register if
|
||||
available.
|
||||
For Axon: 0x0000012a
|
||||
|
||||
More devices will be defined as this spec matures.
|
||||
|
||||
VII - Specifying interrupt information for devices
|
||||
|
89
Documentation/rfkill.txt
Normal file
89
Documentation/rfkill.txt
Normal file
@ -0,0 +1,89 @@
|
||||
rfkill - RF switch subsystem support
|
||||
====================================
|
||||
|
||||
1 Implementation details
|
||||
2 Driver support
|
||||
3 Userspace support
|
||||
|
||||
===============================================================================
|
||||
1: Implementation details
|
||||
|
||||
The rfkill switch subsystem offers support for keys often found on laptops
|
||||
to enable wireless devices like WiFi and Bluetooth.
|
||||
|
||||
This is done by providing the user 3 possibilities:
|
||||
1 - The rfkill system handles all events; userspace is not aware of events.
|
||||
2 - The rfkill system handles all events; userspace is informed about the events.
|
||||
3 - The rfkill system does not handle events; userspace handles all events.
|
||||
|
||||
The buttons to enable and disable the wireless radios are important in
|
||||
situations where the user is for example using his laptop on a location where
|
||||
wireless radios _must_ be disabled (e.g. airplanes).
|
||||
Because of this requirement, userspace support for the keys should not be
|
||||
made mandatory. Because userspace might want to perform some additional smarter
|
||||
tasks when the key is pressed, rfkill still provides userspace the possibility
|
||||
to take over the task to handle the key events.
|
||||
|
||||
The system inside the kernel has been split into 2 separate sections:
|
||||
1 - RFKILL
|
||||
2 - RFKILL_INPUT
|
||||
|
||||
The first option enables rfkill support and will make sure userspace will
|
||||
be notified of any events through the input device. It also creates several
|
||||
sysfs entries which can be used by userspace. See section "Userspace support".
|
||||
|
||||
The second option provides an rfkill input handler. This handler will
|
||||
listen to all rfkill key events and will toggle the radio accordingly.
|
||||
With this option enabled userspace could either do nothing or simply
|
||||
perform monitoring tasks.
|
||||
|
||||
====================================
|
||||
2: Driver support
|
||||
|
||||
To build a driver with rfkill subsystem support, the driver should
|
||||
depend on the Kconfig symbol RFKILL; it should _not_ depend on
|
||||
RFKILL_INPUT.
|
||||
|
||||
Unless key events trigger an interrupt to which the driver listens, polling
|
||||
will be required to determine the key state changes. For this the input
|
||||
layer provides the input-polldev handler.
|
||||
|
||||
A driver should implement a few steps to correctly make use of the
|
||||
rfkill subsystem. First for non-polling drivers:
|
||||
|
||||
- rfkill_allocate()
|
||||
- input_allocate_device()
|
||||
- rfkill_register()
|
||||
- input_register_device()
|
||||
|
||||
For polling drivers:
|
||||
|
||||
- rfkill_allocate()
|
||||
- input_allocate_polled_device()
|
||||
- rfkill_register()
|
||||
- input_register_polled_device()
|
||||
|
||||
When a key event has been detected, the correct event should be
|
||||
sent over the input device which has been registered by the driver.
|
||||
|
||||
====================================
|
||||
3: Userspace support
|
||||
|
||||
For each key an input device will be created which will send out the correct
|
||||
key event when the rfkill key has been pressed.
|
||||
|
||||
The following sysfs entries will be created:
|
||||
|
||||
name: Name assigned by driver to this key (interface or driver name).
|
||||
type: Name of the key type ("wlan", "bluetooth", etc).
|
||||
state: Current state of the key. 1: On, 0: Off.
|
||||
claim: 1: Userspace handles events, 0: Kernel handles events
|
||||
|
||||
Both the "state" and "claim" entries are also writable. For the "state" entry
|
||||
this means that when 1 or 0 is written all radios, not yet in the requested
|
||||
state, will be toggled accordingly.
|
||||
For the "claim" entry writing 1 to it means that the kernel no longer handles
|
||||
key events even though RFKILL_INPUT input was enabled. When "claim" has been
|
||||
set to 0, userspace should make sure that it listens for the input events or
|
||||
check the sysfs "state" entry regularly to correctly perform the required
|
||||
tasks when the rfkill key is pressed.
|
26
Documentation/s390/00-INDEX
Normal file
26
Documentation/s390/00-INDEX
Normal file
@ -0,0 +1,26 @@
|
||||
00-INDEX
|
||||
- this file.
|
||||
3270.ChangeLog
|
||||
- ChangeLog for the UTS Global 3270-support patch (outdated).
|
||||
3270.txt
|
||||
- how to use the IBM 3270 display system support.
|
||||
cds.txt
|
||||
- s390 common device support (common I/O layer).
|
||||
CommonIO
|
||||
- common I/O layer command line parameters, procfs and debugfs entries
|
||||
config3270.sh
|
||||
- example configuration for 3270 devices.
|
||||
DASD
|
||||
- information on the DASD disk device driver.
|
||||
Debugging390.txt
|
||||
- hints for debugging on s390 systems.
|
||||
driver-model.txt
|
||||
- information on s390 devices and the driver model.
|
||||
monreader.txt
|
||||
- information on accessing the z/VM monitor stream from Linux.
|
||||
s390dbf.txt
|
||||
- information on using the s390 debug feature.
|
||||
TAPE
|
||||
- information on the driver for channel-attached tapes.
|
||||
zfcpdump
|
||||
- information on the s390 SCSI dump tool.
|
@ -1,5 +1,5 @@
|
||||
S/390 common I/O-Layer - command line parameters and /proc entries
|
||||
==================================================================
|
||||
S/390 common I/O-Layer - command line parameters, procfs and debugfs entries
|
||||
============================================================================
|
||||
|
||||
Command line parameters
|
||||
-----------------------
|
||||
@ -7,9 +7,9 @@ Command line parameters
|
||||
* cio_msg = yes | no
|
||||
|
||||
Determines whether information on found devices and sensed device
|
||||
characteristics should be shown during startup, i. e. messages of the types
|
||||
"Detected device 0.0.4711 on subchannel 0.0.0042" and "SenseID: Device
|
||||
0.0.4711 reports: ...".
|
||||
characteristics should be shown during startup or when new devices are
|
||||
found, i. e. messages of the types "Detected device 0.0.4711 on subchannel
|
||||
0.0.0042" and "SenseID: Device 0.0.4711 reports: ...".
|
||||
|
||||
Default is off.
|
||||
|
||||
@ -26,8 +26,10 @@ Command line parameters
|
||||
An ignored device can be un-ignored later; see the "/proc entries"-section for
|
||||
details.
|
||||
|
||||
The devices must be given either as bus ids (0.0.abcd) or as hexadecimal
|
||||
device numbers (0xabcd or abcd, for 2.4 backward compatibility).
|
||||
The devices must be given either as bus ids (0.x.abcd) or as hexadecimal
|
||||
device numbers (0xabcd or abcd, for 2.4 backward compatibility). If you
|
||||
give a device number 0xabcd, it will be interpreted as 0.0.abcd.
|
||||
|
||||
You can use the 'all' keyword to ignore all devices.
|
||||
The '!' operator will cause the I/O-layer to _not_ ignore a device.
|
||||
The command line is parsed from left to right.
|
||||
@ -81,31 +83,36 @@ Command line parameters
|
||||
will add 0.0.a000-0.0.accc and 0.0.af00-0.0.afff to the list of ignored
|
||||
devices.
|
||||
|
||||
The devices can be specified either by bus id (0.0.abcd) or, for 2.4 backward
|
||||
compatibility, by the device number in hexadecimal (0xabcd or abcd).
|
||||
The devices can be specified either by bus id (0.x.abcd) or, for 2.4 backward
|
||||
compatibility, by the device number in hexadecimal (0xabcd or abcd). Device
|
||||
numbers given as 0xabcd will be interpreted as 0.0.abcd.
|
||||
|
||||
* For some of the information present in the /proc filesystem in 2.4 (namely,
|
||||
/proc/subchannels and /proc/chpids), see driver-model.txt.
|
||||
Information formerly in /proc/irq_count is now in /proc/interrupts.
|
||||
|
||||
|
||||
* /proc/s390dbf/cio_*/ (S/390 debug feature)
|
||||
debugfs entries
|
||||
---------------
|
||||
|
||||
* /sys/kernel/debug/s390dbf/cio_*/ (S/390 debug feature)
|
||||
|
||||
Some views generated by the debug feature to hold various debug outputs.
|
||||
|
||||
- /proc/s390dbf/cio_crw/sprintf
|
||||
- /sys/kernel/debug/s390dbf/cio_crw/sprintf
|
||||
Messages from the processing of pending channel report words (machine check
|
||||
handling), which will also show when CONFIG_DEBUG_CRW is defined.
|
||||
handling).
|
||||
|
||||
- /proc/s390dbf/cio_msg/sprintf
|
||||
Various debug messages from the common I/O-layer; generally, messages which
|
||||
will also show when CONFIG_DEBUG_IO is defined.
|
||||
- /sys/kernel/debug/s390dbf/cio_msg/sprintf
|
||||
Various debug messages from the common I/O-layer, including messages
|
||||
printed when cio_msg=yes.
|
||||
|
||||
- /proc/s390dbf/cio_trace/hex_ascii
|
||||
- /sys/kernel/debug/s390dbf/cio_trace/hex_ascii
|
||||
Logs the calling of functions in the common I/O-layer and, if applicable,
|
||||
which subchannel they were called for, as well as dumps of some data
|
||||
structures (like irb in an error case).
|
||||
|
||||
The level of logging can be changed to be more or less verbose by piping to
|
||||
/proc/s390dbf/cio_*/level a number between 0 and 6; see the documentation on
|
||||
the S/390 debug feature (Documentation/s390/s390dbf.txt) for details.
|
||||
|
||||
* For some of the information present in the /proc filesystem in 2.4 (namely,
|
||||
/proc/subchannels and /proc/chpids), see driver-model.txt.
|
||||
Information formerly in /proc/irq_count is now in /proc/interrupts.
|
||||
/sys/kernel/debug/s390dbf/cio_*/level a number between 0 and 6; see the
|
||||
documentation on the S/390 debug feature (Documentation/s390/s390dbf.txt)
|
||||
for details.
|
||||
|
@ -286,10 +286,10 @@ first:
|
||||
timeout value
|
||||
-EIO: the common I/O layer terminated the request due to an error state
|
||||
|
||||
If the concurrent sense flag in the extended status word in the irb is set, the
|
||||
field irb->scsw.count describes the number of device specific sense bytes
|
||||
available in the extended control word irb->scsw.ecw[0]. No device sensing by
|
||||
the device driver itself is required.
|
||||
If the concurrent sense flag in the extended status word (esw) in the irb is
|
||||
set, the field erw.scnt in the esw describes the number of device specific
|
||||
sense bytes available in the extended control word irb->scsw.ecw[]. No device
|
||||
sensing by the device driver itself is required.
|
||||
|
||||
The device interrupt handler can use the following definitions to investigate
|
||||
the primary unit check source coded in sense byte 0 :
|
||||
|
@ -83,7 +83,7 @@ Some implementation details:
|
||||
CFS uses nanosecond granularity accounting and does not rely on any
|
||||
jiffies or other HZ detail. Thus the CFS scheduler has no notion of
|
||||
'timeslices' and has no heuristics whatsoever. There is only one
|
||||
central tunable:
|
||||
central tunable (you have to switch on CONFIG_SCHED_DEBUG):
|
||||
|
||||
/proc/sys/kernel/sched_granularity_ns
|
||||
|
||||
|
108
Documentation/sched-nice-design.txt
Normal file
108
Documentation/sched-nice-design.txt
Normal file
@ -0,0 +1,108 @@
|
||||
This document explains the thinking about the revamped and streamlined
|
||||
nice-levels implementation in the new Linux scheduler.
|
||||
|
||||
Nice levels were always pretty weak under Linux and people continuously
|
||||
pestered us to make nice +19 tasks use up much less CPU time.
|
||||
|
||||
Unfortunately that was not that easy to implement under the old
|
||||
scheduler, (otherwise we'd have done it long ago) because nice level
|
||||
support was historically coupled to timeslice length, and timeslice
|
||||
units were driven by the HZ tick, so the smallest timeslice was 1/HZ.
|
||||
|
||||
In the O(1) scheduler (in 2003) we changed negative nice levels to be
|
||||
much stronger than they were before in 2.4 (and people were happy about
|
||||
that change), and we also intentionally calibrated the linear timeslice
|
||||
rule so that nice +19 level would be _exactly_ 1 jiffy. To better
|
||||
understand it, the timeslice graph went like this (cheesy ASCII art
|
||||
alert!):
|
||||
|
||||
|
||||
A
|
||||
\ | [timeslice length]
|
||||
\ |
|
||||
\ |
|
||||
\ |
|
||||
\ |
|
||||
\|___100msecs
|
||||
|^ . _
|
||||
| ^ . _
|
||||
| ^ . _
|
||||
-*----------------------------------*-----> [nice level]
|
||||
-20 | +19
|
||||
|
|
||||
|
|
||||
|
||||
So that if someone wanted to really renice tasks, +19 would give a much
|
||||
bigger hit than the normal linear rule would do. (The solution of
|
||||
changing the ABI to extend priorities was discarded early on.)
|
||||
|
||||
This approach worked to some degree for some time, but later on with
|
||||
HZ=1000 it caused 1 jiffy to be 1 msec, which meant 0.1% CPU usage which
|
||||
we felt to be a bit excessive. Excessive _not_ because it's too small of
|
||||
a CPU utilization, but because it causes too frequent (once per
|
||||
millisec) rescheduling. (and would thus trash the cache, etc. Remember,
|
||||
this was long ago when hardware was weaker and caches were smaller, and
|
||||
people were running number crunching apps at nice +19.)
|
||||
|
||||
So for HZ=1000 we changed nice +19 to 5msecs, because that felt like the
|
||||
right minimal granularity - and this translates to 5% CPU utilization.
|
||||
But the fundamental HZ-sensitive property for nice+19 still remained,
|
||||
and we never got a single complaint about nice +19 being too _weak_ in
|
||||
terms of CPU utilization, we only got complaints about it (still) being
|
||||
too _strong_ :-)
|
||||
|
||||
To sum it up: we always wanted to make nice levels more consistent, but
|
||||
within the constraints of HZ and jiffies and their nasty design level
|
||||
coupling to timeslices and granularity it was not really viable.
|
||||
|
||||
The second (less frequent but still periodically occurring) complaint
|
||||
about Linux's nice level support was its asymmetry around the origin
|
||||
(which you can see demonstrated in the picture above), or more
|
||||
accurately: the fact that nice level behavior depended on the _absolute_
|
||||
nice level as well, while the nice API itself is fundamentally
|
||||
"relative":
|
||||
|
||||
int nice(int inc);
|
||||
|
||||
asmlinkage long sys_nice(int increment)
|
||||
|
||||
(the first one is the glibc API, the second one is the syscall API.)
|
||||
Note that the 'inc' is relative to the current nice level. Tools like
|
||||
bash's "nice" command mirror this relative API.
|
||||
|
||||
With the old scheduler, if you for example started a niced task with +1
|
||||
and another task with +2, the CPU split between the two tasks would
|
||||
depend on the nice level of the parent shell - if it was at nice -10 the
|
||||
CPU split was different than if it was at +5 or +10.
|
||||
|
||||
A third complaint against Linux's nice level support was that negative
|
||||
nice levels were not 'punchy enough', so lots of people had to resort to
|
||||
run audio (and other multimedia) apps under RT priorities such as
|
||||
SCHED_FIFO. But this caused other problems: SCHED_FIFO is not starvation
|
||||
proof, and a buggy SCHED_FIFO app can also lock up the system for good.
|
||||
|
||||
The new scheduler in v2.6.23 addresses all three types of complaints:
|
||||
|
||||
To address the first complaint (of nice levels being not "punchy"
|
||||
enough), the scheduler was decoupled from 'time slice' and HZ concepts
|
||||
(and granularity was made a separate concept from nice levels) and thus
|
||||
it was possible to implement better and more consistent nice +19
|
||||
support: with the new scheduler nice +19 tasks get a HZ-independent
|
||||
1.5%, instead of the variable 3%-5%-9% range they got in the old
|
||||
scheduler.
|
||||
|
||||
To address the second complaint (of nice levels not being consistent),
|
||||
the new scheduler makes nice(1) have the same CPU utilization effect on
|
||||
tasks, regardless of their absolute nice levels. So on the new
|
||||
scheduler, running a nice +10 and a nice +11 task has the same CPU
|
||||
utilization "split" between them as running a nice -5 and a nice -4
|
||||
task. (one will get 55% of the CPU, the other 45%.) That is why nice
|
||||
levels were changed to be "multiplicative" (or exponential) - that way
|
||||
it does not matter which nice level you start out from, the 'relative
|
||||
result' will always be the same.
|
||||
|
||||
The third complaint (of negative nice levels not being "punchy" enough
|
||||
and forcing audio apps to run under the more dangerous SCHED_FIFO
|
||||
scheduling policy) is addressed by the new scheduler almost
|
||||
automatically: stronger negative nice levels are an automatic
|
||||
side-effect of the recalibrated dynamic range of nice levels.
|
@ -1,10 +1,11 @@
|
||||
Version 10 of schedstats includes support for sched_domains, which
|
||||
hit the mainline kernel in 2.6.7. Some counters make more sense to be
|
||||
per-runqueue; other to be per-domain. Note that domains (and their associated
|
||||
information) will only be pertinent and available on machines utilizing
|
||||
CONFIG_SMP.
|
||||
Version 14 of schedstats includes support for sched_domains, which hit the
|
||||
mainline kernel in 2.6.20 although it is identical to the stats from version
|
||||
12 which was in the kernel from 2.6.13-2.6.19 (version 13 never saw a kernel
|
||||
release). Some counters make more sense to be per-runqueue; others to be
|
||||
per-domain. Note that domains (and their associated information) will only
|
||||
be pertinent and available on machines utilizing CONFIG_SMP.
|
||||
|
||||
In version 10 of schedstat, there is at least one level of domain
|
||||
In version 14 of schedstat, there is at least one level of domain
|
||||
statistics for each cpu listed, and there may well be more than one
|
||||
domain. Domains have no particular names in this implementation, but
|
||||
the highest numbered one typically arbitrates balancing across all the
|
||||
@ -27,7 +28,7 @@ to write their own scripts, the fields are described here.
|
||||
|
||||
CPU statistics
|
||||
--------------
|
||||
cpu<N> 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28
|
||||
cpu<N> 1 2 3 4 5 6 7 8 9 10 11 12
|
||||
|
||||
NOTE: In the sched_yield() statistics, the active queue is considered empty
|
||||
if it has only one process in it, since obviously the process calling
|
||||
@ -39,48 +40,20 @@ First four fields are sched_yield() statistics:
|
||||
3) # of times just the expired queue was empty
|
||||
4) # of times sched_yield() was called
|
||||
|
||||
Next four are schedule() statistics:
|
||||
5) # of times the active queue had at least one other process on it
|
||||
6) # of times we switched to the expired queue and reused it
|
||||
7) # of times schedule() was called
|
||||
8) # of times schedule() left the processor idle
|
||||
Next three are schedule() statistics:
|
||||
5) # of times we switched to the expired queue and reused it
|
||||
6) # of times schedule() was called
|
||||
7) # of times schedule() left the processor idle
|
||||
|
||||
Next four are active_load_balance() statistics:
|
||||
9) # of times active_load_balance() was called
|
||||
10) # of times active_load_balance() caused this cpu to gain a task
|
||||
11) # of times active_load_balance() caused this cpu to lose a task
|
||||
12) # of times active_load_balance() tried to move a task and failed
|
||||
|
||||
Next three are try_to_wake_up() statistics:
|
||||
13) # of times try_to_wake_up() was called
|
||||
14) # of times try_to_wake_up() successfully moved the awakening task
|
||||
15) # of times try_to_wake_up() attempted to move the awakening task
|
||||
|
||||
Next two are wake_up_new_task() statistics:
|
||||
16) # of times wake_up_new_task() was called
|
||||
17) # of times wake_up_new_task() successfully moved the new task
|
||||
|
||||
Next one is a sched_migrate_task() statistic:
|
||||
18) # of times sched_migrate_task() was called
|
||||
|
||||
Next one is a sched_balance_exec() statistic:
|
||||
19) # of times sched_balance_exec() was called
|
||||
Next two are try_to_wake_up() statistics:
|
||||
8) # of times try_to_wake_up() was called
|
||||
9) # of times try_to_wake_up() was called to wake up the local cpu
|
||||
|
||||
Next three are statistics describing scheduling latency:
|
||||
20) sum of all time spent running by tasks on this processor (in ms)
|
||||
21) sum of all time spent waiting to run by tasks on this processor (in ms)
|
||||
22) # of tasks (not necessarily unique) given to the processor
|
||||
|
||||
The last six are statistics dealing with pull_task():
|
||||
23) # of times pull_task() moved a task to this cpu when newly idle
|
||||
24) # of times pull_task() stole a task from this cpu when another cpu
|
||||
was newly idle
|
||||
25) # of times pull_task() moved a task to this cpu when idle
|
||||
26) # of times pull_task() stole a task from this cpu when another cpu
|
||||
was idle
|
||||
27) # of times pull_task() moved a task to this cpu when busy
|
||||
28) # of times pull_task() stole a task from this cpu when another cpu
|
||||
was busy
|
||||
10) sum of all time spent running by tasks on this processor (in jiffies)
|
||||
11) sum of all time spent waiting to run by tasks on this processor (in
|
||||
jiffies)
|
||||
12) # of timeslices run on this cpu
|
||||
|
||||
|
||||
Domain statistics
|
||||
@ -89,65 +62,95 @@ One of these is produced per domain for each cpu described. (Note that if
|
||||
CONFIG_SMP is not defined, *no* domains are utilized and these lines
|
||||
will not appear in the output.)
|
||||
|
||||
domain<N> 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
|
||||
domain<N> <cpumask> 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36
|
||||
|
||||
The first field is a bit mask indicating what cpus this domain operates over.
|
||||
|
||||
The next fifteen are a variety of load_balance() statistics:
|
||||
The next 24 are a variety of load_balance() statistics grouped into types
|
||||
of idleness (idle, busy, and newly idle):
|
||||
|
||||
1) # of times in this domain load_balance() was called when the cpu
|
||||
was idle
|
||||
2) # of times in this domain load_balance() was called when the cpu
|
||||
was busy
|
||||
3) # of times in this domain load_balance() was called when the cpu
|
||||
was just becoming idle
|
||||
4) # of times in this domain load_balance() tried to move one or more
|
||||
tasks and failed, when the cpu was idle
|
||||
5) # of times in this domain load_balance() tried to move one or more
|
||||
tasks and failed, when the cpu was busy
|
||||
6) # of times in this domain load_balance() tried to move one or more
|
||||
tasks and failed, when the cpu was just becoming idle
|
||||
7) sum of imbalances discovered (if any) with each call to
|
||||
load_balance() in this domain when the cpu was idle
|
||||
8) sum of imbalances discovered (if any) with each call to
|
||||
load_balance() in this domain when the cpu was busy
|
||||
9) sum of imbalances discovered (if any) with each call to
|
||||
load_balance() in this domain when the cpu was just becoming idle
|
||||
10) # of times in this domain load_balance() was called but did not find
|
||||
a busier queue while the cpu was idle
|
||||
11) # of times in this domain load_balance() was called but did not find
|
||||
a busier queue while the cpu was busy
|
||||
12) # of times in this domain load_balance() was called but did not find
|
||||
a busier queue while the cpu was just becoming idle
|
||||
13) # of times in this domain a busier queue was found while the cpu was
|
||||
idle but no busier group was found
|
||||
14) # of times in this domain a busier queue was found while the cpu was
|
||||
busy but no busier group was found
|
||||
15) # of times in this domain a busier queue was found while the cpu was
|
||||
just becoming idle but no busier group was found
|
||||
1) # of times in this domain load_balance() was called when the
|
||||
cpu was idle
|
||||
2) # of times in this domain load_balance() checked but found
|
||||
the load did not require balancing when the cpu was idle
|
||||
3) # of times in this domain load_balance() tried to move one or
|
||||
more tasks and failed, when the cpu was idle
|
||||
4) sum of imbalances discovered (if any) with each call to
|
||||
load_balance() in this domain when the cpu was idle
|
||||
5) # of times in this domain pull_task() was called when the cpu
|
||||
was idle
|
||||
6) # of times in this domain pull_task() was called even though
|
||||
the target task was cache-hot when idle
|
||||
7) # of times in this domain load_balance() was called but did
|
||||
not find a busier queue while the cpu was idle
|
||||
8) # of times in this domain a busier queue was found while the
|
||||
cpu was idle but no busier group was found
|
||||
|
||||
Next two are sched_balance_exec() statistics:
|
||||
17) # of times in this domain sched_balance_exec() successfully pushed
|
||||
a task to a new cpu
|
||||
18) # of times in this domain sched_balance_exec() tried but failed to
|
||||
push a task to a new cpu
|
||||
9) # of times in this domain load_balance() was called when the
|
||||
cpu was busy
|
||||
10) # of times in this domain load_balance() checked but found the
|
||||
load did not require balancing when busy
|
||||
11) # of times in this domain load_balance() tried to move one or
|
||||
more tasks and failed, when the cpu was busy
|
||||
12) sum of imbalances discovered (if any) with each call to
|
||||
load_balance() in this domain when the cpu was busy
|
||||
13) # of times in this domain pull_task() was called when busy
|
||||
14) # of times in this domain pull_task() was called even though the
|
||||
target task was cache-hot when busy
|
||||
15) # of times in this domain load_balance() was called but did not
|
||||
find a busier queue while the cpu was busy
|
||||
16) # of times in this domain a busier queue was found while the cpu
|
||||
was busy but no busier group was found
|
||||
|
||||
Next two are try_to_wake_up() statistics:
|
||||
19) # of times in this domain try_to_wake_up() tried to move a task based
|
||||
on affinity and cache warmth
|
||||
20) # of times in this domain try_to_wake_up() tried to move a task based
|
||||
on load balancing
|
||||
17) # of times in this domain load_balance() was called when the
|
||||
cpu was just becoming idle
|
||||
18) # of times in this domain load_balance() checked but found the
|
||||
load did not require balancing when the cpu was just becoming idle
|
||||
19) # of times in this domain load_balance() tried to move one or more
|
||||
tasks and failed, when the cpu was just becoming idle
|
||||
20) sum of imbalances discovered (if any) with each call to
|
||||
load_balance() in this domain when the cpu was just becoming idle
|
||||
21) # of times in this domain pull_task() was called when newly idle
|
||||
22) # of times in this domain pull_task() was called even though the
|
||||
target task was cache-hot when just becoming idle
|
||||
23) # of times in this domain load_balance() was called but did not
|
||||
find a busier queue while the cpu was just becoming idle
|
||||
24) # of times in this domain a busier queue was found while the cpu
|
||||
was just becoming idle but no busier group was found
|
||||
|
||||
Next three are active_load_balance() statistics:
|
||||
25) # of times active_load_balance() was called
|
||||
26) # of times active_load_balance() tried to move a task and failed
|
||||
27) # of times active_load_balance() successfully moved a task
|
||||
|
||||
Next three are sched_balance_exec() statistics:
|
||||
28) sbe_cnt is not used
|
||||
29) sbe_balanced is not used
|
||||
30) sbe_pushed is not used
|
||||
|
||||
Next three are sched_balance_fork() statistics:
|
||||
31) sbf_cnt is not used
|
||||
32) sbf_balanced is not used
|
||||
33) sbf_pushed is not used
|
||||
|
||||
Next three are try_to_wake_up() statistics:
|
||||
34) # of times in this domain try_to_wake_up() awoke a task that
|
||||
last ran on a different cpu in this domain
|
||||
35) # of times in this domain try_to_wake_up() moved a task to the
|
||||
waking cpu because it was cache-cold on its own cpu anyway
|
||||
36) # of times in this domain try_to_wake_up() started passive balancing
|
||||
|
||||
/proc/<pid>/schedstat
|
||||
----------------
|
||||
schedstats also adds a new /proc/<pid>/schedstat file to include some of
|
||||
the same information on a per-process level. There are three fields in
|
||||
this file correlating to fields 20, 21, and 22 in the CPU fields, but
|
||||
they only apply for that process.
|
||||
this file correlating for that process to:
|
||||
1) time spent on the cpu
|
||||
2) time spent waiting on a runqueue
|
||||
3) # of timeslices run on this cpu
|
||||
|
||||
A program could be easily written to make use of these extra fields to
|
||||
report on how well a particular process or set of processes is faring
|
||||
under the scheduler's policies. A simple version of such a program is
|
||||
available at
|
||||
http://eaglet.rain.com/rick/linux/schedstat/v10/latency.c
|
||||
http://eaglet.rain.com/rick/linux/schedstat/v12/latency.c
|
||||
|
@ -467,7 +467,12 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed.
|
||||
above explicitly.
|
||||
|
||||
The power-management is supported.
|
||||
|
||||
|
||||
Module snd-cs5530
|
||||
_________________
|
||||
|
||||
Module for Cyrix/NatSemi Geode 5530 chip.
|
||||
|
||||
Module snd-cs5535audio
|
||||
----------------------
|
||||
|
||||
@ -759,6 +764,7 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed.
|
||||
|
||||
model - force the model name
|
||||
position_fix - Fix DMA pointer (0 = auto, 1 = none, 2 = POSBUF, 3 = FIFO size)
|
||||
probe_mask - Bitmask to probe codecs (default = -1, meaning all slots)
|
||||
single_cmd - Use single immediate commands to communicate with
|
||||
codecs (for debugging only)
|
||||
enable_msi - Enable Message Signaled Interrupt (MSI) (default = off)
|
||||
@ -803,6 +809,8 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed.
|
||||
hp-3013 HP machines (3013-variant)
|
||||
fujitsu Fujitsu S7020
|
||||
acer Acer TravelMate
|
||||
will Will laptops (PB V7900)
|
||||
replacer Replacer 672V
|
||||
basic fixed pin assignment (old default model)
|
||||
auto auto-config reading BIOS (default)
|
||||
|
||||
@ -811,16 +819,31 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed.
|
||||
hp-bpc HP xw4400/6400/8400/9400 laptops
|
||||
hp-bpc-d7000 HP BPC D7000
|
||||
benq Benq ED8
|
||||
benq-t31 Benq T31
|
||||
hippo Hippo (ATI) with jack detection, Sony UX-90s
|
||||
hippo_1 Hippo (Benq) with jack detection
|
||||
sony-assamd Sony ASSAMD
|
||||
basic fixed pin assignment w/o SPDIF
|
||||
auto auto-config reading BIOS (default)
|
||||
|
||||
ALC268
|
||||
3stack 3-stack model
|
||||
auto auto-config reading BIOS (default)
|
||||
|
||||
ALC662
|
||||
3stack-dig 3-stack (2-channel) with SPDIF
|
||||
3stack-6ch 3-stack (6-channel)
|
||||
3stack-6ch-dig 3-stack (6-channel) with SPDIF
|
||||
6stack-dig 6-stack with SPDIF
|
||||
lenovo-101e Lenovo laptop
|
||||
auto auto-config reading BIOS (default)
|
||||
|
||||
ALC882/885
|
||||
3stack-dig 3-jack with SPDIF I/O
|
||||
6stack-dig 6-jack digital with SPDIF I/O
|
||||
arima Arima W820Di1
|
||||
macpro MacPro support
|
||||
imac24 iMac 24'' with jack detection
|
||||
w2jc ASUS W2JC
|
||||
auto auto-config reading BIOS (default)
|
||||
|
||||
@ -832,9 +855,15 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed.
|
||||
6stack-dig-demo 6-jack digital for Intel demo board
|
||||
acer Acer laptops (Travelmate 3012WTMi, Aspire 5600, etc)
|
||||
medion Medion Laptops
|
||||
medion-md2 Medion MD2
|
||||
targa-dig Targa/MSI
|
||||
targa-2ch-dig Targa/MSI with 2-channel
|
||||
laptop-eapd 3-jack with SPDIF I/O and EAPD (Clevo M540JE, M550JE)
|
||||
lenovo-101e Lenovo 101E
|
||||
lenovo-nb0763 Lenovo NB0763
|
||||
lenovo-ms7195-dig Lenovo MS7195
|
||||
6stack-hp HP machines with 6stack (Nettle boards)
|
||||
3stack-hp HP machines with 3stack (Lucknow, Samba boards)
|
||||
auto auto-config reading BIOS (default)
|
||||
|
||||
ALC861/660
|
||||
@ -853,7 +882,9 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed.
|
||||
3stack-dig 3-jack with SPDIF OUT
|
||||
6stack-dig 6-jack with SPDIF OUT
|
||||
3stack-660 3-jack (for ALC660VD)
|
||||
3stack-660-digout 3-jack with SPDIF OUT (for ALC660VD)
|
||||
lenovo Lenovo 3000 C200
|
||||
dallas Dallas laptops
|
||||
auto auto-config reading BIOS (default)
|
||||
|
||||
CMI9880
|
||||
@ -864,12 +895,26 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed.
|
||||
allout 5-jack in back, 2-jack in front, SPDIF out
|
||||
auto auto-config reading BIOS (default)
|
||||
|
||||
AD1882
|
||||
3stack 3-stack mode (default)
|
||||
6stack 6-stack mode
|
||||
|
||||
AD1884
|
||||
N/A
|
||||
|
||||
AD1981
|
||||
basic 3-jack (default)
|
||||
hp HP nx6320
|
||||
thinkpad Lenovo Thinkpad T60/X60/Z60
|
||||
toshiba Toshiba U205
|
||||
|
||||
AD1983
|
||||
N/A
|
||||
|
||||
AD1984
|
||||
basic default configuration
|
||||
thinkpad Lenovo Thinkpad T61/X61
|
||||
|
||||
AD1986A
|
||||
6stack 6-jack, separate surrounds (default)
|
||||
3stack 3-stack, shared surrounds
|
||||
@ -907,11 +952,18 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed.
|
||||
ref Reference board
|
||||
3stack D945 3stack
|
||||
5stack D945 5stack + SPDIF
|
||||
macmini Intel Mac Mini
|
||||
macbook Intel Mac Book
|
||||
macbook-pro-v1 Intel Mac Book Pro 1st generation
|
||||
macbook-pro Intel Mac Book Pro 2nd generation
|
||||
imac-intel Intel iMac
|
||||
dell Dell XPS M1210
|
||||
intel-mac-v1 Intel Mac Type 1
|
||||
intel-mac-v2 Intel Mac Type 2
|
||||
intel-mac-v3 Intel Mac Type 3
|
||||
intel-mac-v4 Intel Mac Type 4
|
||||
intel-mac-v5 Intel Mac Type 5
|
||||
macmini Intel Mac Mini (equivalent with type 3)
|
||||
macbook Intel Mac Book (eq. type 5)
|
||||
macbook-pro-v1 Intel Mac Book Pro 1st generation (eq. type 3)
|
||||
macbook-pro Intel Mac Book Pro 2nd generation (eq. type 3)
|
||||
imac-intel Intel iMac (eq. type 2)
|
||||
imac-intel-20 Intel iMac (newer version) (eq. type 3)
|
||||
|
||||
STAC9202/9250/9251
|
||||
ref Reference board, base config
|
||||
@ -956,6 +1008,17 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed.
|
||||
from the irq. Remember this is a last resort, and should be
|
||||
avoided as much as possible...
|
||||
|
||||
MORE NOTES ON "azx_get_response timeout" PROBLEMS:
|
||||
On some hardware, you may need to add a proper probe_mask option
|
||||
to avoid the "azx_get_response timeout" problem above, instead.
|
||||
This occurs when the access to non-existing or non-working codec slot
|
||||
(likely a modem one) causes a stall of the communication via HD-audio
|
||||
bus. You can see which codec slots are probed by enabling
|
||||
CONFIG_SND_DEBUG_DETECT, or simply from the file name of the codec
|
||||
proc files. Then limit the slots to probe by probe_mask option.
|
||||
For example, probe_mask=1 means to probe only the first slot, and
|
||||
probe_mask=4 means only the third slot.
|
||||
|
||||
The power-management is supported.
|
||||
|
||||
Module snd-hdsp
|
||||
|
@ -1,4 +1,4 @@
|
||||
Guide to using M-Audio Audiophile USB with ALSA and Jack v1.3
|
||||
Guide to using M-Audio Audiophile USB with ALSA and Jack v1.5
|
||||
========================================================
|
||||
|
||||
Thibault Le Meur <Thibault.LeMeur@supelec.fr>
|
||||
@ -6,8 +6,19 @@
|
||||
This document is a guide to using the M-Audio Audiophile USB (tm) device with
|
||||
ALSA and JACK.
|
||||
|
||||
History
|
||||
=======
|
||||
* v1.4 - Thibault Le Meur (2007-07-11)
|
||||
- Added Low Endianness nature of 16bits-modes
|
||||
found by Hakan Lennestal <Hakan.Lennestal@brfsodrahamn.se>
|
||||
- Modifying document structure
|
||||
* v1.5 - Thibault Le Meur (2007-07-12)
|
||||
- Added AC3/DTS passthru info
|
||||
|
||||
|
||||
1 - Audiophile USB Specs and correct usage
|
||||
==========================================
|
||||
|
||||
This part is a reminder of important facts about the functions and limitations
|
||||
of the device.
|
||||
|
||||
@ -25,18 +36,18 @@ The device has 4 audio interfaces, and 2 MIDI ports:
|
||||
The internal DAC/ADC has the following characteristics:
|
||||
* sample depth of 16 or 24 bits
|
||||
* sample rate from 8kHz to 96kHz
|
||||
* Two ports can't use different sample depths at the same time. Moreover, the
|
||||
Audiophile USB documentation gives the following Warning: "Please exit any
|
||||
audio application running before switching between bit depths"
|
||||
* Two interfaces can't use different sample depths at the same time.
|
||||
Moreover, the Audiophile USB documentation gives the following Warning:
|
||||
"Please exit any audio application running before switching between bit depths"
|
||||
|
||||
Due to the USB 1.1 bandwidth limitation, a limited number of interfaces can be
|
||||
activated at the same time depending on the audio mode selected:
|
||||
* 16-bit/48kHz ==> 4 channels in/4 channels out
|
||||
* 16-bit/48kHz ==> 4 channels in + 4 channels out
|
||||
- Ai+Ao+Di+Do
|
||||
* 24-bit/48kHz ==> 4 channels in/2 channels out,
|
||||
or 2 channels in/4 channels out
|
||||
* 24-bit/48kHz ==> 4 channels in + 2 channels out,
|
||||
or 2 channels in + 4 channels out
|
||||
- Ai+Ao+Do or Ai+Di+Ao or Ai+Di+Do or Di+Ao+Do
|
||||
* 24-bit/96kHz ==> 2 channels in, or 2 channels out (half duplex only)
|
||||
* 24-bit/96kHz ==> 2 channels in _or_ 2 channels out (half duplex only)
|
||||
- Ai or Ao or Di or Do
|
||||
|
||||
Important facts about the Digital interface:
|
||||
@ -52,44 +63,56 @@ source is connected
|
||||
synchronization error (for instance sound played at an odd sample rate)
|
||||
|
||||
|
||||
2 - Audiophile USB support in ALSA
|
||||
==================================
|
||||
2 - Audiophile USB MIDI support in ALSA
|
||||
=======================================
|
||||
|
||||
2.1 - MIDI ports
|
||||
----------------
|
||||
The Audiophile USB MIDI ports will be automatically supported once the
|
||||
The Audiophile USB MIDI ports will be automatically supported once the
|
||||
following modules have been loaded:
|
||||
* snd-usb-audio
|
||||
* snd-seq-midi
|
||||
|
||||
No additional setting is required.
|
||||
|
||||
2.2 - Audio ports
|
||||
-----------------
|
||||
|
||||
3 - Audiophile USB Audio support in ALSA
|
||||
========================================
|
||||
|
||||
Audio functions of the Audiophile USB device are handled by the snd-usb-audio
|
||||
module. This module can work in a default mode (without any device-specific
|
||||
parameter), or in an "advanced" mode with the device-specific parameter called
|
||||
"device_setup".
|
||||
|
||||
2.2.1 - Default Alsa driver mode
|
||||
3.1 - Default Alsa driver mode
|
||||
------------------------------
|
||||
|
||||
The default behavior of the snd-usb-audio driver is to parse the device
|
||||
capabilities at startup and enable all functions inside the device (including
|
||||
all ports at any supported sample rates and sample depths). This approach
|
||||
has the advantage to let the driver easily switch from sample rates/depths
|
||||
automatically according to the need of the application claiming the device.
|
||||
The default behavior of the snd-usb-audio driver is to list the device
|
||||
capabilities at startup and activate the required mode when required
|
||||
by the applications: for instance if the user is recording in a
|
||||
24bit-depth-mode and immediately after wants to switch to a 16bit-depth mode,
|
||||
the snd-usb-audio module will reconfigure the device on the fly.
|
||||
|
||||
In this case the Audiophile ports are mapped to alsa pcm devices in the
|
||||
following way (I suppose the device's index is 1):
|
||||
This approach has the advantage to let the driver switch from sample
|
||||
rates/depths automatically according to the user's needs. However, those who
|
||||
are using the device under windows know that this is not how the device is meant to
|
||||
work: under windows applications must be closed before using the m-audio control
|
||||
panel to switch the device working mode. Thus as we'll see in next section, this
|
||||
Default Alsa driver mode can lead to device misconfigurations.
|
||||
|
||||
Let's get back to the Default Alsa driver mode for now. In this case the
|
||||
Audiophile interfaces are mapped to alsa pcm devices in the following
|
||||
way (I suppose the device's index is 1):
|
||||
* hw:1,0 is Ao in playback and Di in capture
|
||||
* hw:1,1 is Do in playback and Ai in capture
|
||||
* hw:1,2 is Do in AC3/DTS passthrough mode
|
||||
|
||||
You must note as well that the device uses Big Endian byte encoding so that
|
||||
supported audio format are S16_BE for 16-bit depth modes and S24_3BE for
|
||||
24-bits depth mode. One exception is the hw:1,2 port which is Little Endian
|
||||
compliant and thus uses S16_LE.
|
||||
In this mode, the device uses Big Endian byte-encoding so that
|
||||
supported audio format are S16_BE for 16-bit depth modes and S24_3BE for
|
||||
24-bits depth mode.
|
||||
|
||||
One exception is the hw:1,2 port which was reported to be Little Endian
|
||||
compliant (supposedly supporting S16_LE) but processes in fact only S16_BE streams.
|
||||
This has been fixed in kernel 2.6.23 and above and now the hw:1,2 interface
|
||||
is reported to be big endian in this default driver mode.
|
||||
|
||||
Examples:
|
||||
* playing a S24_3BE encoded raw file to the Ao port
|
||||
@ -98,22 +121,26 @@ Examples:
|
||||
% arecord -D hw:1,1 -c2 -t raw -r48000 -fS24_3BE test.raw
|
||||
* playing a S16_BE encoded raw file to the Do port
|
||||
% aplay -D hw:1,1 -c2 -t raw -r48000 -fS16_BE test.raw
|
||||
* playing an ac3 sample file to the Do port
|
||||
% aplay -D hw:1,2 --channels=6 ac3_S16_BE_encoded_file.raw
|
||||
|
||||
If you're happy with the default Alsa driver setup and don't experience any
|
||||
If you're happy with the default Alsa driver mode and don't experience any
|
||||
issue with this mode, then you can skip the following chapter.
|
||||
|
||||
2.2.2 - Advanced module setup
|
||||
3.2 - Advanced module setup
|
||||
---------------------------
|
||||
|
||||
Due to the hardware constraints described above, the device initialization made
|
||||
by the Alsa driver in default mode may result in a corrupted state of the
|
||||
device. For instance, a particularly annoying issue is that the sound captured
|
||||
from the Ai port sounds distorted (as if boosted with an excessive high volume
|
||||
gain).
|
||||
from the Ai interface sounds distorted (as if boosted with an excessive high
|
||||
volume gain).
|
||||
|
||||
For people having this problem, the snd-usb-audio module has a new module
|
||||
parameter called "device_setup".
|
||||
parameter called "device_setup" (this parameter was introduced in kernel
|
||||
release 2.6.17)
|
||||
|
||||
2.2.2.1 - Initializing the working mode of the Audiophile USB
|
||||
3.2.1 - Initializing the working mode of the Audiophile USB
|
||||
|
||||
As far as the Audiophile USB device is concerned, this value lets the user
|
||||
specify:
|
||||
@ -121,33 +148,57 @@ specify:
|
||||
* the sample rate
|
||||
* whether the Di port is used or not
|
||||
|
||||
Here is a list of supported device_setup values for this device:
|
||||
* device_setup=0x00 (or omitted)
|
||||
- Alsa driver default mode
|
||||
- maintains backward compatibility with setups that do not use this
|
||||
parameter by not introducing any change
|
||||
- results sometimes in corrupted sound as described earlier
|
||||
When initialized with "device_setup=0x00", the snd-usb-audio module has
|
||||
the same behaviour as when the parameter is omitted (see paragraph "Default
|
||||
Alsa driver mode" above)
|
||||
|
||||
Other modes are described in the following subsections.
|
||||
|
||||
3.2.1.1 - 16-bit modes
|
||||
|
||||
The two supported modes are:
|
||||
|
||||
* device_setup=0x01
|
||||
- 16bits 48kHz mode with Di disabled
|
||||
- Ai,Ao,Do can be used at the same time
|
||||
- hw:1,0 is not available in capture mode
|
||||
- hw:1,2 is not available
|
||||
|
||||
* device_setup=0x11
|
||||
- 16bits 48kHz mode with Di enabled
|
||||
- Ai,Ao,Di,Do can be used at the same time
|
||||
- hw:1,0 is available in capture mode
|
||||
- hw:1,2 is not available
|
||||
|
||||
In these modes the device operates only in 16-bit modes. Before kernel 2.6.23,
|
||||
the devices were reported to be Big-Endian when in fact they were Little-Endian
|
||||
so that playing a file was a matter of using:
|
||||
% aplay -D hw:1,1 -c2 -t raw -r48000 -fS16_BE test_S16_LE.raw
|
||||
where "test_S16_LE.raw" was in fact a little-endian sample file.
|
||||
|
||||
Thanks to Hakan Lennestal (who discovered the Little-Endianness of the device in
|
||||
these modes) a fix has been committed (expected in kernel 2.6.23) and
|
||||
Alsa now reports Little-Endian interfaces. Thus playing a file now is as simple as
|
||||
using:
|
||||
% aplay -D hw:1,1 -c2 -t raw -r48000 -fS16_LE test_S16_LE.raw
|
||||
|
||||
3.2.1.2 - 24-bit modes
|
||||
|
||||
The three supported modes are:
|
||||
|
||||
* device_setup=0x09
|
||||
- 24bits 48kHz mode with Di disabled
|
||||
- Ai,Ao,Do can be used at the same time
|
||||
- hw:1,0 is not available in capture mode
|
||||
- hw:1,2 is not available
|
||||
|
||||
* device_setup=0x19
|
||||
- 24bits 48kHz mode with Di enabled
|
||||
- 3 ports from {Ai,Ao,Di,Do} can be used at the same time
|
||||
- hw:1,0 is available in capture mode and an active digital source must be
|
||||
connected to Di
|
||||
- hw:1,2 is not available
|
||||
|
||||
* device_setup=0x0D or 0x10
|
||||
- 24bits 96kHz mode
|
||||
- Di is enabled by default for this mode but does not need to be connected
|
||||
@ -155,34 +206,64 @@ Here is a list of supported device_setup values for this device:
|
||||
- Only 1 port from {Ai,Ao,Di,Do} can be used at the same time
|
||||
- hw:1,0 is available in capture mode
|
||||
- hw:1,2 is not available
|
||||
|
||||
In these modes the device is only Big-Endian compliant (see "Default Alsa driver
|
||||
mode" above for an aplay command example)
|
||||
|
||||
3.2.1.3 - AC3 w/ DTS passthru mode
|
||||
|
||||
Thanks to Hakan Lennestal, I now have a report saying that this mode works.
|
||||
|
||||
* device_setup=0x03
|
||||
- 16bits 48kHz mode with only the Do port enabled
|
||||
- AC3 with DTS passthru (not tested)
|
||||
- AC3 with DTS passthru
|
||||
- Caution with this setup the Do port is mapped to the pcm device hw:1,0
|
||||
|
||||
2.2.2.2 - Setting and switching configurations with the device_setup parameter
|
||||
The command line used to playback the AC3/DTS encoded .wav-files in this mode:
|
||||
% aplay -D hw:1,0 --channels=6 ac3_S16_LE_encoded_file.raw
|
||||
|
||||
3.2.2 - How to use the device_setup parameter
|
||||
----------------------------------------------
|
||||
|
||||
The parameter can be given:
|
||||
|
||||
* By manually probing the device (as root):
|
||||
# modprobe -r snd-usb-audio
|
||||
# modprobe snd-usb-audio index=1 device_setup=0x09
|
||||
|
||||
* Or while configuring the modules options in your modules configuration file
|
||||
- For Fedora distributions, edit the /etc/modprobe.conf file:
|
||||
alias snd-card-1 snd-usb-audio
|
||||
options snd-usb-audio index=1 device_setup=0x09
|
||||
|
||||
IMPORTANT NOTE WHEN SWITCHING CONFIGURATION:
|
||||
-------------------------------------------
|
||||
* You may need to _first_ initialize the module with the correct device_setup
|
||||
parameter and _only_after_ turn on the Audiophile USB device
|
||||
* This is especially true when switching the sample depth:
|
||||
CAUTION when initializing the device
|
||||
-------------------------------------
|
||||
|
||||
* Correct initialization on the device requires that device_setup is given to
|
||||
the module BEFORE the device is turned on. So, if you use the "manual probing"
|
||||
method described above, take care to power-on the device AFTER this initialization.
|
||||
|
||||
* Failing to respect this will lead to a misconfiguration of the device. In this case
|
||||
turn off the device, unprobe the snd-usb-audio module, then probe it again with
|
||||
correct device_setup parameter and then (and only then) turn on the device again.
|
||||
|
||||
* If you've correctly initialized the device in a valid mode and then want to switch
|
||||
to another mode (possibly with another sample-depth), please use also the following
|
||||
procedure:
|
||||
- first turn off the device
|
||||
- de-register the snd-usb-audio module (modprobe -r)
|
||||
- change the device_setup parameter by changing the device_setup
|
||||
option in /etc/modprobe.conf
|
||||
- turn on the device
|
||||
* A workaround for this last issue has been applied to kernel 2.6.23, but it may not
|
||||
be enough to ensure the 'stability' of the device initialization.
|
||||
|
||||
2.2.2.3 - Audiophile USB's device_setup structure
|
||||
3.2.3 - Technical details for hackers
|
||||
-------------------------------------
|
||||
This section is for hackers, wanting to understand details about the device
|
||||
internals and how Alsa supports it.
|
||||
|
||||
3.2.3.1 - Audiophile USB's device_setup structure
|
||||
|
||||
If you want to understand the device_setup magic numbers for the Audiophile
|
||||
USB, you need some very basic understanding of binary computation. However,
|
||||
@ -228,12 +309,12 @@ Caution:
|
||||
- choosing b2 will prepare all interfaces for 24bits/96kHz but you'll
|
||||
only be able to use one at the same time
|
||||
|
||||
2.2.3 - USB implementation details for this device
|
||||
3.2.3.2 - USB implementation details for this device
|
||||
|
||||
You may safely skip this section if you're not interested in driver
|
||||
development.
|
||||
hacking.
|
||||
|
||||
This section describes some internal aspects of the device and summarize the
|
||||
This section describes some internal aspects of the device and summarizes the
|
||||
data I got by usb-snooping the windows and Linux drivers.
|
||||
|
||||
The M-Audio Audiophile USB has 7 USB Interfaces:
|
||||
@ -293,43 +374,45 @@ parse_audio_endpoints function uses a quirk called
|
||||
"audiophile_skip_setting_quirk" in order to prevent AltSettings not
|
||||
corresponding to device_setup from being registered in the driver.
|
||||
|
||||
3 - Audiophile USB and Jack support
|
||||
4 - Audiophile USB and Jack support
|
||||
===================================
|
||||
|
||||
This section deals with support of the Audiophile USB device in Jack.
|
||||
The main issue regarding this support is that the device is Big Endian
|
||||
compliant.
|
||||
|
||||
3.1 - Using the plug alsa plugin
|
||||
--------------------------------
|
||||
There are 2 main potential issues when using Jackd with the device:
|
||||
* support for Big-Endian devices in 24-bit modes
|
||||
* support for 4-in / 4-out channels
|
||||
|
||||
Jack doesn't directly support big endian devices. Thus, one way to have support
|
||||
for this device with Alsa is to use the Alsa "plug" converter.
|
||||
4.1 - Direct support in Jackd
|
||||
-----------------------------
|
||||
|
||||
Jack supports big endian devices only in recent versions (thanks to
|
||||
Andreas Steinmetz for his first big-endian patch). I can't remember
|
||||
exactly when this support was released into jackd, let's just say that
|
||||
with jackd version 0.103.0 it's almost ok (just a small bug is affecting
|
||||
16bits Big-Endian devices, but since you've read carefully the above
|
||||
paragraphs, you're now using kernel >= 2.6.23 and your 16bits devices
|
||||
are now Little Endians ;-) ).
|
||||
|
||||
You can run jackd with the following command for playback with Ao and
|
||||
record with Ai:
|
||||
% jackd -R -dalsa -Phw:1,0 -r48000 -p128 -n2 -D -Chw:1,1
|
||||
|
||||
4.2 - Using Alsa plughw
|
||||
-----------------------
|
||||
If you don't have a recent Jackd installed, you can downgrade to using
|
||||
the Alsa "plug" converter.
|
||||
|
||||
For instance here is one way to run Jack with 2 playback channels on Ao and 2
|
||||
capture channels from Ai:
|
||||
% jackd -R -dalsa -dplughw:1 -r48000 -p256 -n2 -D -Cplughw:1,1
|
||||
|
||||
|
||||
However you may see the following warning message:
|
||||
"You appear to be using the ALSA software "plug" layer, probably a result of
|
||||
using the "default" ALSA device. This is less efficient than it could be.
|
||||
Consider using a hardware device instead rather than using the plug layer."
|
||||
|
||||
3.2 - Patching alsa to use direct pcm device
|
||||
--------------------------------------------
|
||||
A patch for Jack by Andreas Steinmetz adds support for Big Endian devices.
|
||||
However it has not been included in the CVS tree.
|
||||
|
||||
You can find it at the following URL:
|
||||
http://sourceforge.net/tracker/index.php?func=detail&aid=1289682&group_id=39687&
|
||||
atid=425939
|
||||
|
||||
After having applied the patch you can run jackd with the following command
|
||||
line:
|
||||
% jackd -R -dalsa -Phw:1,0 -r48000 -p128 -n2 -D -Chw:1,1
|
||||
|
||||
3.2 - Getting 2 input and/or output interfaces in Jack
|
||||
4.3 - Getting 2 input and/or output interfaces in Jack
|
||||
------------------------------------------------------
|
||||
|
||||
As you can see, starting the Jack server this way will only enable 1 stereo
|
||||
@ -339,6 +422,7 @@ This is due to the following restrictions:
|
||||
* Jack can only open one capture device and one playback device at a time
|
||||
* The Audiophile USB is seen as 2 (or three) Alsa devices: hw:1,0, hw:1,1
|
||||
(and optionally hw:1,2)
|
||||
|
||||
If you want to get Ai+Di and/or Ao+Do support with Jack, you would need to
|
||||
combine the Alsa devices into one logical "complex" device.
|
||||
|
||||
@ -348,13 +432,11 @@ It is related to another device (ice1712) but can be adapted to suit
|
||||
the Audiophile USB.
|
||||
|
||||
Enabling multiple Audiophile USB interfaces for Jackd will certainly require:
|
||||
* patching Jack with the previously mentioned "Big Endian" patch
|
||||
* patching Jackd with the MMAP_COMPLEX patch (see the ice1712 page)
|
||||
* patching the alsa-lib/src/pcm/pcm_multi.c file (see the ice1712 page)
|
||||
* Making sure your Jackd version has the MMAP_COMPLEX patch (see the ice1712 page)
|
||||
* (maybe) patching the alsa-lib/src/pcm/pcm_multi.c file (see the ice1712 page)
|
||||
* define a multi device (combination of hw:1,0 and hw:1,1) in your .asoundrc
|
||||
file
|
||||
* start jackd with this device
|
||||
|
||||
I had no success in testing this for now, but this may be due to my OS
|
||||
configuration. If you have any success with this kind of setup, please
|
||||
drop me an email.
|
||||
I have had no success in testing this so far; if you have any success with this kind
|
||||
of setup, please drop me an email.
|
||||
|
@ -278,6 +278,21 @@ current mixer configuration by reading and writing the whole file
|
||||
image.
|
||||
|
||||
|
||||
Duplex Streams
|
||||
==============
|
||||
|
||||
Note that when attempting to use a single device file for playback and
|
||||
capture, the OSS API provides no way to set the format, sample rate or
|
||||
number of channels different in each direction. Thus
|
||||
io_handle = open("device", O_RDWR)
|
||||
will only function correctly if the values are the same in each direction.
|
||||
|
||||
To use different values in the two directions, use both
|
||||
input_handle = open("device", O_RDONLY)
|
||||
output_handle = open("device", O_WRONLY)
|
||||
and set the values for the corresponding handle.
|
||||
|
||||
|
||||
Unsupported Features
|
||||
====================
|
||||
|
||||
|
202
Documentation/spi/spidev_test.c
Normal file
202
Documentation/spi/spidev_test.c
Normal file
@ -0,0 +1,202 @@
|
||||
/*
|
||||
* SPI testing utility (using spidev driver)
|
||||
*
|
||||
* Copyright (c) 2007 MontaVista Software, Inc.
|
||||
* Copyright (c) 2007 Anton Vorontsov <avorontsov@ru.mvista.com>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License.
|
||||
*
|
||||
* Cross-compile with cross-gcc -I/path/to/cross-kernel/include
|
||||
*/
|
||||
|
||||
#include <stdint.h>
|
||||
#include <unistd.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <getopt.h>
|
||||
#include <fcntl.h>
|
||||
#include <sys/ioctl.h>
|
||||
#include <linux/types.h>
|
||||
#include <linux/spi/spidev.h>
|
||||
|
||||
/* Element count of the array a; a must be a real array, not a pointer. */
#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))
|
||||
|
||||
/*
 * Report a fatal error and terminate the program.
 *
 * s: message prefix handed to perror(), which appends the description of
 *    the current errno value.
 *
 * Does not return: abort() is called right after the message is printed.
 */
static void pabort(const char *s)
{
	perror(s);
	abort();
}
|
||||
|
||||
/* Device node to open; -D/--device replaces it with the given path. */
static char *device = "/dev/spidev1.1";
|
||||
/* SPI mode flags; starts at 0, option switches OR SPI_* bits into it. */
static uint8_t mode;
|
||||
/* Bits per word; overridden by -b/--bpw. */
static uint8_t bits = 8;
|
||||
/* Maximum clock speed in Hz; overridden by -s/--speed. */
static uint32_t speed = 500000;
|
||||
/* Per-transfer delay in microseconds; starts at 0, set by -d/--delay. */
static uint16_t delay;
|
||||
|
||||
static void transfer(int fd)
|
||||
{
|
||||
int ret;
|
||||
uint8_t tx[] = {
|
||||
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
|
||||
0x40, 0x00, 0x00, 0x00, 0x00, 0x95,
|
||||
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
|
||||
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
|
||||
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
|
||||
0xDE, 0xAD, 0xBE, 0xEF, 0xBA, 0xAD,
|
||||
0xF0, 0x0D,
|
||||
};
|
||||
uint8_t rx[ARRAY_SIZE(tx)] = {0, };
|
||||
struct spi_ioc_transfer tr = {
|
||||
.tx_buf = (unsigned long)tx,
|
||||
.rx_buf = (unsigned long)rx,
|
||||
.len = ARRAY_SIZE(tx),
|
||||
.delay_usecs = delay,
|
||||
.speed_hz = speed,
|
||||
.bits_per_word = bits,
|
||||
};
|
||||
|
||||
ret = ioctl(fd, SPI_IOC_MESSAGE(1), &tr);
|
||||
if (ret == 1)
|
||||
pabort("can't send spi message");
|
||||
|
||||
for (ret = 0; ret < ARRAY_SIZE(tx); ret++) {
|
||||
if (!(ret % 6))
|
||||
puts("");
|
||||
printf("%.2X ", rx[ret]);
|
||||
}
|
||||
puts("");
|
||||
}
|
||||
|
||||
/*
 * Print the command synopsis and the option summary to stdout, then exit.
 *
 * prog: program name inserted into the "Usage:" line.
 *
 * Does not return: the process always terminates with exit status 1.
 */
void print_usage(char *prog)
{
	static const char option_help[] =
		" -D --device device to use (default /dev/spidev1.1)\n"
		" -s --speed max speed (Hz)\n"
		" -d --delay delay (usec)\n"
		" -b --bpw bits per word \n"
		" -l --loop loopback\n"
		" -H --cpha clock phase\n"
		" -O --cpol clock polarity\n"
		" -L --lsb least significant bit first\n"
		" -C --cs-high chip select active high\n"
		" -3 --3wire SI/SO signals shared\n";

	printf("Usage: %s [-DsbdlHOLC3]\n", prog);
	puts(option_help);
	exit(1);
}
|
||||
|
||||
void parse_opts(int argc, char *argv[])
|
||||
{
|
||||
while (1) {
|
||||
static struct option lopts[] = {
|
||||
{ "device", 1, 0, 'D' },
|
||||
{ "speed", 1, 0, 's' },
|
||||
{ "delay", 1, 0, 'd' },
|
||||
{ "bpw", 1, 0, 'b' },
|
||||
{ "loop", 0, 0, 'l' },
|
||||
{ "cpha", 0, 0, 'H' },
|
||||
{ "cpol", 0, 0, 'O' },
|
||||
{ "lsb", 0, 0, 'L' },
|
||||
{ "cs-high", 0, 0, 'C' },
|
||||
{ "3wire", 0, 0, '3' },
|
||||
{ NULL, 0, 0, 0 },
|
||||
};
|
||||
int c;
|
||||
|
||||
c = getopt_long(argc, argv, "D:s:d:b:lHOLC3", lopts, NULL);
|
||||
|
||||
if (c == -1)
|
||||
break;
|
||||
|
||||
switch (c) {
|
||||
case 'D':
|
||||
device = optarg;
|
||||
break;
|
||||
case 's':
|
||||
speed = atoi(optarg);
|
||||
break;
|
||||
case 'd':
|
||||
delay = atoi(optarg);
|
||||
break;
|
||||
case 'b':
|
||||
bits = atoi(optarg);
|
||||
break;
|
||||
case 'l':
|
||||
mode |= SPI_LOOP;
|
||||
break;
|
||||
case 'H':
|
||||
mode |= SPI_CPHA;
|
||||
break;
|
||||
case 'O':
|
||||
mode |= SPI_CPOL;
|
||||
break;
|
||||
case 'L':
|
||||
mode |= SPI_LSB_FIRST;
|
||||
break;
|
||||
case 'C':
|
||||
mode |= SPI_CS_HIGH;
|
||||
break;
|
||||
case '3':
|
||||
mode |= SPI_3WIRE;
|
||||
break;
|
||||
default:
|
||||
print_usage(argv[0]);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
int ret = 0;
|
||||
int fd;
|
||||
|
||||
parse_opts(argc, argv);
|
||||
|
||||
fd = open(device, O_RDWR);
|
||||
if (fd < 0)
|
||||
pabort("can't open device");
|
||||
|
||||
/*
|
||||
* spi mode
|
||||
*/
|
||||
ret = ioctl(fd, SPI_IOC_WR_MODE, &mode);
|
||||
if (ret == -1)
|
||||
pabort("can't set spi mode");
|
||||
|
||||
ret = ioctl(fd, SPI_IOC_RD_MODE, &mode);
|
||||
if (ret == -1)
|
||||
pabort("can't get spi mode");
|
||||
|
||||
/*
|
||||
* bits per word
|
||||
*/
|
||||
ret = ioctl(fd, SPI_IOC_WR_BITS_PER_WORD, &bits);
|
||||
if (ret == -1)
|
||||
pabort("can't set bits per word");
|
||||
|
||||
ret = ioctl(fd, SPI_IOC_RD_BITS_PER_WORD, &bits);
|
||||
if (ret == -1)
|
||||
pabort("can't get bits per word");
|
||||
|
||||
/*
|
||||
* max speed hz
|
||||
*/
|
||||
ret = ioctl(fd, SPI_IOC_WR_MAX_SPEED_HZ, &speed);
|
||||
if (ret == -1)
|
||||
pabort("can't set max speed hz");
|
||||
|
||||
ret = ioctl(fd, SPI_IOC_RD_MAX_SPEED_HZ, &speed);
|
||||
if (ret == -1)
|
||||
pabort("can't get max speed hz");
|
||||
|
||||
printf("spi mode: %d\n", mode);
|
||||
printf("bits per word: %d\n", bits);
|
||||
printf("max speed: %d Hz (%d KHz)\n", speed, speed/1000);
|
||||
|
||||
transfer(fd);
|
||||
|
||||
close(fd);
|
||||
|
||||
return ret;
|
||||
}
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
x
Reference in New Issue
Block a user