2005-04-17 02:20:36 +04:00
# ifndef _RAID5_H
# define _RAID5_H
# include <linux/raid/xor.h>
2009-08-30 06:09:26 +04:00
# include <linux/dmaengine.h>
2005-04-17 02:20:36 +04:00
/*
*
2011-07-26 05:34:20 +04:00
* Each stripe contains one buffer per device . Each buffer can be in
2005-04-17 02:20:36 +04:00
* one of a number of states stored in " flags " . Changes between
2011-07-26 05:34:20 +04:00
* these states happen * almost * exclusively under the protection of the
* STRIPE_ACTIVE flag . Some very specific changes can happen in bi_end_io , and
* these are not protected by STRIPE_ACTIVE .
2005-04-17 02:20:36 +04:00
*
* The flag bits that are used to represent these states are :
* R5_UPTODATE and R5_LOCKED
*
* State Empty = = ! UPTODATE , ! LOCK
* We have no data , and there is no active request
* State Want = = ! UPTODATE , LOCK
* A read request is being submitted for this block
* State Dirty = = UPTODATE , LOCK
* Some new data is in this buffer , and it is being written out
* State Clean = = UPTODATE , ! LOCK
* We have valid data which is the same as on disc
*
* The possible state transitions are :
*
* Empty - > Want - on read or write to get old data for parity calc
* Empty - > Dirty - on compute_parity to satisfy write / sync request . ( RECONSTRUCT_WRITE )
* Empty - > Clean - on compute_block when computing a block for failed drive
* Want - > Empty - on failed read
* Want - > Clean - on successful completion of read request
* Dirty - > Clean - on successful completion of write request
* Dirty - > Clean - on failed write
* Clean - > Dirty - on compute_parity to satisfy write / sync ( RECONSTRUCT or RMW )
*
* The Want - > Empty , Want - > Clean , Dirty - > Clean , transitions
* all happen in b_end_io at interrupt time .
* Each sets the Uptodate bit before releasing the Lock bit .
* This leaves one multi - stage transition :
* Want - > Dirty - > Clean
* This is safe because thinking that a Clean buffer is actually dirty
* will at worst delay some action , and the stripe will be scheduled
* for attention after the transition is complete .
*
* There is one possibility that is not covered by these states . That
* is if one drive has failed and there is a spare being rebuilt . We
* can ' t distinguish between a clean block that has been generated
* from parity calculations , and a clean block that has been
* successfully written to the spare ( or to parity when resyncing ) .
* To distinguish these states we have a stripe bit STRIPE_INSYNC that
* is set whenever a write is scheduled to the spare , or to the parity
* disc if there is no spare . A sync request clears this bit , and
* when we find it set with no buffers locked , we know the sync is
* complete .
*
* Buffers for the md device that arrive via make_request are attached
* to the appropriate stripe in one of two lists linked on b_reqnext .
* One list ( bh_read ) for read requests , one ( bh_write ) for write .
* There should never be more than one buffer on the two lists
* together , but we are not guaranteed of that so we allow for more .
*
* If a buffer is on the read list when the associated cache buffer is
* Uptodate , the data is copied into the read buffer and its b_end_io
* routine is called . This may happen in the end_request routine only
* if the buffer has just successfully been read . end_request should
* remove the buffers from the list and then set the Uptodate bit on
* the buffer . Other threads may do this only if they first check
* that the Uptodate bit is set . Once they have checked that they may
* take buffers off the read queue .
*
* When a buffer on the write list is committed for write it is copied
* into the cache buffer , which is then marked dirty , and moved onto a
* third list , the written list ( bh_written ) . Once both the parity
* block and the cached buffer are successfully written , any buffer on
* a written list can be returned with b_end_io .
*
2011-07-26 05:34:20 +04:00
* The write list and read list both act as fifos . The read list ,
* write list and written list are protected by the device_lock .
* The device_lock is only for list manipulations and will only be
* held for a very short time . It can be claimed from interrupts .
2005-04-17 02:20:36 +04:00
*
*
* Stripes in the stripe cache can be on one of two lists ( or on
* neither ) . The " inactive_list " contains stripes which are not
* currently being used for any request . They can freely be reused
* for another stripe . The " handle_list " contains stripes that need
* to be handled in some way . Both of these are fifo queues . Each
* stripe is also ( potentially ) linked to a hash bucket in the hash
* table so that it can be found by sector number . Stripes that are
* not hashed must be on the inactive_list , and will normally be at
* the front . All stripes start life this way .
*
* The inactive_list , handle_list and hash bucket lists are all protected by the
* device_lock .
* - stripes have a reference counter . If count = = 0 , they are on a list .
* - If a stripe might need handling , STRIPE_HANDLE is set .
* - When refcount reaches zero , then if STRIPE_HANDLE it is put on
* handle_list else inactive_list
*
* This , combined with the fact that STRIPE_HANDLE is only ever
* cleared while a stripe has a non - zero count means that if the
* refcount is 0 and STRIPE_HANDLE is set , then it is on the
* handle_list and if refcount is 0 and STRIPE_HANDLE is not set , then
* the stripe is on inactive_list .
*
* The possible transitions are :
* activate an unhashed / inactive stripe ( get_active_stripe ( ) )
* lockdev check - hash unlink - stripe cnt + + clean - stripe hash - stripe unlockdev
* activate a hashed , possibly active stripe ( get_active_stripe ( ) )
* lockdev check - hash if ( ! cnt + + ) unlink - stripe unlockdev
* attach a request to an active stripe ( add_stripe_bh ( ) )
* lockdev attach - buffer unlockdev
* handle a stripe ( handle_stripe ( ) )
2011-07-26 05:34:20 +04:00
* setSTRIPE_ACTIVE , clrSTRIPE_HANDLE . . .
md: raid5_run_ops - run stripe operations outside sh->lock
When the raid acceleration work was proposed, Neil laid out the following
attack plan:
1/ move the xor and copy operations outside spin_lock(&sh->lock)
2/ find/implement an asynchronous offload api
The raid5_run_ops routine uses the asynchronous offload api (async_tx) and
the stripe_operations member of a stripe_head to carry out xor+copy
operations asynchronously, outside the lock.
To perform operations outside the lock a new set of state flags is needed
to track new requests, in-flight requests, and completed requests. In this
new model handle_stripe is tasked with scanning the stripe_head for work,
updating the stripe_operations structure, and finally dropping the lock and
calling raid5_run_ops for processing. The following flags outline the
requests that handle_stripe can make of raid5_run_ops:
STRIPE_OP_BIOFILL
- copy data into request buffers to satisfy a read request
STRIPE_OP_COMPUTE_BLK
- generate a missing block in the cache from the other blocks
STRIPE_OP_PREXOR
- subtract existing data as part of the read-modify-write process
STRIPE_OP_BIODRAIN
- copy data out of request buffers to satisfy a write request
STRIPE_OP_POSTXOR
- recalculate parity for new data that has entered the cache
STRIPE_OP_CHECK
- verify that the parity is correct
STRIPE_OP_IO
- submit i/o to the member disks (note this was already performed outside
the stripe lock, but it made sense to add it as an operation type
The flow is:
1/ handle_stripe sets STRIPE_OP_* in sh->ops.pending
2/ raid5_run_ops reads sh->ops.pending, sets sh->ops.ack, and submits the
operation to the async_tx api
3/ async_tx triggers the completion callback routine to set
sh->ops.complete and release the stripe
4/ handle_stripe runs again to finish the operation and optionally submit
new operations that were previously blocked
Note this patch just defines raid5_run_ops, subsequent commits (one per
major operation type) modify handle_stripe to take advantage of this
routine.
Changelog:
* removed ops_complete_biodrain in favor of ops_complete_postxor and
ops_complete_write.
* removed the raid5_run_ops workqueue
* call bi_end_io for reads in ops_complete_biofill, saves a call to
handle_stripe
* explicitly handle the 2-disk raid5 case (xor becomes memcpy), Neil Brown
* fix race between async engines and bi_end_io call for reads, Neil Brown
* remove unnecessary spin_lock from ops_complete_biofill
* remove test_and_set/test_and_clear BUG_ONs, Neil Brown
* remove explicit interrupt handling for channel switching, this feature
was absorbed (i.e. it is now implicit) by the async_tx api
* use return_io in ops_complete_biofill
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Acked-By: NeilBrown <neilb@suse.de>
2007-01-02 23:52:30 +03:00
* ( lockdev check - buffers unlockdev ) . .
* change - state . .
2011-07-26 05:34:20 +04:00
* record io / ops needed clearSTRIPE_ACTIVE schedule io / ops
2005-04-17 02:20:36 +04:00
* release an active stripe ( release_stripe ( ) )
* lockdev if ( ! - - cnt ) { if STRIPE_HANDLE , add to handle_list else add to inactive - list } unlockdev
*
* The refcount counts each thread that has activated the stripe ,
* plus raid5d if it is handling it , plus one for each active request
md: raid5_run_ops - run stripe operations outside sh->lock
When the raid acceleration work was proposed, Neil laid out the following
attack plan:
1/ move the xor and copy operations outside spin_lock(&sh->lock)
2/ find/implement an asynchronous offload api
The raid5_run_ops routine uses the asynchronous offload api (async_tx) and
the stripe_operations member of a stripe_head to carry out xor+copy
operations asynchronously, outside the lock.
To perform operations outside the lock a new set of state flags is needed
to track new requests, in-flight requests, and completed requests. In this
new model handle_stripe is tasked with scanning the stripe_head for work,
updating the stripe_operations structure, and finally dropping the lock and
calling raid5_run_ops for processing. The following flags outline the
requests that handle_stripe can make of raid5_run_ops:
STRIPE_OP_BIOFILL
- copy data into request buffers to satisfy a read request
STRIPE_OP_COMPUTE_BLK
- generate a missing block in the cache from the other blocks
STRIPE_OP_PREXOR
- subtract existing data as part of the read-modify-write process
STRIPE_OP_BIODRAIN
- copy data out of request buffers to satisfy a write request
STRIPE_OP_POSTXOR
- recalculate parity for new data that has entered the cache
STRIPE_OP_CHECK
- verify that the parity is correct
STRIPE_OP_IO
- submit i/o to the member disks (note this was already performed outside
the stripe lock, but it made sense to add it as an operation type
The flow is:
1/ handle_stripe sets STRIPE_OP_* in sh->ops.pending
2/ raid5_run_ops reads sh->ops.pending, sets sh->ops.ack, and submits the
operation to the async_tx api
3/ async_tx triggers the completion callback routine to set
sh->ops.complete and release the stripe
4/ handle_stripe runs again to finish the operation and optionally submit
new operations that were previously blocked
Note this patch just defines raid5_run_ops, subsequent commits (one per
major operation type) modify handle_stripe to take advantage of this
routine.
Changelog:
* removed ops_complete_biodrain in favor of ops_complete_postxor and
ops_complete_write.
* removed the raid5_run_ops workqueue
* call bi_end_io for reads in ops_complete_biofill, saves a call to
handle_stripe
* explicitly handle the 2-disk raid5 case (xor becomes memcpy), Neil Brown
* fix race between async engines and bi_end_io call for reads, Neil Brown
* remove unnecessary spin_lock from ops_complete_biofill
* remove test_and_set/test_and_clear BUG_ONs, Neil Brown
* remove explicit interrupt handling for channel switching, this feature
was absorbed (i.e. it is now implicit) by the async_tx api
* use return_io in ops_complete_biofill
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Acked-By: NeilBrown <neilb@suse.de>
2007-01-02 23:52:30 +03:00
* on a cached buffer , and plus one if the stripe is undergoing stripe
* operations .
*
2011-07-26 05:34:20 +04:00
* The stripe operations are :
md: raid5_run_ops - run stripe operations outside sh->lock
When the raid acceleration work was proposed, Neil laid out the following
attack plan:
1/ move the xor and copy operations outside spin_lock(&sh->lock)
2/ find/implement an asynchronous offload api
The raid5_run_ops routine uses the asynchronous offload api (async_tx) and
the stripe_operations member of a stripe_head to carry out xor+copy
operations asynchronously, outside the lock.
To perform operations outside the lock a new set of state flags is needed
to track new requests, in-flight requests, and completed requests. In this
new model handle_stripe is tasked with scanning the stripe_head for work,
updating the stripe_operations structure, and finally dropping the lock and
calling raid5_run_ops for processing. The following flags outline the
requests that handle_stripe can make of raid5_run_ops:
STRIPE_OP_BIOFILL
- copy data into request buffers to satisfy a read request
STRIPE_OP_COMPUTE_BLK
- generate a missing block in the cache from the other blocks
STRIPE_OP_PREXOR
- subtract existing data as part of the read-modify-write process
STRIPE_OP_BIODRAIN
- copy data out of request buffers to satisfy a write request
STRIPE_OP_POSTXOR
- recalculate parity for new data that has entered the cache
STRIPE_OP_CHECK
- verify that the parity is correct
STRIPE_OP_IO
- submit i/o to the member disks (note this was already performed outside
the stripe lock, but it made sense to add it as an operation type
The flow is:
1/ handle_stripe sets STRIPE_OP_* in sh->ops.pending
2/ raid5_run_ops reads sh->ops.pending, sets sh->ops.ack, and submits the
operation to the async_tx api
3/ async_tx triggers the completion callback routine to set
sh->ops.complete and release the stripe
4/ handle_stripe runs again to finish the operation and optionally submit
new operations that were previously blocked
Note this patch just defines raid5_run_ops, subsequent commits (one per
major operation type) modify handle_stripe to take advantage of this
routine.
Changelog:
* removed ops_complete_biodrain in favor of ops_complete_postxor and
ops_complete_write.
* removed the raid5_run_ops workqueue
* call bi_end_io for reads in ops_complete_biofill, saves a call to
handle_stripe
* explicitly handle the 2-disk raid5 case (xor becomes memcpy), Neil Brown
* fix race between async engines and bi_end_io call for reads, Neil Brown
* remove unnecessary spin_lock from ops_complete_biofill
* remove test_and_set/test_and_clear BUG_ONs, Neil Brown
* remove explicit interrupt handling for channel switching, this feature
was absorbed (i.e. it is now implicit) by the async_tx api
* use return_io in ops_complete_biofill
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Acked-By: NeilBrown <neilb@suse.de>
2007-01-02 23:52:30 +03:00
* - copying data between the stripe cache and user application buffers
* - computing blocks to save a disk access , or to recover a missing block
* - updating the parity on a write operation ( reconstruct write and
* read - modify - write )
* - checking parity correctness
* - running i / o to disk
* These operations are carried out by raid5_run_ops which uses the async_tx
* api to ( optionally ) offload operations to dedicated hardware engines .
* When requesting an operation handle_stripe sets the pending bit for the
* operation and increments the count . raid5_run_ops is then run whenever
* the count is non - zero .
* There are some critical dependencies between the operations that prevent some
* from being requested while another is in flight .
* 1 / Parity check operations destroy the in cache version of the parity block ,
* so we prevent parity dependent operations like writes and compute_blocks
* from starting while a check is in progress . Some dma engines can perform
* the check without damaging the parity block , in these cases the parity
* block is re - marked up to date ( assuming the check was successful ) and is
* not re - read from disk .
* 2 / When a write operation is requested we immediately lock the affected
* blocks , and mark them as not up to date . This causes new read requests
* to be held off , as well as parity checks and compute block operations .
* 3 / Once a compute block operation has been requested handle_stripe treats
* that block as if it is up to date . raid5_run_ops guarantees that any
* operation that is dependent on the compute block result is initiated after
* the compute block completes .
2005-04-17 02:20:36 +04:00
*/
2008-06-28 02:31:57 +04:00
/*
2011-07-26 05:34:20 +04:00
* Operations state - intermediate states that are visible outside of
* STRIPE_ACTIVE .
2008-06-28 02:31:57 +04:00
* In general _idle indicates nothing is running , _run indicates a data
* processing operation is active , and _result means the data processing result
* is stable and can be acted upon . For simple operations like biofill and
* compute that only have an _idle and _run state they are indicated with
* sh - > state flags ( STRIPE_BIOFILL_RUN and STRIPE_COMPUTE_RUN )
*/
/**
 * enum check_states - handles syncing / repairing a stripe
 * @check_state_idle - check operations are quiesced
 * @check_state_run - check operation is running (xor parity check)
 * @check_state_run_q - q-parity check is running
 * @check_state_run_pq - pq dual parity check is running
 * @check_state_check_result - set outside lock when check result is valid
 * @check_state_compute_run - check failed and we are repairing
 * @check_state_compute_result - set outside lock when compute result is valid
 *
 * The enumerator order is significant: values are assigned implicitly,
 * starting from check_state_idle == 0.
 */
enum check_states {
	check_state_idle = 0,
	check_state_run,		/* xor parity check */
	check_state_run_q,		/* q-parity check */
	check_state_run_pq,		/* pq dual parity check */
	check_state_check_result,
	check_state_compute_run,	/* parity repair */
	check_state_compute_result,
};
/**
 * enum reconstruct_states - handles writing or expanding a stripe
 * @reconstruct_state_idle - no reconstruct operation is running
 * @reconstruct_state_prexor_drain_run - prexor-write operation is active
 * @reconstruct_state_drain_run - write operation is active
 * @reconstruct_state_run - expand operation is active
 * @reconstruct_state_prexor_drain_result - result of the prexor-write
 * @reconstruct_state_drain_result - result of the write
 * @reconstruct_state_result - result of the expand
 *
 * A _run state means a data processing operation is active; the matching
 * _result state means the data processing result is stable and can be
 * acted upon.  Values are assigned implicitly, starting from
 * reconstruct_state_idle == 0, so the enumerator order is significant.
 */
enum reconstruct_states {
	reconstruct_state_idle = 0,
	reconstruct_state_prexor_drain_run,	/* prexor-write */
	reconstruct_state_drain_run,		/* write */
	reconstruct_state_run,			/* expand */
	reconstruct_state_prexor_drain_result,
	reconstruct_state_drain_result,
	reconstruct_state_result,
};
2005-04-17 02:20:36 +04:00
struct stripe_head {
2006-01-06 11:20:33 +03:00
struct hlist_node hash ;
2009-03-31 07:39:38 +04:00
struct list_head lru ; /* inactive_list or handle_list */
struct raid5_private_data * raid_conf ;
2009-03-31 08:19:03 +04:00
short generation ; /* increments with every
* reshape */
2009-03-31 07:39:38 +04:00
sector_t sector ; /* sector of this row */
short pd_idx ; /* parity disk index */
short qd_idx ; /* 'Q' disk index for raid6 */
2009-03-31 07:39:38 +04:00
short ddf_layout ; /* use DDF ordering to calculate Q */
2009-03-31 07:39:38 +04:00
unsigned long state ; /* state flags */
atomic_t count ; /* nr of active thread/requests */
2005-09-10 03:23:54 +04:00
int bm_seq ; /* sequence number for bitmap flushes */
2009-03-31 07:39:38 +04:00
int disks ; /* disks in stripe */
2008-06-28 02:31:57 +04:00
enum check_states check_state ;
md: replace STRIPE_OP_{BIODRAIN,PREXOR,POSTXOR} with 'reconstruct_states'
From: Dan Williams <dan.j.williams@intel.com>
Track the state of reconstruct operations (recalculating the parity block
usually due to incoming writes, or as part of array expansion) Reduces the
scope of the STRIPE_OP_{BIODRAIN,PREXOR,POSTXOR} flags to only tracking whether
a reconstruct operation has been requested via the ops_request field of struct
stripe_head_state.
This is the final step in the removal of ops.{pending,ack,complete,count}, i.e.
the STRIPE_OP_{BIODRAIN,PREXOR,POSTXOR} flags only request an operation and do
not track the state of the operation.
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Neil Brown <neilb@suse.de>
2008-06-28 02:32:05 +04:00
enum reconstruct_states reconstruct_state ;
2009-10-16 09:25:22 +04:00
/**
* struct stripe_operations
md: raid5_run_ops - run stripe operations outside sh->lock
When the raid acceleration work was proposed, Neil laid out the following
attack plan:
1/ move the xor and copy operations outside spin_lock(&sh->lock)
2/ find/implement an asynchronous offload api
The raid5_run_ops routine uses the asynchronous offload api (async_tx) and
the stripe_operations member of a stripe_head to carry out xor+copy
operations asynchronously, outside the lock.
To perform operations outside the lock a new set of state flags is needed
to track new requests, in-flight requests, and completed requests. In this
new model handle_stripe is tasked with scanning the stripe_head for work,
updating the stripe_operations structure, and finally dropping the lock and
calling raid5_run_ops for processing. The following flags outline the
requests that handle_stripe can make of raid5_run_ops:
STRIPE_OP_BIOFILL
- copy data into request buffers to satisfy a read request
STRIPE_OP_COMPUTE_BLK
- generate a missing block in the cache from the other blocks
STRIPE_OP_PREXOR
- subtract existing data as part of the read-modify-write process
STRIPE_OP_BIODRAIN
- copy data out of request buffers to satisfy a write request
STRIPE_OP_POSTXOR
- recalculate parity for new data that has entered the cache
STRIPE_OP_CHECK
- verify that the parity is correct
STRIPE_OP_IO
- submit i/o to the member disks (note this was already performed outside
the stripe lock, but it made sense to add it as an operation type
The flow is:
1/ handle_stripe sets STRIPE_OP_* in sh->ops.pending
2/ raid5_run_ops reads sh->ops.pending, sets sh->ops.ack, and submits the
operation to the async_tx api
3/ async_tx triggers the completion callback routine to set
sh->ops.complete and release the stripe
4/ handle_stripe runs again to finish the operation and optionally submit
new operations that were previously blocked
Note this patch just defines raid5_run_ops, subsequent commits (one per
major operation type) modify handle_stripe to take advantage of this
routine.
Changelog:
* removed ops_complete_biodrain in favor of ops_complete_postxor and
ops_complete_write.
* removed the raid5_run_ops workqueue
* call bi_end_io for reads in ops_complete_biofill, saves a call to
handle_stripe
* explicitly handle the 2-disk raid5 case (xor becomes memcpy), Neil Brown
* fix race between async engines and bi_end_io call for reads, Neil Brown
* remove unnecessary spin_lock from ops_complete_biofill
* remove test_and_set/test_and_clear BUG_ONs, Neil Brown
* remove explicit interrupt handling for channel switching, this feature
was absorbed (i.e. it is now implicit) by the async_tx api
* use return_io in ops_complete_biofill
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Acked-By: NeilBrown <neilb@suse.de>
2007-01-02 23:52:30 +03:00
* @ target - STRIPE_OP_COMPUTE_BLK target
2009-10-16 09:25:22 +04:00
* @ target2 - 2 nd compute target in the raid6 case
* @ zero_sum_result - P and Q verification flags
* @ request - async service request flags for raid_run_ops
md: raid5_run_ops - run stripe operations outside sh->lock
When the raid acceleration work was proposed, Neil laid out the following
attack plan:
1/ move the xor and copy operations outside spin_lock(&sh->lock)
2/ find/implement an asynchronous offload api
The raid5_run_ops routine uses the asynchronous offload api (async_tx) and
the stripe_operations member of a stripe_head to carry out xor+copy
operations asynchronously, outside the lock.
To perform operations outside the lock a new set of state flags is needed
to track new requests, in-flight requests, and completed requests. In this
new model handle_stripe is tasked with scanning the stripe_head for work,
updating the stripe_operations structure, and finally dropping the lock and
calling raid5_run_ops for processing. The following flags outline the
requests that handle_stripe can make of raid5_run_ops:
STRIPE_OP_BIOFILL
- copy data into request buffers to satisfy a read request
STRIPE_OP_COMPUTE_BLK
- generate a missing block in the cache from the other blocks
STRIPE_OP_PREXOR
- subtract existing data as part of the read-modify-write process
STRIPE_OP_BIODRAIN
- copy data out of request buffers to satisfy a write request
STRIPE_OP_POSTXOR
- recalculate parity for new data that has entered the cache
STRIPE_OP_CHECK
- verify that the parity is correct
STRIPE_OP_IO
- submit i/o to the member disks (note this was already performed outside
the stripe lock, but it made sense to add it as an operation type
The flow is:
1/ handle_stripe sets STRIPE_OP_* in sh->ops.pending
2/ raid5_run_ops reads sh->ops.pending, sets sh->ops.ack, and submits the
operation to the async_tx api
3/ async_tx triggers the completion callback routine to set
sh->ops.complete and release the stripe
4/ handle_stripe runs again to finish the operation and optionally submit
new operations that were previously blocked
Note this patch just defines raid5_run_ops, subsequent commits (one per
major operation type) modify handle_stripe to take advantage of this
routine.
Changelog:
* removed ops_complete_biodrain in favor of ops_complete_postxor and
ops_complete_write.
* removed the raid5_run_ops workqueue
* call bi_end_io for reads in ops_complete_biofill, saves a call to
handle_stripe
* explicitly handle the 2-disk raid5 case (xor becomes memcpy), Neil Brown
* fix race between async engines and bi_end_io call for reads, Neil Brown
* remove unnecessary spin_lock from ops_complete_biofill
* remove test_and_set/test_and_clear BUG_ONs, Neil Brown
* remove explicit interrupt handling for channel switching, this feature
was absorbed (i.e. it is now implicit) by the async_tx api
* use return_io in ops_complete_biofill
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Acked-By: NeilBrown <neilb@suse.de>
2007-01-02 23:52:30 +03:00
*/
struct stripe_operations {
2009-07-15 00:40:19 +04:00
int target , target2 ;
2009-08-30 06:09:26 +04:00
enum sum_check_flags zero_sum_result ;
2009-10-16 09:25:22 +04:00
# ifdef CONFIG_MULTICORE_RAID456
unsigned long request ;
wait_queue_head_t wait_for_ops ;
# endif
md: raid5_run_ops - run stripe operations outside sh->lock
When the raid acceleration work was proposed, Neil laid out the following
attack plan:
1/ move the xor and copy operations outside spin_lock(&sh->lock)
2/ find/implement an asynchronous offload api
The raid5_run_ops routine uses the asynchronous offload api (async_tx) and
the stripe_operations member of a stripe_head to carry out xor+copy
operations asynchronously, outside the lock.
To perform operations outside the lock a new set of state flags is needed
to track new requests, in-flight requests, and completed requests. In this
new model handle_stripe is tasked with scanning the stripe_head for work,
updating the stripe_operations structure, and finally dropping the lock and
calling raid5_run_ops for processing. The following flags outline the
requests that handle_stripe can make of raid5_run_ops:
STRIPE_OP_BIOFILL
- copy data into request buffers to satisfy a read request
STRIPE_OP_COMPUTE_BLK
- generate a missing block in the cache from the other blocks
STRIPE_OP_PREXOR
- subtract existing data as part of the read-modify-write process
STRIPE_OP_BIODRAIN
- copy data out of request buffers to satisfy a write request
STRIPE_OP_POSTXOR
- recalculate parity for new data that has entered the cache
STRIPE_OP_CHECK
- verify that the parity is correct
STRIPE_OP_IO
- submit i/o to the member disks (note this was already performed outside
the stripe lock, but it made sense to add it as an operation type
The flow is:
1/ handle_stripe sets STRIPE_OP_* in sh->ops.pending
2/ raid5_run_ops reads sh->ops.pending, sets sh->ops.ack, and submits the
operation to the async_tx api
3/ async_tx triggers the completion callback routine to set
sh->ops.complete and release the stripe
4/ handle_stripe runs again to finish the operation and optionally submit
new operations that were previously blocked
Note this patch just defines raid5_run_ops, subsequent commits (one per
major operation type) modify handle_stripe to take advantage of this
routine.
Changelog:
* removed ops_complete_biodrain in favor of ops_complete_postxor and
ops_complete_write.
* removed the raid5_run_ops workqueue
* call bi_end_io for reads in ops_complete_biofill, saves a call to
handle_stripe
* explicitly handle the 2-disk raid5 case (xor becomes memcpy), Neil Brown
* fix race between async engines and bi_end_io call for reads, Neil Brown
* remove unnecessary spin_lock from ops_complete_biofill
* remove test_and_set/test_and_clear BUG_ONs, Neil Brown
* remove explicit interrupt handling for channel switching, this feature
was absorbed (i.e. it is now implicit) by the async_tx api
* use return_io in ops_complete_biofill
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Acked-By: NeilBrown <neilb@suse.de>
2007-01-02 23:52:30 +03:00
} ops ;
2005-04-17 02:20:36 +04:00
struct r5dev {
struct bio req ;
struct bio_vec vec ;
struct page * page ;
md: raid5_run_ops - run stripe operations outside sh->lock
When the raid acceleration work was proposed, Neil laid out the following
attack plan:
1/ move the xor and copy operations outside spin_lock(&sh->lock)
2/ find/implement an asynchronous offload api
The raid5_run_ops routine uses the asynchronous offload api (async_tx) and
the stripe_operations member of a stripe_head to carry out xor+copy
operations asynchronously, outside the lock.
To perform operations outside the lock a new set of state flags is needed
to track new requests, in-flight requests, and completed requests. In this
new model handle_stripe is tasked with scanning the stripe_head for work,
updating the stripe_operations structure, and finally dropping the lock and
calling raid5_run_ops for processing. The following flags outline the
requests that handle_stripe can make of raid5_run_ops:
STRIPE_OP_BIOFILL
- copy data into request buffers to satisfy a read request
STRIPE_OP_COMPUTE_BLK
- generate a missing block in the cache from the other blocks
STRIPE_OP_PREXOR
- subtract existing data as part of the read-modify-write process
STRIPE_OP_BIODRAIN
- copy data out of request buffers to satisfy a write request
STRIPE_OP_POSTXOR
- recalculate parity for new data that has entered the cache
STRIPE_OP_CHECK
- verify that the parity is correct
STRIPE_OP_IO
- submit i/o to the member disks (note this was already performed outside
the stripe lock, but it made sense to add it as an operation type
The flow is:
1/ handle_stripe sets STRIPE_OP_* in sh->ops.pending
2/ raid5_run_ops reads sh->ops.pending, sets sh->ops.ack, and submits the
operation to the async_tx api
3/ async_tx triggers the completion callback routine to set
sh->ops.complete and release the stripe
4/ handle_stripe runs again to finish the operation and optionally submit
new operations that were previously blocked
Note this patch just defines raid5_run_ops, subsequent commits (one per
major operation type) modify handle_stripe to take advantage of this
routine.
Changelog:
* removed ops_complete_biodrain in favor of ops_complete_postxor and
ops_complete_write.
* removed the raid5_run_ops workqueue
* call bi_end_io for reads in ops_complete_biofill, saves a call to
handle_stripe
* explicitly handle the 2-disk raid5 case (xor becomes memcpy), Neil Brown
* fix race between async engines and bi_end_io call for reads, Neil Brown
* remove unnecessary spin_lock from ops_complete_biofill
* remove test_and_set/test_and_clear BUG_ONs, Neil Brown
* remove explicit interrupt handling for channel switching, this feature
was absorbed (i.e. it is now implicit) by the async_tx api
* use return_io in ops_complete_biofill
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Acked-By: NeilBrown <neilb@suse.de>
2007-01-02 23:52:30 +03:00
struct bio * toread , * read , * towrite , * written ;
2005-04-17 02:20:36 +04:00
sector_t sector ; /* sector of this page */
unsigned long flags ;
} dev [ 1 ] ; /* allocated with extra space depending of RAID geometry */
} ;
2007-07-09 22:56:43 +04:00
/* stripe_head_state - collects and tracks the dynamic state of a stripe_head
2011-07-26 05:34:20 +04:00
* for handle_stripe .
2007-07-09 22:56:43 +04:00
*/
struct stripe_head_state {
int syncing , expanding , expanded ;
int locked , uptodate , to_read , to_write , failed , written ;
2007-01-02 23:52:31 +03:00
int to_fill , compute , req_compute , non_overwrite ;
2011-07-26 05:35:19 +04:00
int failed_num [ 2 ] ;
int p_failed , q_failed ;
2011-07-26 05:35:20 +04:00
int dec_preread_active ;
unsigned long ops_request ;
struct bio * return_bi ;
mdk_rdev_t * blocked_rdev ;
2011-07-28 05:39:22 +04:00
int handle_bad_blocks ;
2007-07-09 22:56:43 +04:00
} ;
2005-04-17 02:20:36 +04:00
/* Flags: values for the per-device "flags" word of a stripe.
 * Value 6 is not assigned to any flag in this list.
 */
#define R5_UPTODATE	0	/* page contains current data */
#define R5_LOCKED	1	/* IO has been submitted on "req" */
#define R5_OVERWRITE	2	/* towrite covers whole page */
/* and some that are internal to handle_stripe */
#define R5_Insync	3	/* rdev && rdev->in_sync at start */
#define R5_Wantread	4	/* want to schedule a read */
#define R5_Wantwrite	5	/* presumably: want to schedule a write - confirm */
#define R5_Overlap	7	/* There is a pending overlapping request on this block */
#define R5_ReadError	8	/* seen a read error here recently */
#define R5_ReWrite	9	/* have tried to over-write the readerror */

#define R5_Expanded	10	/* This block now has post-expand data */
#define R5_Wantcompute	11	/* compute_block in progress treat as
				 * uptodate
				 */
#define R5_Wantfill	12	/* dev->toread contains a bio that needs
				 * filling
				 */
#define R5_Wantdrain	13	/* dev->towrite needs to be drained */
#define R5_WantFUA	14	/* Write should be FUA */
#define R5_WriteError	15	/* got a write error - need to record it */
#define R5_MadeGood	16	/* A bad block has been fixed by writing to it */
2005-04-17 02:20:36 +04:00
/*
 * Write method
 */
#define RECONSTRUCT_WRITE	1
#define READ_MODIFY_WRITE	2
/* not a write method, but a compute_parity mode */
#define CHECK_PARITY		3
/* Additional compute_parity mode -- updates the parity w/o LOCKING */
#define UPDATE_PARITY		4
2005-04-17 02:20:36 +04:00
/*
 * Stripe state
 *
 * Enumerators are numbered implicitly from 0 in declaration order, so
 * inserting or reordering entries changes every later value.
 */
enum {
	STRIPE_ACTIVE,
	STRIPE_HANDLE,
	STRIPE_SYNC_REQUESTED,
	STRIPE_SYNCING,
	STRIPE_INSYNC,
	STRIPE_PREREAD_ACTIVE,
	STRIPE_DELAYED,
	STRIPE_DEGRADED,
	STRIPE_BIT_DELAY,
	STRIPE_EXPANDING,
	STRIPE_EXPAND_SOURCE,
	STRIPE_EXPAND_READY,
	STRIPE_IO_STARTED,	/* do not count towards 'bypass_count' */
	STRIPE_FULL_WRITE,	/* all blocks are set to be overwritten */
	STRIPE_BIOFILL_RUN,
	STRIPE_COMPUTE_RUN,
	STRIPE_OPS_REQ_PENDING,
};
2009-10-16 09:25:22 +04:00
md: raid5_run_ops - run stripe operations outside sh->lock
When the raid acceleration work was proposed, Neil laid out the following
attack plan:
1/ move the xor and copy operations outside spin_lock(&sh->lock)
2/ find/implement an asynchronous offload api
The raid5_run_ops routine uses the asynchronous offload api (async_tx) and
the stripe_operations member of a stripe_head to carry out xor+copy
operations asynchronously, outside the lock.
To perform operations outside the lock a new set of state flags is needed
to track new requests, in-flight requests, and completed requests. In this
new model handle_stripe is tasked with scanning the stripe_head for work,
updating the stripe_operations structure, and finally dropping the lock and
calling raid5_run_ops for processing. The following flags outline the
requests that handle_stripe can make of raid5_run_ops:
STRIPE_OP_BIOFILL
- copy data into request buffers to satisfy a read request
STRIPE_OP_COMPUTE_BLK
- generate a missing block in the cache from the other blocks
STRIPE_OP_PREXOR
- subtract existing data as part of the read-modify-write process
STRIPE_OP_BIODRAIN
- copy data out of request buffers to satisfy a write request
STRIPE_OP_POSTXOR
- recalculate parity for new data that has entered the cache
STRIPE_OP_CHECK
- verify that the parity is correct
STRIPE_OP_IO
- submit i/o to the member disks (note this was already performed outside
the stripe lock, but it made sense to add it as an operation type)
The flow is:
1/ handle_stripe sets STRIPE_OP_* in sh->ops.pending
2/ raid5_run_ops reads sh->ops.pending, sets sh->ops.ack, and submits the
operation to the async_tx api
3/ async_tx triggers the completion callback routine to set
sh->ops.complete and release the stripe
4/ handle_stripe runs again to finish the operation and optionally submit
new operations that were previously blocked
Note this patch just defines raid5_run_ops, subsequent commits (one per
major operation type) modify handle_stripe to take advantage of this
routine.
Changelog:
* removed ops_complete_biodrain in favor of ops_complete_postxor and
ops_complete_write.
* removed the raid5_run_ops workqueue
* call bi_end_io for reads in ops_complete_biofill, saves a call to
handle_stripe
* explicitly handle the 2-disk raid5 case (xor becomes memcpy), Neil Brown
* fix race between async engines and bi_end_io call for reads, Neil Brown
* remove unnecessary spin_lock from ops_complete_biofill
* remove test_and_set/test_and_clear BUG_ONs, Neil Brown
* remove explicit interrupt handling for channel switching, this feature
was absorbed (i.e. it is now implicit) by the async_tx api
* use return_io in ops_complete_biofill
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Acked-By: NeilBrown <neilb@suse.de>
2007-01-02 23:52:30 +03:00
/*
 * Operation request flags
 *
 * These outline the requests that handle_stripe can make of raid5_run_ops,
 * which carries out the operations asynchronously via the async_tx api.
 */
#define STRIPE_OP_BIOFILL	0	/* copy data into request buffers to
					 * satisfy a read request
					 */
#define STRIPE_OP_COMPUTE_BLK	1	/* generate a missing block in the cache
					 * from the other blocks
					 */
#define STRIPE_OP_PREXOR	2	/* subtract existing data as part of the
					 * read-modify-write process
					 */
#define STRIPE_OP_BIODRAIN	3	/* copy data out of request buffers to
					 * satisfy a write request
					 */
#define STRIPE_OP_RECONSTRUCT	4	/* NOTE(review): presumably the parity
					 * recalculation step for new data -
					 * confirm against raid5_run_ops
					 */
#define STRIPE_OP_CHECK		5	/* verify that the parity is correct */
2005-04-17 02:20:36 +04:00
/*
 * Plugging:
 *
 * To improve write throughput, we need to delay the handling of some
 * stripes until there has been a chance that several write requests
 * for the one stripe have all been collected.
 * In particular, any write request that would require pre-reading
 * is put on a "delayed" queue until there are no stripes currently
 * in a pre-read phase.  Further, if the "delayed" queue is empty when
 * a stripe is put on it then we "plug" the queue and do not process it
 * until an unplug call is made. (the unplug_io_fn() is called).
 *
 * When preread is initiated on a stripe, we set PREREAD_ACTIVE and add
 * it to the count of prereading stripes.
 * When write is initiated, or the stripe refcnt == 0 (just in case) we
 * clear the PREREAD_ACTIVE flag and decrement the count.
 * Whenever the 'handle' queue is empty and the device is not plugged, we
 * move any stripes from delayed to handle and clear the DELAYED flag and set
 * PREREAD_ACTIVE.
 * In stripe_handle, if we find pre-reading is necessary, we do it if
 * PREREAD_ACTIVE is set, else we set DELAYED which will send it to the delayed queue.
 * HANDLE gets cleared if stripe_handle leaves nothing locked.
 */
2009-03-31 07:27:03 +04:00
2005-04-17 02:20:36 +04:00
/* Per-member-device record; at present it carries only the rdev pointer.
 * struct raid5_private_data refers to these through its "disks" and
 * "spare" members.
 */
struct disk_info {
	mdk_rdev_t	*rdev;
};
struct raid5_private_data {
2006-01-06 11:20:33 +03:00
struct hlist_head * stripe_hashtbl ;
2005-04-17 02:20:36 +04:00
mddev_t * mddev ;
struct disk_info * spare ;
2009-06-18 02:45:55 +04:00
int chunk_sectors ;
int level , algorithm ;
2006-06-26 11:27:38 +04:00
int max_degraded ;
2006-10-03 12:15:47 +04:00
int raid_disks ;
2005-04-17 02:20:36 +04:00
int max_nr_stripes ;
2009-03-31 08:16:46 +04:00
/* reshape_progress is the leading edge of a 'reshape'
* It has value MaxSector when no reshape is happening
* If delta_disks < 0 , it is the last sector we started work on ,
* else is it the next sector to work on .
*/
sector_t reshape_progress ;
/* reshape_safe is the trailing edge of a reshape. We know that
* before ( or after ) this address , all reshape has completed .
*/
sector_t reshape_safe ;
2006-03-27 13:18:08 +04:00
int previous_raid_disks ;
2009-06-18 02:45:55 +04:00
int prev_chunk_sectors ;
int prev_algo ;
2009-03-31 08:19:03 +04:00
short generation ; /* increments with every reshape */
2009-03-31 08:28:40 +04:00
unsigned long reshape_checkpoint ; /* Time we last updated
* metadata */
2006-03-27 13:18:08 +04:00
2005-04-17 02:20:36 +04:00
struct list_head handle_list ; /* stripes needing handling */
2008-04-28 13:15:53 +04:00
struct list_head hold_list ; /* preread ready stripes */
2005-04-17 02:20:36 +04:00
struct list_head delayed_list ; /* stripes that have plugged requests */
2005-09-10 03:23:54 +04:00
struct list_head bitmap_list ; /* stripes delaying awaiting bitmap update */
2006-12-10 13:20:47 +03:00
struct bio * retry_read_aligned ; /* currently retrying aligned bios */
struct bio * retry_read_aligned_list ; /* aligned bios retry list */
2005-04-17 02:20:36 +04:00
atomic_t preread_active_stripes ; /* stripes with scheduled io */
2006-12-10 13:20:47 +03:00
atomic_t active_aligned_reads ;
2008-04-28 13:15:53 +04:00
atomic_t pending_full_writes ; /* full write backlog */
int bypass_count ; /* bypassed prereads */
int bypass_threshold ; /* preread nice */
struct list_head * last_hold ; /* detect hold_list promotions */
2005-04-17 02:20:36 +04:00
2006-03-27 13:18:11 +04:00
atomic_t reshape_stripes ; /* stripes with pending writes for reshape */
2006-03-27 13:18:07 +04:00
/* unfortunately we need two cache names as we temporarily have
* two caches .
*/
int active_name ;
2010-06-01 13:37:25 +04:00
char cache_name [ 2 ] [ 32 ] ;
2006-12-07 07:33:20 +03:00
struct kmem_cache * slab_cache ; /* for allocating stripes */
2005-09-10 03:23:54 +04:00
int seq_flush , seq_write ;
int quiesce ;
int fullsync ; /* set to 1 if a full sync is needed,
* ( fresh device added ) .
* Cleared when a sync completes .
*/
2011-07-28 05:39:22 +04:00
int recovery_disabled ;
2009-07-14 22:48:22 +04:00
/* per cpu variables */
struct raid5_percpu {
struct page * spare_page ; /* Used when checking P/Q in raid6 */
2009-07-14 22:50:52 +04:00
void * scribble ; /* space for constructing buffer
* lists and performing address
* conversions
*/
2010-02-02 08:39:15 +03:00
} __percpu * percpu ;
2009-07-14 22:50:52 +04:00
size_t scribble_len ; /* size of scribble region must be
* associated with conf to handle
* cpu hotplug while reshaping
*/
2009-07-14 22:48:22 +04:00
# ifdef CONFIG_HOTPLUG_CPU
struct notifier_block cpu_notify ;
# endif
2006-01-06 11:20:17 +03:00
2005-04-17 02:20:36 +04:00
/*
* Free stripes pool
*/
atomic_t active_stripes ;
struct list_head inactive_list ;
wait_queue_head_t wait_for_stripe ;
wait_queue_head_t wait_for_overlap ;
int inactive_blocked ; /* release of inactive stripes blocked,
* waiting for 25 % to be free
2006-03-27 13:18:07 +04:00
*/
int pool_size ; /* number of disks in stripeheads in pool */
2005-04-17 02:20:36 +04:00
spinlock_t device_lock ;
2006-03-27 13:18:06 +04:00
struct disk_info * disks ;
2009-03-31 07:39:39 +04:00
/* When taking over an array from a different personality, we store
* the new thread here until we fully activate the array .
*/
struct mdk_thread_s * thread ;
2005-04-17 02:20:36 +04:00
} ;
typedef struct raid5_private_data raid5_conf_t ;
/*
 * Our supported algorithms
 */
#define ALGORITHM_LEFT_ASYMMETRIC	0 /* Rotating Parity N with Data Restart */
#define ALGORITHM_RIGHT_ASYMMETRIC	1 /* Rotating Parity 0 with Data Restart */
#define ALGORITHM_LEFT_SYMMETRIC	2 /* Rotating Parity N with Data Continuation */
#define ALGORITHM_RIGHT_SYMMETRIC	3 /* Rotating Parity 0 with Data Continuation */

/* Define non-rotating (raid4) algorithms.  These allow
 * conversion of raid4 to raid5.
 */
#define ALGORITHM_PARITY_0		4 /* P or P,Q are initial devices */
#define ALGORITHM_PARITY_N		5 /* P or P,Q are final devices. */

/* DDF RAID6 layouts differ from md/raid6 layouts in two ways.
 * Firstly, the exact positioning of the parity block is slightly
 * different between the 'LEFT_*' modes of md and the "_N_*" modes
 * of DDF.
 * Secondly, the order of datablocks over which the Q syndrome is computed
 * is different.
 * Consequently we have different layouts for DDF/raid6 than md/raid6.
 * These layouts are from the DDFv1.2 spec.
 * Interestingly DDFv1.2-Errata-A does not specify N_CONTINUE but
 * leaves RLQ=3 as 'Vendor Specific'
 */
#define ALGORITHM_ROTATING_ZERO_RESTART	8 /* DDF PRL=6 RLQ=1 */
#define ALGORITHM_ROTATING_N_RESTART	9 /* DDF PRL=6 RLQ=2 */
#define ALGORITHM_ROTATING_N_CONTINUE	10 /* DDF PRL=6 RLQ=3 */

/* For every RAID5 algorithm we define a RAID6 algorithm
 * with exactly the same layout for data and parity, and
 * with the Q block always on the last device (N-1).
 * This allows trivial conversion from RAID5 to RAID6
 */
#define ALGORITHM_LEFT_ASYMMETRIC_6	16
#define ALGORITHM_RIGHT_ASYMMETRIC_6	17
#define ALGORITHM_LEFT_SYMMETRIC_6	18
#define ALGORITHM_RIGHT_SYMMETRIC_6	19
#define ALGORITHM_PARITY_0_6		20
/* Deliberately an alias: shares the value of ALGORITHM_PARITY_N. */
#define ALGORITHM_PARITY_N_6		ALGORITHM_PARITY_N
/*
 * algorithm_valid_raid5 - is @layout a layout number accepted for RAID5?
 *
 * Accepts the contiguous range 0..5, i.e. ALGORITHM_LEFT_ASYMMETRIC
 * through ALGORITHM_PARITY_N.
 *
 * Returns 1 if @layout is in range, 0 otherwise.
 */
static inline int algorithm_valid_raid5(int layout)
{
	return (layout >= 0) &&
		(layout <= 5);
}
/*
 * algorithm_valid_raid6 - is @layout a layout number accepted for RAID6?
 *
 * Three disjoint ranges are accepted:
 *   0..5   - the layouts shared with RAID5 (ALGORITHM_PARITY_N_6 aliases 5)
 *   8..10  - the DDF ALGORITHM_ROTATING_* layouts
 *   16..20 - the "_6" layouts, ALGORITHM_LEFT_ASYMMETRIC_6 through
 *            ALGORITHM_PARITY_0_6
 *
 * Returns 1 if @layout falls in any of the ranges, 0 otherwise.
 */
static inline int algorithm_valid_raid6(int layout)
{
	return (layout >= 0 && layout <= 5)
		||
		(layout >= 8 && layout <= 10)
		||
		(layout >= 16 && layout <= 20);
}
/*
 * algorithm_is_DDF - is @layout one of the DDF RAID6 layouts?
 *
 * The DDF layouts are the ALGORITHM_ROTATING_* values, 8..10.
 *
 * Returns 1 if @layout is in that range, 0 otherwise.
 */
static inline int algorithm_is_DDF(int layout)
{
	return layout >= 8 && layout <= 10;
}
2010-07-26 05:57:07 +04:00
extern int md_raid5_congested ( mddev_t * mddev , int bits ) ;
2011-03-10 10:52:07 +03:00
extern void md_raid5_kick_device ( raid5_conf_t * conf ) ;
2010-06-01 13:37:24 +04:00
extern int raid5_set_cache_size ( mddev_t * mddev , int size ) ;
2005-04-17 02:20:36 +04:00
# endif