1
0
mirror of git://sourceware.org/git/lvm2.git synced 2026-01-29 16:32:51 +03:00

Compare commits

..

375 Commits

Author SHA1 Message Date
Alasdair G Kergon
d6cabbbc53 device: Fix basic async I/O error handling 2018-02-08 20:19:21 +00:00
Alasdair G Kergon
3e29c80122 device: Queue any aio beyond defined limits. 2018-02-08 20:15:37 +00:00
Alasdair G Kergon
db41fe6c5d lvmcache: Use asynchronous I/O when scanning devices. 2018-02-08 20:15:29 +00:00
Alasdair G Kergon
8c7bbcfb0f device: Basic config and setup to support async I/O. 2018-02-08 20:15:14 +00:00
Alasdair G Kergon
7a9af3cd0e device: Add flag to indicate that a code path can support AIO
Until the whole source supports AIO, library code can check for
AIO_SUPPORTED_CODE_PATH to determine whether or not it is OK
to use AIO.
2018-02-06 01:11:00 +00:00
Alasdair G Kergon
e869a52cc4 callbacks: Miscellaneous fixes for recent changes 2018-02-06 01:09:39 +00:00
Zdenek Kabelac
e727da6cf1 configure: ensure path /usr/sbin is checked for some tools
Some tools are typically installed into /usr/sbin (or /sbin) dir.
And some systems do not add this path to user's $PATH var.

Ensure sbin paths are looked through...
2018-02-01 21:57:05 +01:00
Zdenek Kabelac
4d4d5bf323 libdm: accept mirror status with userspace word in the line
Just making sure the parser will not stop - although greater level of
support needs to be added (Describing doc seems to be missing however).
2018-02-01 21:56:07 +01:00
Zdenek Kabelac
083c221cbe pvmove: reinstantiate clustered pvmove
In fact  pvmove does support  'clustered-core' target for clustered
pvmove of LVs activated on multiple nodes.

This patch restores support for activation of pvmove on all nodes
for LVs that are also active on all nodes.
2018-02-01 21:55:20 +01:00
Tim Foerster
34fb5202bd lvmdbusd: Remove duplicated DataPercent definition 2018-01-25 08:37:24 -06:00
Marian Csontos
edb209776f doc: Add VDO stacking document 2018-01-25 11:15:23 +01:00
Zdenek Kabelac
a1cfef9f26 dev_io: fix writes for unaligned buffers
Actually the removed code is necessary - since not all writes are
getting an aligned buffer - older compilers seem to be unable
to create 4K aligned buffers on stack - thus the aligning code still
needs to be present for the write path.
2018-01-23 13:36:12 +01:00
Zdenek Kabelac
102926ed9f tests: update set of devices
Previous change added slightly more complex mapping but missed updating
a few pieces.
2018-01-23 11:45:18 +01:00
Zdenek Kabelac
6e9148e7ab debug: drop DEBUG_MEM path
Memory is not allocated so no DEBUG_MEM part is needed.
2018-01-23 11:45:18 +01:00
Zdenek Kabelac
2ee105089a python: some LVs do need exclusive activation 2018-01-23 11:45:18 +01:00
Alasdair G Kergon
9194610f42 device: Add ioflags parameter to transfer additional state.
Flags are set on the initial I/O and passed to any callbacks that
may in turn issue further I/O using the inherited flags.
2018-01-21 21:10:23 +00:00
Zdenek Kabelac
f3c75bb201 activation: move check later
Check for lv when it's known in all cases.
2018-01-17 15:15:43 +01:00
Zdenek Kabelac
e16d309d56 tests: check pvmove is merging segments
pvmove was improved to properly merge consecutive segments after pvmove
so check it's working.
2018-01-17 14:45:48 +01:00
Zdenek Kabelac
5baf2de898 tests: longer startup timeout for daemons with valgrind
It's getting noticeably slower...
2018-01-17 14:45:48 +01:00
Zdenek Kabelac
8ebd45fde7 tests: check preserved exclusiveness of snapshot merge
Detect if origin remains exclusively activated after merging.
2018-01-17 14:45:48 +01:00
Zdenek Kabelac
8c7ec44bf0 tests: properly test with clustered VG
Testing in cluster makes sense only with clustered VG.
2018-01-17 14:45:48 +01:00
Zdenek Kabelac
3aedaa7f2a cleanup: drop unused code 2018-01-17 14:45:48 +01:00
Zdenek Kabelac
38b81e6537 cleanup: enhance messages
Add extra info about failing local exclusive activation
(as in cluster the LV can be active on some other nodes).
2018-01-17 14:45:48 +01:00
Zdenek Kabelac
a8bcdef4fd activation: guard exclusive activation
Add protective internal error whenever we spot activation
of 'exclusive' only segments in 'non-exclusive' mode.

TODO: possibly the activation locking could be enhanced to handle
this fully behind the scene - as for now this works purely for
lvchange/vgchange activation.
2018-01-17 14:45:47 +01:00
Zdenek Kabelac
f41935909f dmeventd: add check for result code
Check result from pthread_kill.
2018-01-17 14:44:33 +01:00
Zdenek Kabelac
02621cffb0 pvmove: drop misleading pvmove restriction for cluster
pvmove handles properly locked LVs in cluster and this extra check
actually cause misbehavior as some LVs were silently skipped from
operation scope.
2018-01-17 14:44:33 +01:00
Zdenek Kabelac
5a961d3411 pvmove: better check for exclusive LV 2018-01-17 14:44:33 +01:00
Zdenek Kabelac
7c6fb63041 pvmove: fix _remove_sibling_pvs_from_trim_list
Fix the function to really check it sibling raid image LV.
For LV_rmeta_0  check for   LV_rimage_0   instead of
LV_rmeta_0rimage_0.
2018-01-17 14:44:31 +01:00
Zdenek Kabelac
e86910b052 lvconvert: use excl activation for conversion
Use properly exclusive activation when reactivating origin after
snapshot merge (since origin must have been previously also exclusively
activated).

Same applies when converting volumes to thin-pool or cache.

Previously used 'only' local activation incorrectly allowed local
activation of some targets (i.e. raid) - thus 'leaking' chance to
activate same device on another node - which can be a problem
for device types like raid.
2018-01-17 14:43:34 +01:00
Alasdair G Kergon
c26458339e device: Move buffer allocation nearer to the I/O.
Don't allocate memory until it's needed - later we'll add
some of the I/O to an internal queue instead of issuing it
immediately.
2018-01-16 01:12:08 +00:00
Alasdair G Kergon
081902b4c1 device: Merge _dev_read and dev_read_callback. 2018-01-16 00:41:42 +00:00
Alasdair G Kergon
0a3c6bf8c6 format_text: Refactor mda counting in label processing. 2018-01-15 23:47:44 +00:00
Alasdair G Kergon
73b5ee64e7 format_text: Change update_mda_baton to use label not labeller 2018-01-15 20:13:53 +00:00
Alasdair G Kergon
b825987b2f device: Rearrange _aligned_io(). 2018-01-15 20:10:54 +00:00
Alasdair G Kergon
c90582344d device: Add reason to devbuf. 2018-01-15 19:38:18 +00:00
Alasdair G Kergon
1f01eaa612 device: Store offset to data instead of pointer.
We want to save the relative offset before we've allocated the
buffer's memory.
2018-01-15 19:32:59 +00:00
Alasdair G Kergon
61d3296f2a device: Reorder device.h before change. 2018-01-15 19:24:01 +00:00
Alasdair G Kergon
da37cbd24f command: Skip some memory zeroing.
commands[] is in bss, its content wouldn't change if reinitialised
and unregister has no memory to free so don't bother.
2018-01-13 03:44:15 +00:00
Alasdair G Kergon
e4e2abc8bc libdm: Fix a size_t in _dm_malloc_aligned_aux message. 2018-01-12 00:42:53 +00:00
Alasdair G Kergon
5bbe68cf15 man: regenerate 2018-01-12 00:30:52 +00:00
Alasdair G Kergon
35cdd9cf48 label: Clean up storing of device and label sector.
No longer use the external 'result' pointer internally to set up the
cached label.  The callback _set_label_read_result() is now given the
internal label pointer directly

Callers that don't need the result are no longer required to pass a
label pointer into label_read().
2018-01-11 02:54:00 +00:00
Alasdair G Kergon
f771d3f870 config: Move use_mmap to local variable. 2018-01-10 20:35:02 +00:00
Alasdair G Kergon
6210c1ec28 device: Mark read-only device buffers const. 2018-01-10 19:57:10 +00:00
Alasdair G Kergon
c350f96c09 device: Eliminate unnecessary buffer from dev_read. 2018-01-10 18:48:01 +00:00
Alasdair G Kergon
366493a1d1 device: Suppress repeated reads of the same data.
If the data being requested is present in last_[extra_]devbuf,
return that directly instead of reading it from disk again.

Typical LVM2 access patterns request data within two adjacent 4k blocks
so we eliminate some read() system calls by always reading at least 8k.
2018-01-10 15:52:03 +00:00
Alasdair G Kergon
dcb2a5a611 device: Remove some data copying between buffers.
Callers that read larger amounts of data now get a pointer to read-only
data directly without copying it through an intermediate buffer.  This
data is owned by the device layer so the callers no longer free it.
2018-01-10 15:48:03 +00:00
Alasdair G Kergon
4d568b709c device: Free cached device bufs when metadata invalid or dev closed. 2018-01-10 15:48:03 +00:00
Alasdair G Kergon
bd0967a4b1 device: Keep the last data buffer read off each device.
If there's a second metadata area on device, we record that separately.

Note that the memory requirements aren't restricted yet.
2018-01-10 15:48:03 +00:00
Alasdair G Kergon
bacc942333 allocation: Avoid exceeding array bounds in allocation tag code
If _limit_to_one_area_per_tag() changes nothing it writes beyond
the array.
2018-01-10 15:48:03 +00:00
Alasdair G Kergon
e2438b5b9f format_text: Use malloc aligned for export buffer 2018-01-10 15:48:03 +00:00
Alasdair G Kergon
b65246499b label: Rename a variable 2018-01-10 15:48:03 +00:00
Alasdair G Kergon
ea96381534 libdm: Introduce dm_malloc_aligned 2018-01-10 15:48:03 +00:00
David Teigland
943b217797 man lvmlockd: remove lv resizing comment 2018-01-10 09:17:57 -06:00
David Teigland
51340888aa lvmlockd: print warning when skipping locking 2018-01-09 11:46:00 -06:00
David Teigland
46cedb105b lvmlockd: add lockopt values for skipping selected locks
and add lockopt to common options.
2018-01-09 11:20:10 -06:00
Alasdair G Kergon
f4675af4cf format_text: Use vgsummary callbacks 2018-01-09 03:14:30 +00:00
Alasdair G Kergon
4b02d4e22e label: Add label_read callback. 2018-01-08 23:30:50 +00:00
Alasdair G Kergon
6d322e68f3 label: Add callback fns (partially) 2018-01-08 17:04:56 +00:00
Alasdair G Kergon
5e7d3ad749 device: Introduce dev_read_callback
If it obtains the data, it passes it into the supplied callback function
and returns 1.  Otherwise the callback receives failed = 1.

Updated config_file_read_fd to use this and similarly return the data
via a callback fn of its own.
2018-01-06 02:40:12 +00:00
Alasdair G Kergon
946f07af3e metadata: Use a consistent format for callback fn parameters 2018-01-05 14:24:56 +00:00
Alasdair G Kergon
a0ddfad94b metadata: Change the new data processing fns to void.
Move the existing fn return codes into the new structs.
2018-01-05 03:12:22 +00:00
Alasdair G Kergon
c70c9f6565 format_text: Split vgname_from_mda into three pieces. 2018-01-04 21:13:44 +00:00
Alasdair G Kergon
d61b1369d0 format_text: Split out raw_read_mda_header processing 2018-01-04 15:52:59 +00:00
Alasdair G Kergon
139209ef42 format_text: Split up _update_mda.
Dedicated functions are now used to process each piece of data obtained,
so the refactoring in this file gives us one for the vgsummary and one
for the metadata header.  This new type of function takes two parameters
(for now), the obtained data plus a single struct (that must not
reference any data on the stack) that wraps up the entire context needed
to process it.
2018-01-04 12:25:24 +00:00
Alasdair G Kergon
111a9fcff5 format_text: Allocate update_mda baton from mempool.
Also store return code.  Note that fatal and non-fatal errors while
handling the mda aren't currently distinguished.
2018-01-03 23:53:00 +00:00
Alasdair G Kergon
5a846e0929 format_text: Split the text import fns into two pieces. 2018-01-03 20:48:02 +00:00
Alasdair G Kergon
4b9806ab6f toolcontext: Add paired label_init to refresh_toolcontext.
label_init() and label_exit() should be paired.
2018-01-02 22:00:31 +00:00
Alasdair G Kergon
22b6c482ec config: Split config buffer processing into new fn.
Wrap its parameters into struct process_config_file_params allocated
from a mempool now passed into the config_file_read* fns.
2018-01-02 21:10:46 +00:00
David Teigland
96801ac085 man lvmlockd: update wording 2018-01-02 13:35:58 -06:00
Alasdair G Kergon
3db51e3f0e label: Wrap _find_labeller params into a struct.
Move the actual buffer reading up to _label_read() so _find_labeller()
just examines the buffer supplied.
2018-01-02 17:15:32 +00:00
Alasdair G Kergon
9b830791ea label: Move _set_label_read_result call into _find_labeller.
Move responsibility for setting the label_read() result parameter down
into _find_labeller().
2018-01-02 15:30:58 +00:00
Alasdair G Kergon
4f4ddb806d label: Move setting result of label_read into separate fn. 2018-01-02 14:19:20 +00:00
Alasdair G Kergon
e6b4b41881 label: Add mempool. 2018-01-02 13:37:12 +00:00
Zdenek Kabelac
0bf1cc2320 tests: sleep first
Sleep a bit before checking /sys/block dir so the kernel has a moment to
actually put scsi debug device in it...

Some quite old kernels are in troubles with this plain searching grep
without sleep (namely 2.6.32)

modprobe scsi_debug
 <sleep .1>
grep -H scsi_debug /sys/block/*/device/model
modprobe -r scsi_debug
2017-12-19 15:28:07 +01:00
Zdenek Kabelac
3a841515af lvm-string: add function to detect component LV suffix
Add is_component_lvname() function to recognize component LV name.
2017-12-19 15:28:07 +01:00
Alasdair G Kergon
17649d4ac8 device: Move dev_read memory allocation into device layer.
Rename dev_read() to dev_read_buf() - the function that reads data
into a supplied buffer.

Introduce a new dev_read() that allocates the buffer it returns and
switch the important users over to this.  No caller may change the
returned data.  (For now, callers are responsible for freeing it after
use, but later the device layer will take full ownership.)

dev_read_buf() should only be used for tiny buffers or unimportant code
(such as the old disk formats).
2017-12-19 01:31:50 +00:00
David Teigland
3f9ae846b8 lvmlockd: clear coverity complaint
from previous coverity fix, it's never happy.
2017-12-18 15:19:17 -06:00
Alasdair G Kergon
81be333e9f post-release 2017-12-18 20:43:09 +00:00
Alasdair G Kergon
a1f7a48325 pre-release 2017-12-18 20:36:10 +00:00
Alasdair G Kergon
5f45cb90a7 format_text: Transfer circular buf alloc to device layer.
Instead of the caller passing dev_read_circular() a buffer to fill with
data, the device layer itself now allocates it.
2017-12-15 22:34:26 +00:00
Alasdair G Kergon
beee9940a5 format_text: Separate out code paths for buffer wraparound
The creation of wrapped around metadata - where the start of metadata is
written up to the end of the buffer and the remainder follows back at
the start of the buffer - is now restricted to cases where writing the
metadata in one piece wouldn't fit.  This shouldn't happen in 'normal'
usage so let's begin treating the code for this as a special case that
can be ignored when optimising 'normal' cases.
2017-12-15 21:12:19 +00:00
Alasdair G Kergon
145ded10c2 format_text: Supply mempool directly to raw_read_mda_header. 2017-12-15 14:57:05 +00:00
Marian Csontos
83e1a0bad8 lvm2app: Suppress deprecation warnings for our builds 2017-12-14 16:45:53 +01:00
Marian Csontos
c957d46f1d lvmdbusd: Make lvmdbusd executable
- Add files built from *.in to builddir files.
- Add all files built from *.in to DISTCLEAN_TARGETS.
2017-12-14 16:45:53 +01:00
Marian Csontos
c48d22bd3c docs: Add TESTING file 2017-12-14 16:45:47 +01:00
Heinz Mauelshagen
584ff361df test: fix trap adding proper teardown to previous, new raid tests
Also remove superfluous variable.
2017-12-13 14:41:23 +01:00
Heinz Mauelshagen
86fa0333ff test: new raid tests
Add three new raid tests with io load and table
reloads during reshape for target 1.13.2.

Add a raid0 to raid10 conversion test.

Also add more signals to trap in lvconvert-raid-reshape-load.sh.
2017-12-13 13:30:53 +01:00
Alasdair G Kergon
3edc25dbdf format_text: Round size written up to multiple of 4096.
Zero-fill metadata up to the next 4096 boundary then write out a
multiple of 4096 bytes to avoid triggering a read-modify-write.
2017-12-12 22:52:22 +00:00
Alasdair G Kergon
78ffa44fc5 format_text: Change metadata alignment from 512 to 4096.
If there is sufficient space in the metadata area, align the next
metadata to a disk offset that is a multiple of 4096 bytes and
don't write it circularly.  If it doesn't all fit at the end
of the metadata area, go back to the start and write it all there
contiguously.

If there is insufficient space to use the new stricter rules, revert to
the original behaviour, aligning on 512-byte boundaries wrapping around
the circular buffer as required.
2017-12-12 20:57:36 +00:00
Alasdair G Kergon
643df602c7 format_text: More refactoring of metadata offset calcs 2017-12-12 18:51:32 +00:00
Alasdair G Kergon
4002f5e206 format_text: Refactor and document metadata offset calculation. 2017-12-12 18:36:54 +00:00
Alasdair G Kergon
e932c5da50 device: Fix an unpaired device close.
dev_open_flags contains an unpaired dev_close_immediate so increment
open_count before calling it.
2017-12-12 17:56:58 +00:00
Alasdair G Kergon
b96862ee11 metadata: Consistently skip metadata areas that failed.
Even after writing some metadata encountered problems, some commands
continue (rightly or wrongly) and attempt to make further changes.

Once an mda is marked MDA_FAILED, don't try to use it again.
This also applies when reverting, where one loop already skips
failed mdas but the other doesn't.

This fixes some device open_count warnings on relevant failure paths.
2017-12-12 17:52:45 +00:00
Marian Csontos
15ccea7111 test: Fix condition when detecting lvmdbusd 2017-12-12 14:19:22 +01:00
Alasdair G Kergon
c5ef76bf27 device: Internal error if writing 0 bytes to dev. 2017-12-12 12:57:25 +00:00
Marian Csontos
7272fd2210 lvmdbusd: All tools use detected python3
- lvmdb.py and lvm_shell_proxy.py can be used as standalone tools, so
  should use detected value.
- clean executable bit on *.in files.
2017-12-12 13:17:07 +01:00
Marian Csontos
2f4c2a43d4 test: lvmdbusd is used for process name
lvmdbusd was started, but the process was not recognized by pgrep.

- configure does not make the script executable - set the flag
  explicitly when running make check,
- process name changed to lvmdbusd. The previous python3 value
  originated from the use of /usr/bin/env.
2017-12-12 13:17:07 +01:00
Alasdair G Kergon
b76c6951aa format_text: Adjust metadata alignment calculation.
Use new ALIGN_ABSOLUTE macro when calculating the start location
of new metadata and adjust the end of buffer detection so that
there is no longer an imposed gap between old and new metadata.
2017-12-11 20:25:03 +00:00
Alasdair G Kergon
053d35de47 format_text: Use absolute alignment to calculate metadata usage
Currently both start and offset should always be divisible by alignment,
so this should have no effect, but a later patch will increase alignment
so these variables can no longer be optimised out.
2017-12-11 17:14:38 +00:00
Alasdair G Kergon
2db67a8ea0 format_text: Move metadata size checking into separate fn.
Move checks into _metadata_fits_into_buffer() and add macro for alignment.
2017-12-11 17:08:29 +00:00
Alasdair G Kergon
46393bfca0 format_text: Log additional circular buffer information. 2017-12-11 16:07:34 +00:00
Alasdair G Kergon
49d486319f format_text: Replace PRI with FMT. 2017-12-11 15:39:25 +00:00
Marian Csontos
75d4d8e9a4 configure: Fix deprecation warning for lvmapp
* and properly regenerate configure script
2017-12-11 15:20:48 +01:00
Marian Csontos
2aedc98242 lvmdbusd: Fix path to python3
lvmdbusd executable script must use python3 interpreter detected by
configure script, as site-packages directory used for library is only
used by that interpreter.
2017-12-11 12:36:54 +01:00
Marian Csontos
6a6c8d7b81 configure: applib is not required by dbusd 2017-12-11 12:12:42 +01:00
Zdenek Kabelac
a64c73a979 cleanup: debug message 2017-12-08 13:21:15 +01:00
Zdenek Kabelac
700e2a2d25 cleanup: switch to standard query
In /tools part we should mostly use standard functions
lv_is_active*  unless there is good reason to not use it.
2017-12-08 13:21:15 +01:00
Zdenek Kabelac
71485ebfc7 thin: regression fix for metadata checking
Fix regression from commit f173274fe4
and restore support for 'disabled' checking via lvm.conf.
2017-12-08 13:21:15 +01:00
Zdenek Kabelac
455b26b8db activation: keep priority till memlock_unlock
Although it doesn't look like it can be a measurable problem
and costs some time to flip priorities outside of activation window.

So just like with memory locking preserve priority until call
memlock_unlock() appears.

(addition to commit c086dfadc3).
2017-12-08 13:21:15 +01:00
Zdenek Kabelac
4e8af1d3aa vgchange: drop extra counting
As the loop for background polling _poll_lvs_in_vg checks for active LVs,
avoid doing unnecessary extra check before.
2017-12-08 13:21:14 +01:00
Alasdair G Kergon
14b1e5270d format_text: Use explicit alignment in wrapping calc.
Expand out the metadata wrapping calculations to prepare
to support a larger alignment.

The current alignment is 512 bytes so
(mdac_area_start + rlocn->offset) % alignment is zero.
2017-12-08 01:18:46 +00:00
Zdenek Kabelac
4c88c4626d debug: update debug msg
Use _node_name.
2017-12-07 21:00:39 +01:00
Zdenek Kabelac
717714b24f cleanup: use log_warn 2017-12-07 21:00:39 +01:00
Zdenek Kabelac
f173274fe4 cleanup: reorder calling of pool checking tools
Test for zero header before even starting to create argument list for
execution of thin/cache_check tool.
2017-12-07 21:00:39 +01:00
Zdenek Kabelac
820b1b98fc libdm: drop extra structure copy
When doing resume, directly pass location where new updated info
needs to be stored.

_resume_node() ensures the info is ONLY updated when the function
is successful and never changes it on error path.
2017-12-07 21:00:39 +01:00
Zdenek Kabelac
82ae02bc6a libdm: use delay_resume_if_extended
Update the logic towards more explicit logic.

Preload tree normally does not want to resume, only
in certain cases of extension or new loaded nodes can be
resumed. So introduce new internal variable delay_resume_if_extended
controllable by target.

Patch itself is not changing current existing behaviour,
and rather documents existing problem in more readable way.

lvm2 needs to introduce explicit mechanism how to support more
fine-grained (and safe) logic to i.e. resize thin-pool which
can be sitting on cached raid volume.
2017-12-07 21:00:39 +01:00
Zdenek Kabelac
297d5915c3 libdm: avoid checking status on activation
Variable props.send_messages has 3 states and was not used properly
here.  Activation in this moment does not need to verify thin-pool status
as that has been already checked on preload.
So only if there are some real messages (value 2) call function
for sending them.
2017-12-07 21:00:36 +01:00
Zdenek Kabelac
4a4ea47f70 libdm: add help func _get_last_load_segment
Share code for same functionality.
2017-12-07 20:59:37 +01:00
Zdenek Kabelac
c3e224ad0e thin: missing type is error 2017-12-07 20:59:37 +01:00
Zdenek Kabelac
2208ebfe16 thin: always clear memory before parsing status
Ensure there cannot be 'reused' any data from some previous call.
2017-12-07 20:59:28 +01:00
Alasdair G Kergon
2166d7be72 lvmetad: drop stray underscore 2017-12-07 16:24:14 +00:00
Alasdair G Kergon
d591d04103 device: Tag I/O for each mda on a device separately in log messages.
Mark the first metadata area on each text format PV as MDA_PRIMARY.
Pass this information down to the device layer so that when
there are two metadata areas on a block device, we can easily
distinguish two independent streams of I/O.
2017-12-07 03:48:11 +00:00
David Teigland
54154dc6f1 lvmlockd: clear coverity complaint 2017-12-06 10:49:31 -06:00
David Teigland
0f0f6978e7 lvmlock: return an error if lvb cannot be written
It doesn't make much difference, because if this happens,
it means there are other more significant problems.
2017-12-06 10:49:31 -06:00
Marian Csontos
9e4dc83241 Update WHATS_NEW 2017-12-06 10:41:14 +01:00
David Teigland
b910c34f09 lvmlockd: use pool lock for tmeta access
When a command is run on a named tmeta LV, use
the lock on the pool.
2017-12-05 14:31:03 -06:00
David Teigland
b9e4198500 lvmlockd: fix log print
from previous commit
2017-12-05 13:48:30 -06:00
David Teigland
5d5807b238 lvmlockd: improve error message for VG lock conflict
When there is significant VG lock contention which retries
have not been able to mask, print a better error message.
2017-12-05 11:53:03 -06:00
David Teigland
46d6f7a639 lvmlockd: recognize normal errors from sanlock_convert
Don't log an error message for error values returned
by sanlock_convert for expected conditions like lock
contention or io timeouts.
2017-12-05 11:53:03 -06:00
David Teigland
bd893348b4 lvmlockd: avoid blocking in sanlock_convert
Use the same flag as sanlock_acquire() for the
same purpose.  sanlock_convert can block like
acquire when doing a sh to ex conversion.
2017-12-05 11:53:03 -06:00
Heinz Mauelshagen
94632eb155 deactivate_lvs: deactivate any missing RaidLV legs
In case of failed legs, raid replaces those with
e.g. "vg-lv_rimage_0-missing_0_0" mapped to an error target.

Those erroneously remain on deactivation.

Fix by removing them on deactivation/removal of the RaidLV.
2017-12-05 18:48:06 +01:00
Alasdair G Kergon
7195df5aca device: Skip read-modify-write if replacing whole block. 2017-12-05 01:00:38 +00:00
Alasdair G Kergon
e4805e4883 device: categorise block i/o
Introduce enum dev_io_reason to categorise block device I/O
in debug messages so it's obvious what it is for.

DEV_IO_SIGNATURES   /* Scanning device signatures */
DEV_IO_LABEL        /* LVM PV disk label */
DEV_IO_MDA_HEADER   /* Text format metadata area header */
DEV_IO_MDA_CONTENT  /* Text format metadata area content */
DEV_IO_FMT1         /* Original LVM1 metadata format */
DEV_IO_POOL         /* Pool metadata format */
DEV_IO_LV           /* Content written to an LV */
DEV_IO_LOG          /* Logging messages */
2017-12-04 23:45:26 +00:00
Zdenek Kabelac
698483b5a1 activation: also lock memory for clustered locking
Commit c086dfadc3 failed to lock memory
for the clustering suspend part since it's using a different locking reason.
2017-12-04 23:33:02 +01:00
Zdenek Kabelac
406b566cfc cleanup: drop unneeded check
Code already has dereferenced UUID before this point,
and it's already given we require name & uuid when adding new node
(although uuid could be empty string).
2017-12-04 15:45:49 +01:00
Zdenek Kabelac
5abf6b7c21 cleanup: message cleanup 2017-12-04 15:38:50 +01:00
Zdenek Kabelac
76954884c7 cleanup: drop unused define 2017-12-04 15:38:50 +01:00
Zdenek Kabelac
110dac870c cleanup: use existing define with prefix 2017-12-04 15:38:50 +01:00
Zdenek Kabelac
1f73cadd2d cleanup: use log_warn
Function is not failing execution -> log_warn.
2017-12-04 15:38:50 +01:00
Zdenek Kabelac
76322d3b3e cleanup: use log_warn
Avoid logging error when function is not failing.
Technically can't really happen ATM anyway.
2017-12-04 15:38:50 +01:00
Zdenek Kabelac
2a01e3d4ca cleanup: use _node_name
Use existing internal method for create 'name (major:minor)' string
for debug messages and reduce some messages.
2017-12-04 15:38:50 +01:00
Zdenek Kabelac
925fec6ecb cleanup: stack tracing 2017-12-04 15:38:50 +01:00
Zdenek Kabelac
e3366787b6 cleanup: mark success at the end
Simplify setting 'success' return value and use common use-pattern
for handling return code.
2017-12-04 15:38:50 +01:00
Zdenek Kabelac
10f37345eb cleanup: drop impossible test case
This test can never be true since info is embedded struct.
2017-12-04 15:38:50 +01:00
Zdenek Kabelac
1f6d79ab48 cleanup: simpler error message
Use single 'error' message just with different reason.
2017-12-04 15:38:50 +01:00
Zdenek Kabelac
7379a2624b cleanup: further code reduction
Just like everywhere else - use single if() for major:minor setup
(it basically can't fail as of today anyway)

Always leave function with correctly set pointers even on error path.
2017-12-04 15:38:50 +01:00
Zdenek Kabelac
2a22576b2d cleanup: drop unused header
DM_UUID_LEN is no longer needed.
2017-12-04 15:38:50 +01:00
Zdenek Kabelac
e447d7ca5e libdm: support for replicator target is dropped
Replicator never really existed in upstream kernel and its support
got deprecated.

Also its support never got finished so no code is supposed to be
using it anyway.

Libdm symbols are remaining, just the implementation will always
return failure - so any user of:

dm_tree_node_add_replicator_dev_target()
dm_tree_node_add_replicator_target().

will now always receive error message.
2017-12-04 15:38:50 +01:00
Zdenek Kabelac
63368a5064 libdm: watch for failing _info_by_dev
Separate handling of error code from _info_by_dev.
This error can only happen when we are running out of memory.
In such case there is urgent need to stop any further proceeding
of command and run to error ASAP.
2017-12-04 15:38:50 +01:00
Heinz Mauelshagen
b74e7f6a78 test: allow to succeed in the cluster
Avoiding "$(get first_extent_sector "$d")" in the loop
allows the test to succeed in the cluster.  Further cluster
analysis needed to get to the core reason.
2017-12-01 18:59:55 +01:00
Heinz Mauelshagen
053314f295 test: add large tests skip option
The lvm2 test suite aims at small test resource footprints
(few PVs, small PV sizes) to run on tmpfs backed loop device.

OTOH, lvconvert-reshape-raid.sh aims to test the maxima of
supported total stripes of 64.  This patch adds a prerequisite
conditional to skip tests using more than 14 stripes.

It requires the target version 1.13.1 to avoid deadlocks.
2017-12-01 18:50:55 +01:00
Heinz Mauelshagen
7aef59c6c3 test: reduce pv sizes, fix comment and stripe size 2017-12-01 18:42:37 +01:00
Heinz Mauelshagen
16fa9d9ed5 test: cleanup command 2017-12-01 18:39:38 +01:00
Heinz Mauelshagen
4daad1cf11 lv_manip: allow extension on --nosync raid lv
If the recovery of the replaced leg(s) of a RaidLV created without
initial resynchronization (i.e. "lvcreate --nosync ...") got
interrupted, it can't be extended because of the < 100% sync rate.
2017-12-01 18:38:18 +01:00
Heinz Mauelshagen
3688eeeea0 test: disable new reshape test until target version 1.13.1 2017-12-01 18:33:11 +01:00
Heinz Mauelshagen
9681d98cc4 test: new reshape test under io load 2017-12-01 18:31:12 +01:00
Heinz Mauelshagen
d3d18e637c raid: ignore --stripesize on raid4/5 conversion to 1 stripe
In case caller passes in changed stripe size when reshaping raid4/5
to 1 stripe aiming to convert to raid1 and optionally to linear,
ignore it to prevent data corruption.
2017-12-01 15:00:09 +01:00
Zdenek Kabelac
a42c3a0e90 cleanup: remove debug code 2017-12-01 12:19:09 +01:00
Zdenek Kabelac
0e177cc7c9 cleanup: simplify _deps code
Make _deps to always return name & uuid,
and postpone duplication of name and uuid into _create_dm_tree_node().
Saves some duplicated code.
2017-12-01 12:19:09 +01:00
Zdenek Kabelac
4dc8184803 suspend: optimize generated list
Avoid adding same LV multiple times into the list.
Just saves couple extra calls and ioctls and makes log shorter.
2017-12-01 12:19:09 +01:00
Zdenek Kabelac
7e794b7748 activation: avoid rechecking pvmove node
Use new 3rd. state of trace_pvmove_deps == 2.
In this state we know, we have already seen the node and can skip further
testing.   Remaining value 1 signals we want to track, and value 0
is for ignoring tracking, but node is still checked in this case.

Reduces large amount of duplicate ioctl queries.
2017-12-01 12:19:09 +01:00
Zdenek Kabelac
e4db42e476 activation: extend resume validation
Check also all snapshots when resume is requested,
the origin volume is already resumed, but possibly
some subLV or snapshot LV could be suspended if
we are still in critical_section.
2017-12-01 12:19:09 +01:00
Zdenek Kabelac
c086dfadc3 activation: split priority from memory locking
When entering any critical section, lvm2 used to lock process memory
and raised task priority to avoid problem with page swapping and minimize
time of having non-resumed devices in table.

With this patch, memory locking, which is expensive, is only used when
entering  'suspending' section as only in this section there is risk
lvm could be suspending a device which later can be needed for paging.

Raised priority is still kept for all section entrances as this is
low-cost operation and may accelerate table resumes - although the real
impact can be still considered later.
2017-12-01 12:19:09 +01:00
Zdenek Kabelac
c489dd2e17 pvmove: add missing segment merging
When pvmove is finished and metadata are updated, the code missed
to merge possible mergeable segments - so add explicit merging
call after pvmoved volumes are unlocked.

This avoids weird results where i.e. lvs could have been reporting
non-matching segments as lvs upon metadata read is doing silent segment
merging while dm table left after pvmove was still preserving
non-merged segments.
2017-12-01 12:19:09 +01:00
Zdenek Kabelac
fbd8b456db pvmove: move code from tools to lib
Move code manipulating with locking flags into /lib part of lvm.
2017-12-01 12:18:32 +01:00
Marian Csontos
94d3878efb Update WHATS_NEW 2017-11-30 13:30:28 +01:00
Marian Csontos
b8caca4652 tests: Fix common_dev_ argument handling
from[:[len]] arguments with missing colon assigned from to len, instead
of till_the_end_of_device.
2017-11-28 12:03:35 +01:00
Alasdair G Kergon
a9812ec9d3 label: Remove unused verify functions.
label_verify has never been used so remove it.
2017-11-28 01:36:55 +00:00
Zdenek Kabelac
02e934c444 cleanup: reuse existing macro
Use existing macro to detect striped raid segment.
2017-11-27 10:34:30 +01:00
Zdenek Kabelac
efa17cae24 cmdline: avoid overrun on very large numbers.
When large size number (>2^31) is given on command line it could be
misdetected and in certain cases lead to wrongly casted number.

So make sure all cases always do set _MAX number in case the value would
not fit within the supported range instead of getting some random value
within the range.

In most cases this was not a problem to detect, but i.e. stripesize
parameter might have been fooled by certain large numbers.
2017-11-27 10:34:30 +01:00
Zdenek Kabelac
34eb082bbc toollib: improve stripes args reading
Rewrite validation of stripes and stripe_size args into more readable
sequential code.

Extend reading of stripes & stripes_size args so it better knows
defaults for types like striped raid.

TODO: this should really be a value obtained for segtype structure and
all the weird conditions and modification of stripes and stripe_size
around lvm2 code should be dropped.
2017-11-27 10:34:30 +01:00
Zdenek Kabelac
f70404addb pvmove: enhance delayed_resume logic
ATM we want to support delayed resume purely in pvmove case.
So have libdm logic internal to recognize difference between
pvmove and other targets that do use delayed resume.

This fixes problem introduced with commit aa68b898ff
for mirror-on-mirror or snapshot-on-mirror problem.

TODO: likely add a new API call and let libdm user select
delayed nodes explicitly.
2017-11-26 00:36:48 +01:00
Zdenek Kabelac
8c6fd0933f activation: enhance holders detection
Use code which detects holders in a way, which is more
backward-compatible friendly.

Replace read of 'sysfs' uuid entry with dm ioctl call.

Use /sys/block/dm-X/holders path instead of
new path  /sys/dev/block/major:minor/holders.

TODO:
There are few more occurrences of this logic around the code
so some abstract interface should be considered.
2017-11-26 00:31:26 +01:00
Zdenek Kabelac
ce83162d7c tests: more places where missing should not appear 2017-11-24 16:09:59 +01:00
Zdenek Kabelac
7eae2647c0 tests: typo 2017-11-24 16:09:59 +01:00
Zdenek Kabelac
7bffbe0e2f cleanup: update error messages
Correct printed lv names in error messages
and add missing trace for return.
2017-11-24 16:09:59 +01:00
Zdenek Kabelac
b0398f42ad cleanup: using max args consistently
Define and use MAX_PDATA_ARGS for thin_repair and cache_repair consistently.
2017-11-24 16:09:59 +01:00
Zdenek Kabelac
54741aeac5 cleanup: reduce couple debug lines 2017-11-24 16:09:59 +01:00
Zdenek Kabelac
5e88d3a89b cache: use conditional in warning message
In some cases the message could be slightly misleading so use
here rather conditional.

TODO:

In future we may possibly further tune the message in case we are
certain the level of redundancy protection has not been reduced.
2017-11-24 16:09:59 +01:00
Zdenek Kabelac
5b5c5cc618 toollib: avoid printing default stripe size
When user is not creating striped LV, do not print
rather confusing info about default stripe size.
2017-11-24 16:09:59 +01:00
Zdenek Kabelac
16ef133be2 pvmove: finish no longer needs explicit resumed
Activation code can see from holders needed LVs for resume.
2017-11-24 16:09:59 +01:00
Zdenek Kabelac
2a6981a697 activation: automaticaly discover pvmove holders
When pvmove is finished and does 'suspend/resume' on PVMOVE LV,
on resume path committed metadata are already showing 'standalone'
pvmove LV prepared just for removal.

However code should be able to 'resume' preloaded LVs that were
participating in pvmove operation.

Previously this was all done in the 'tools' part of lvm2 code.
So the lvconvert upon pvmove finish had to explicitly call 'resume' on every such LV.
Now 'smarter' activation code is able to deduce and combine all information from
the active dm table and committed metadata so single  call resolves
it all in one go.

Internally holders are detected by reading sysfs directory to capture
all needed UUID which are then looked in lvm2 metadata and all such
LVs are automatically collected into dmtree.
2017-11-24 16:09:59 +01:00
Zdenek Kabelac
ddbe763eb8 mirror: use lv_update_and_reload_origin
Replace complex code with standard lv_update_and_reload_origin().
Extra suspend should not be necessary.
(If they would be - dependency tree would have bug for fixing).
2017-11-24 16:05:21 +01:00
Zdenek Kabelac
aa68b898ff libdm: preload propagates delayed resume
Propagate delayed resume at least for preload case in a simple way.
Currently  PVMOVE depends on internal logic where 'mirror' with
corelog is 'possible' PVMOVE. In such case resume of 'created'
node is 'delayed'.

This is mostly an ugly internal hack - but for the moment being when we
add propagation for preload - it does work reasonable.

TODO: provide standard API and avoid this internal 'guessing'.
2017-11-24 16:05:21 +01:00
Zdenek Kabelac
b5be7420d9 locking: pvmove is locking holding LV
As we do get lock for pvmove LV - it's lockholder ATM.
2017-11-24 16:05:21 +01:00
Zdenek Kabelac
8cf10948bd resume: secure critical section
Only  thin-pool with origin_only suspend is allowed to be not suspending anything.
In such case pairing resume will 'decrement' critical section counter.
2017-11-24 16:05:21 +01:00
Zdenek Kabelac
ae6beda12d suspend: handle start of pvmove
Just like suspend handles preload for pvmove finish,
in similar way handle suspend of starting pvmove.

In this case the  precommited metadata are checked for list of PVMOVEed
LVs and those are suspended in with committed metadata.
2017-11-24 16:05:21 +01:00
David Teigland
175d06a929 lvconvert: prevent combining split snapshots in shared vg
splitting snapshots was already prevented.
2017-11-21 11:45:49 -06:00
David Teigland
ea0463791d man: lvmlockd steps for changing lock type
were not quite correct
2017-11-21 10:37:00 -06:00
David Teigland
bbaaf4f1d3 lvmlockd: override unknown lock manager error numbers
When sanlock or dlm lock managers return an error number
that we don't recognize, replace it with a generic -ELMERR
which is defined in the set of special lvmlockd error
numbers.  Otherwise, an unknown lock manager error number
could be misinterpreted for something else if it happened
to overlap another set of error numbers (which they have
not thus far.)
2017-11-17 10:59:12 -06:00
David Teigland
e52d2e3bd8 lvmlockd: retry on other sanlock errors
These less common errors returned from sanlock should
also cause sanlock to retry the lock acquire:

- i/o timeout occurs during sanlock_acquire().
  other i/o on the same disk as the leases can cause
  sanlock i/o timeouts.

- low level disk paxos contention between hosts naturally
  causes one host to not acquire the lease.  There are a
  couple special error numbers associated with these cases
  that should just be recognized as a normal failure to
  acquire the lease.
2017-11-17 10:59:12 -06:00
Alasdair G Kergon
115e66e9be device: log debug when I/O bounce buffer used 2017-11-16 19:16:10 +00:00
Alasdair G Kergon
efb0e7ac6f lvmdiskscan: Remove unnecessary read.
dev_get_size will open the device itself.
2017-11-16 19:13:03 +00:00
Heinz Mauelshagen
93c02e2532 raid: add validation checks for reshape flags
Enhance vg_validate() raid checking functions to check for
flags  LV_RESHAPE and LV_RESHAPE_DELTA_DISKS_(MINUS|PLUS).
2017-11-15 21:24:44 +01:00
Zdenek Kabelac
b0618f9011 tests: pvmove supports more cases
pvmove handles cached LVs as well as snapshots now.
2017-11-15 21:00:29 +01:00
Zdenek Kabelac
0f0dc1a2a5 pvmove: remove unusued code
Support for snapshot and cache LVs should now work.
Remove protection rejecting pvmove for them.
2017-11-15 21:00:29 +01:00
Zdenek Kabelac
b978f505ff pvmove: activation changes
Do not use precommitted LV for suspend of pvmoving LVs.
2017-11-15 21:00:26 +01:00
Zdenek Kabelac
a25f9b2106 pvmove: change locking on finish 2017-11-15 14:07:00 +01:00
Zdenek Kabelac
9d04ecc7b3 cleanup: update log messages 2017-11-15 14:03:22 +01:00
Zdenek Kabelac
838592a171 activate_lvs: use exclusive activation
There is no need to differentiation between clustered VG and normal VG.
As the activation depends on locking type.

Use unconditionally locally exclusive activation for pvmove.
2017-11-15 14:03:22 +01:00
Zdenek Kabelac
8212e1047e pvmove: always flush when working with pvmove tree 2017-11-15 14:03:22 +01:00
Zdenek Kabelac
35b207946a activation: suspend pvmove using lv.
Whenever pvmove tree is going to be generated for suspend
and such LV has a user - use this 'using LV' to generate
correct dm tree  holding all components.
2017-11-15 14:03:22 +01:00
Zdenek Kabelac
9e2a68a981 activation: check subLV before skipping resume
LV is asked for resume, and its already resume and tool
is inside 'critical_section()' check if there is any suspended sub LV.
In that case 'resume' operation will not be skipped.
2017-11-15 14:03:22 +01:00
Zdenek Kabelac
c820b43fc0 activate_lvs: deactivate on uniterate
When activation of LVs fails prior pvmove start, try to deactivate
already activated LVs.

TODO: possibly remember which LVs where already activate and only those
take down - devices which are already in-use will stay active.
2017-11-15 12:11:33 +01:00
Zdenek Kabelac
eab9097b46 layers: collect only lock holding LVs 2017-11-15 12:11:33 +01:00
Zdenek Kabelac
cc854c0617 pvmove: return pvmove itself
When find_pvmove_lv_in_lv() get already a 'pvmoving' LV - return it.
2017-11-15 11:51:53 +01:00
Zdenek Kabelac
919744375e pvmove: snapshots need to go with origin device
Enable future support for pvmove for COW LVs.
2017-11-15 11:51:53 +01:00
Marian Csontos
7c5531b4ca Update WHATS_NEW 2017-11-15 10:01:50 +01:00
Alasdair G Kergon
fe63c09381 regenerate man and conf 2017-11-15 01:05:07 +00:00
Alasdair G Kergon
02e9876665 log: Add io debug class 2017-11-15 01:02:15 +00:00
Alasdair G Kergon
b5f62a143d metadata: Eliminate redundant nested VG metadata
Only lv_committed() now uses vg->vg_committed and it appears redundant
if its contents match the enclosing VG so don't waste cycles creating it
when that's known to be true when no write lock is held so the struct
won't get modified.
2017-11-14 15:38:55 +00:00
Heinz Mauelshagen
7a5728fb4c test: additional avoid have_raid overhead 2017-11-14 14:41:44 +00:00
Heinz Mauelshagen
97750cd12f test: avoid have_raid overhead 2017-11-14 14:41:15 +00:00
Heinz Mauelshagen
ebd0fed0ce raid: correct raid6_n_6 -> raid5 convenience type
Fix "lvconvert --type raid5 RaidLV" on a "raid6_n_6" LV offering
false "raid6_ls_6" instead of "raid5_n".
2017-11-14 14:41:06 +00:00
Alasdair G Kergon
00acae12a4 metadata: Remove unused vg.cft_precommitted
The precommitted metadata config_tree is now only referenced from a
single function so just use a local variable instead.
2017-11-14 01:22:09 +00:00
Alasdair G Kergon
6bf0f04ae2 log: Improve various device-related messages
- Use 'lvmcache' consistently instead of 'metadata cache'
- Always use 5 characters for source line number
- Remember to convert uuids into printable form
- Use <no name> rather than (null) when VG has no name.
2017-11-13 19:45:33 +00:00
Alasdair G Kergon
598fcccf45 persistent filter: Skip import before rescan
The persistent filter should not be imported by any command that doesn't
use it so take additional note of REQUIRES_FULL_LABEL_SCAN (for vgrename)
and introduce IGNORE_PERSISTENT_FILTER for vgscan and pvscan.
2017-11-13 19:45:16 +00:00
Zdenek Kabelac
fe69a8d215 tests: use nonflusing status
Avoid being blocked when reporting status with flushing on some stacked
device tree.
2017-11-11 01:00:35 +01:00
Zdenek Kabelac
dd06a0a4a6 lv_lock_holder: unused cache-pool is not lock holder
Unused cache-pool is only a container for data and metadata,
and does not present lockable entity.
2017-11-11 00:59:46 +01:00
Zdenek Kabelac
52cee9dd83 lvremove: for unused cache deactive sublv 2017-11-11 00:59:19 +01:00
Zdenek Kabelac
55b8204ca3 reload: do not take backup with suspended devices
If the suspend/resume sequence would leave some device in suspend
for possible later resume, backup cannot be taken (fs holding backups
could be still frozen in critical section())
2017-11-11 00:58:11 +01:00
Zdenek Kabelac
1ac7fde67b cleanup: remove stack tracing for ok path 2017-11-11 00:56:10 +01:00
Zdenek Kabelac
b9ac1c12d0 cleanup: constify lv parameter 2017-11-11 00:56:10 +01:00
Zdenek Kabelac
05f9acdc7f raid: protect raid4 activation
Move check for presence of raid4 into the right place
so there is no way how to hit activation of any LV
with raid4 on kernel which does not support it.
2017-11-11 00:56:10 +01:00
Ondrej Kozina
f0d1c8429b test: check dmsetup hides integrity encryption keys 2017-11-10 15:23:07 +01:00
Ondrej Kozina
32e747dd31 dmsetup: hide integrity encryption keys on table output 2017-11-10 15:23:00 +01:00
Marian Csontos
512b2adc77 test: D-Bus test_nesting requires filter 2017-11-09 11:57:59 +01:00
Marian Csontos
cd3b5e60a8 test: Rename test nothing methods
These two methods might be useful for debugging, but are not testing
anything.
2017-11-09 11:31:54 +01:00
Heinz Mauelshagen
9958c41927 raid: reject message for 2-legged raid4/5 -> striped
Commit 763db8aab0 rejects 2-legged
conversions to striped/raid0 but different messages are displayed
for raid0 or striped. This commit provides the same rejection messages.
2017-11-08 18:17:26 +01:00
Heinz Mauelshagen
763db8aab0 raid: reject conversion request to striped/raid0 on 2-legged raid4/5
raid4/5 LVs may only be converted to striped or raid0/raid0_meta
in case they have at least 3 legs. 2-legged raid4/5 are a result
of either converting a raid1 to raid4/5 (takeover) or converting
a raid4/5 with more than 2 legs to raid1 with 2 legs (reshape).

The raid4/5 personalities map those as raid1,
thus reject conversion to striped/raid0.

Resolves: rhbz1511047
2017-11-08 17:49:04 +01:00
Zdenek Kabelac
b78add3df5 tests: allow to proceed futher with test
So raid doesn't want to create raid arrays with bigger regionsize ATM,
so just use smaller regionsize.
2017-11-08 00:02:54 +01:00
Zdenek Kabelac
d6473b2018 tests: detect racy systemd version
Systemd 222 has a bug - where it sometimes unpredictably umounts a just
mounted device - skip testing when this happens.
2017-11-08 00:02:54 +01:00
Zdenek Kabelac
2172115379 tests: have some space for 2nd mda
pvcreate with 2MDAs needs some extra space.
2017-11-08 00:02:54 +01:00
Zdenek Kabelac
f7fc7bc44a lvconvert: update delaying message
Make more obvious the operation just got delayed
(using same wording as with thin snapshots)
2017-11-08 00:02:54 +01:00
Zdenek Kabelac
e822a9f38d tests: snasphot merging 2017-11-07 21:34:35 +01:00
Zdenek Kabelac
0c9e3e8df2 coverity: add some initilizers
Coverity cannot do a deeper analysis so let's just make the reports
go away and initialize them to 0.
2017-11-07 21:26:11 +01:00
Zdenek Kabelac
3076a839a5 cleanup: drop unneeded headerfiles
Coverity reported these are no longer in use.
2017-11-07 21:26:11 +01:00
Zdenek Kabelac
f7f2f77dca cleanup: update messages in lvconvert
Use display_lvname and update thin snapshot merge error message.
2017-11-07 21:26:11 +01:00
Zdenek Kabelac
e2fce429cf cleanup: gcc const warning 2017-11-07 21:26:11 +01:00
Zdenek Kabelac
d45a9c0f5b shellcheck: fsadm cleanup
Use some more "" for bash vars
2017-11-07 21:26:11 +01:00
Zdenek Kabelac
0f0baec1f3 blkdeactive: use /sbin for mdamd
Do not use lvm's  @SBINDIR@ for mdadm path.
Set this directly to  /sbin/mdadm like other tools.

Group them separately
2017-11-07 21:26:11 +01:00
Zdenek Kabelac
2354fb3fe4 coverity: avoid overflow_before_widen
TODO: it likely should be checked value is >0...
2017-11-07 21:26:11 +01:00
Zdenek Kabelac
56b527a6fb coverity: avoid memleak
When security_level was set, allocated filename was leaking.
2017-11-07 21:26:09 +01:00
Eric Ren
14d0b0bbdd clvmd: supress ENOENT error on testing connection
In HA cluster, we have "clvm" resource agent to manage clvmd daemon.
The agent invokes clvmd like: "clvmd -T90 -d0", which  always prints
a scaring error message:

"""
local socket: connect failed: No such file or directory
"""

When specified with "-d" option, clvmd tries to check if an instance
of the clvmd daemon is already running through a testing connection.
The connect() will fail with this ENOENT error in such case, so suppress
the error message in such case.

TODO: add missing error reaction code - since after log_error, program
is not supposed to continue running (log_error() is for reporting
stopping problems).

Signed-off-by: Eric Ren <zren@suse.com>
2017-11-07 21:24:39 +01:00
Zdenek Kabelac
014122256b snapshot: prevent repeated merging
Check and prevent starting another snapshot merge before
existing merging is finished.

TODO: we can possibly implement smarter logic to drop existing
merging and start a new one.
2017-11-07 21:22:50 +01:00
Alasdair G Kergon
1b6dfd4802 post-release 2017-11-03 04:42:54 +00:00
Alasdair G Kergon
29300e72ee pre-release 2017-11-03 02:28:55 +00:00
Jonathan Brassow
4129cf5090 testsuite: Forgot to pull 'should's after fixing RAID4/5/6 mismatch test
Test will now fail rather than warn if conditions are not met.
2017-11-02 10:25:46 -05:00
Jonathan Brassow
9e8dec2f38 testsuite: Fix problem when checking RAID4/5/6 for mismatches.
The lvchange-raid[456].sh test checks that mismatches can be detected
properly.  It does this by writing garbage to the back half of one of
the legs directly.  When performing a "check" or "repair" of mismatches,
MD does a good job going directly to disk and bypassing any buffers that
may prevent it from seeing mismatches.  However, in the case of RAID4/5/6
we have the stripe cache to contend with and this is not bypassed.  Thus,
mismatches which have /just/ happened to an area that now populates the
stripe cache may be overlooked.  This isn't a serious issue, however,
because the stripe cache is short-lived and reasonably small.  So, while
there may be a small window of time between the disk changing underneath
the RAID array and when you run a "check"/"repair" - causing a mismatch
to be missed - that would be no worse than if a user had simply run a
"check" a few seconds before the disk changed.  IOW, it simply isn't worth
making a fuss over dropping the stripe cache before beginning a "check" or
"repair" (which we actually did attempt to do a while back).

So, to get the test running smoothly, we simply deactivate and reactivate
the LV to force the stripe cache to be dropped and then proceed.  We could
just as easily wait a few seconds for the stripe cache to empty also.
2017-11-02 09:49:35 -05:00
Jonathan Brassow
5013032845 testsuite: Add and document a 'should' for "idle" -> "recover" RAID test
When a "recover" is just starting for a RAID LV, it is possible to get
"idle" for the sync action if the status is issued quickly enough.  This
is fine, the MD thread just hasn't gotten things going yet.  However,
the /need/ for a "recover" should be marked in md->recovery and it would
be simple enough to fix the kernel so this doesn't happen.  May eventually
want a separate bug for this, but for now it fits with RHBZ 1507719.
2017-11-02 08:53:48 -05:00
Bastian Blank
58b763c99c systemd: remove Install sections from socket-activated services
We always preferred and recommended socket activation for our services
so remove the Install section in related .service units which are unused
in this case and keep only the Install section in associated .socket
units.

Signed-off-by: Bastian Blank <waldi@debian.org>
2017-11-01 15:37:19 +01:00
Zdenek Kabelac
7a394575fb cleanup: use segtype_is_raid_with_meta
Replace with common macro.
2017-11-01 00:59:22 +01:00
Zdenek Kabelac
7a28b243fa cleanup: pvmove messages
Just add some dots to messages and remove unneeded
stack trace from return after log_error.
2017-11-01 00:58:31 +01:00
Zdenek Kabelac
373372c8ab lv_manip: hide layered LV temporarily
Since vg_validate() now rejects LVs without segments and
insert_layer_for_segments_on_pv() gets just created
'layer_lv' without segment,  it needs to be hidden
from vg->lvs during processing of _align_segment_boundary_to_pe_range()
as this function calls  lv_validate() and now requires
vg to be consistent.  LV is then put back into vg->lvs.
2017-11-01 00:55:24 +01:00
Zdenek Kabelac
0ba3939542 pvmove: simplify name generation 2017-11-01 00:55:24 +01:00
Jonathan Brassow
bc29785d09 test: clean-up failing test case and document 'should' cases
There are two known bugs in the lvconvert-raid-status-validation.sh
test.  The first one I consider to be more of an annoyance (1507719).
The second one I consider to be more serious (1507729).

RHBZ 1507719 simply documents the fact that the three RAID status
fields may not always be coherent due to the way they are set and
unset when the MD thread is shutting down and starting up.  For
example, the sync ratio may be 100% but the sync action may not
yet have switched to "idle" and the health characters may not yet
all be 'A's (i.e. the devices set to InSync).

RHBZ 1507729 is more serious.  The sync ratio can be 100% for a
short period of time after upconverting linear -> RAID1.  It is
reset to 0 once the MD sync thread gets to work on it.  It does
this because, technically, the array /is/ in-sync if the new
devices are excluded - i.e. the data is 100% available and
consistent.  I'm not sure what to do about this problem, but we'd
much rather not have this state that looks exactly like the
end of the process when the sync ratio is 100% because the
"recover" process finished, but the sync action and health
characters haven't been updated yet.  Put simply, the problem
is that we can't tell if a sync is starting or finished based
on the status output.
2017-10-30 21:58:38 -05:00
Alasdair G Kergon
248144d066 liblvm: Fix segfault in lvm_pv_remove.
Since 4fa5add6b1 ("pvcreate: Wipe cached
bootloaderarea when wiping label.") label_remove is responsible
for the lvmcache_del.  (toollib and liblvm need fixing to share
the code.)
2017-10-30 22:03:35 +00:00
Zdenek Kabelac
7e3be7d1ba tests: revert commit change
Commit 0424410773 by mistake took also
this unwanted local modification of test - revert it.
2017-10-30 17:27:29 +01:00
Zdenek Kabelac
2b6391538c raid: setup LV size earlier
New validation code which does require to not store LV with no size
(no segments) revealed this size setup code needs to happen
earlier.
2017-10-30 17:23:56 +01:00
Zdenek Kabelac
3efd1f9c6e tests: load reiserfs
Preload reiserfs module for the case, fs is present/compiled for a
kernel but it's not present in memory.

Size reduction needs --yes confirmation to proceed for reiserfs.
2017-10-30 14:39:16 +01:00
Zdenek Kabelac
83d5db056b lvreduce: check LV has segment
Before accessing content make sure LV has segment.
This can be used in case code removes LV without segments
(i.e. on some error path)
2017-10-30 14:39:16 +01:00
Zdenek Kabelac
0424410773 validation: capture store of LV without segment 2017-10-30 14:39:16 +01:00
Zdenek Kabelac
e9206fb93d devcache: track more udev errors
Add a bit more details for failing udev function.
2017-10-30 13:16:50 +01:00
Zdenek Kabelac
af21263cb3 thin: fix merging messages
Correct reported message when thin snapshot has been already merged.
So lvm2 is no longer reporting "Merging of snapshot X will occur..."
(even with swapped names).
2017-10-30 11:53:39 +01:00
Zdenek Kabelac
90ee7783b4 pool: drop create spare on error path
When thin/cache pool creation fails and command created _pmspare,
such volume is now removed on error path.
2017-10-30 11:53:39 +01:00
Alasdair G Kergon
52fd66210b metadata: Avoid accessing ignored metadata.
When an ignored metadata area gets flagged for use again, make sure the
code doesn't try to parse its old metadata.  Firstly by trying to detect
this situation and skipping the read (while still remembering the
position reached in the circular buffer), and secondly by clearing the
invalid live metadata location on disk as a precaution when subsequently
writing out the precommitted metadata.

Problems showed up when a metadata area in one VG got moved to
another VG in ignored state (still holding metadata for the original
VG) and then later got brought into use in the new VG - only the header
should be read in this case, not any of the metadata content.
2017-10-27 22:53:43 +01:00
Alasdair G Kergon
84aca4201e vgsplit: Fix detection of moved PVs.
vgsplit shares the vg_rename code so that must only set the PV_MOVED_VG
flag introduced in commit 486ed10848
("vgmerge: Fix intermediate metadata corruption") on PVs that moved.
2017-10-27 22:53:43 +01:00
Zdenek Kabelac
eb710cced1 tests: update checked messages 2017-10-27 17:07:42 +02:00
Zdenek Kabelac
63c50ced89 snapshot: relocate common code validation for snapshot origin
Since both lvcreate and lvconvert needs to check for same
type of allowed origin for snapshot - move the code into
a single function.

This way we also fix several inconsistencies where snapshot
has been allowed by mistake either through lvcreate or
lvconvert path.
2017-10-27 17:07:42 +02:00
Zdenek Kabelac
0c68c19c32 man: Makefiles with V=1 support
Generation of man pages is generating lot of barely readable output.
For normal build quietize this a bit.

For original verbose build start to use  'make V=1'
(just like i.e. linux kernel does)

TODO: apply at more places...
2017-10-27 17:05:15 +02:00
Alasdair G Kergon
4b0f6829f6 dmsetup: Add unknown device error to dmsetup status.
Treat status the same way as info if provided device name doesn't exist.
2017-10-26 17:47:13 +01:00
Heinz Mauelshagen
fdcc709ed0 WHATS_NEW: ignore stripes/stripesize on RAID takeover 2017-10-26 18:18:24 +02:00
Heinz Mauelshagen
adb80816fb lvcreate: error message with dot. 2017-10-26 17:25:22 +02:00
Heinz Mauelshagen
4a3884245d raid: ignore --stripes/--stripesize on takeover
Converting from one raid level to another, no changes
of stripes or stripesize can be requested because those
are subject to reshaping.  I.e. the process requires to
takeover first and secondly request raid algorithm,
stripe or stripesize changes.

Ignore any related changes, display warnings
and proceed with the takeover.

Without this patch, a takeover requesting
stripesize change causes data corruption!
2017-10-26 17:16:23 +02:00
Zdenek Kabelac
b765288bf2 tests: better clustering support
Use exclusive activation for snapshot conversion since we can only
convert exclusively active volumes.
2017-10-26 14:04:58 +02:00
Zdenek Kabelac
1e80ec8926 tests: allow override of LVM_LOG_FILE_MAX_LINES
Just like with other vars support this:

make check_local T=xyz LVM_LOG_FILE_MAX_LINES=10000000

Allows easily to override existing line limit.
Also increase limiting size of logs per command since some of
our commands are becoming very verbose....
2017-10-26 14:04:58 +02:00
Zdenek Kabelac
04186616be Makefile: help shows hint about LVM_LOG_FILE_MAX_LINES 2017-10-26 14:04:58 +02:00
Zdenek Kabelac
837bfab75c log: better message when reached log limit
Add explaining message, when command was aborted due to the reach
of configure line number count (LVM_LOG_FILE_MAX_LINES)
for logging (used mainly with testing).
2017-10-26 14:04:58 +02:00
Zdenek Kabelac
1758614f96 WHATS_NEW: missed
Last patch missed to mention, we've improved/fixed generated paths
in units and init.d shell scripts when lvm2 was plainly configured
with just i.e. --prefix.

Note: some distros might have fully specified --sbindir and
--usrsbindir - thus those were not seeing problems in generated paths.
2017-10-26 14:04:58 +02:00
Zdenek Kabelac
44c4fe8e61 commands: drop secondary for lvconvert --type snapshot
Both forms were marked as secondary, thus none of the supported
syntax entered manpage.

This restores appearance of snapshot conversion in man page.
2017-10-25 22:02:54 +02:00
Zdenek Kabelac
35df4b10eb shellcheck: some apostrophe changes and cleanups 2017-10-25 22:02:54 +02:00
Zdenek Kabelac
34618c2d30 scripts: paths update
Correct usage of sbindir also for scripts so the path
no longer needs resolving more vars like exec_prefix & prefix.
2017-10-25 22:02:54 +02:00
Zdenek Kabelac
d809fbb541 systemd: use proper sbindir path
Replace lowercase  @sbindir@  with  @SBINDIR@ which contains
fully decoded path.

Same with  @usrsbindir@ which is also used with clvmd and cmirrord.

Also handle SYSCONFDIR for EnvironmentFile.

Patch fixes generated unit files with strings like:
ExecStart=${exec_prefix}/sbin/lvm
2017-10-25 22:02:54 +02:00
Zdenek Kabelac
3f59969c3f configure: improve support for sbindir path
Introduce few more AC_SUBST vars for usage in *.in generation.

In some case we want to replace i.e. $sbindir with full path
instead of current ${exec_prefix}/sbin.

This patch provides:

USRSBINDIR
SBINDIR
DEFAULT_SYS_LOCK_DIR
SYSCONFDIR

At the same time properly use sbindir & usrsbindir with
lvm, fsadm, clvmd from one primary definition.
2017-10-25 22:02:54 +02:00
Zdenek Kabelac
f32ef63b6c tests: snapshot conversions
Add missing tests for snapshot conversions.
2017-10-25 22:02:24 +02:00
Zdenek Kabelac
0a0cc696ca typo: fix invalid 2017-10-25 22:02:24 +02:00
Zdenek Kabelac
0e7edd1d24 snapshot: improve validation
Do not allow to take snapshot of mirror/raid leg or log or metadata LV.
This was actually never supported, but user was able to create it,
and this put device stack in hardly fixable state (needs manual work).

This prevents such creation to pass.

Also improve validation when recreating snapshot volume type
from origin and COW volume.
2017-10-25 21:58:01 +02:00
Jonathan Brassow
38f7fbac64 clean-up: Correct the comment to match the particular test case 2017-10-24 14:06:44 -05:00
Zdenek Kabelac
10c76ce35a tests:check lvconvert with /dev in vglvname 2017-10-24 16:16:08 +02:00
Zdenek Kabelac
ea63a38f5a lvconvert: fixing extraction of vgname
Correction to function for extracting vgname out of lvconvert
parameters.

Avoid repeating some checks.

Add code to handle generic options which may provide vgname in its argument
and compare them all so they match to a single vgname (otherwise it's a
error).

Extract default (envvar) vgname only when no position nor optional vgname is
found.

Fixing regression introduced with patchset started with commit:
1e2420bca8   (2.02.169)
2017-10-24 16:16:08 +02:00
Ondrej Kozina
dcc8f90c58 fsadm: refactor resize_crypt function
split resize_crypt function in two.

a) Detect proper dm-crypt device type and count new --size
   value for cryptsetup resize command.

b) Perform the resize
2017-10-24 13:41:16 +02:00
Ondrej Kozina
9916d8fa9a fsadm: rename local variables to avoid confusion 2017-10-24 13:41:09 +02:00
Ondrej Kozina
213cea3aaa test: add regression test for fsadm bug
the bug in LUKS grow/shrink decision in fsadm was
masked due to fact that default LVM2 extent size
was larger than LUKS1 default data offset for dm-crypt
mapping. The new test address this bug.
2017-10-24 13:41:00 +02:00
Ondrej Kozina
af781897fa fsadm: fix bug in LUKS grow/shrink decision branch 2017-10-24 13:40:54 +02:00
Ondrej Kozina
6df7917581 fsadm: add luks specific error message for small devices 2017-10-24 13:40:50 +02:00
Zdenek Kabelac
888dd33148 tests: check stacked cache dataLV of thin-pool 2017-10-23 12:01:16 +02:00
Zdenek Kabelac
df3ff32fc0 lvcreate: skip checking for name restriction for caching
lvcreate supports a 'conversion' when caching LV.
This normally worked fine, however in case passed LV was
thin-pool's data LV with suffix _tdata we have failed too early.

As the easiest fix looks dropping validation of name when
caching type is selected - such name check will happen later
once the VG is opened again and properly detect if the LV
with protected name already exists and can be converted,
or will be rejected as ambiguous operation requiring user
to specify  --type cache | --type cache-pool.
2017-10-23 12:01:15 +02:00
Zdenek Kabelac
d6fcab900b lvextend: detect stacked cache lv used for thinpool
Ensure that cacheLV is not tried to be resized until full support is
added.
2017-10-23 12:00:43 +02:00
Zdenek Kabelac
de58df390b lvconvert: preserve names of converted LV
When prompting and warning for conversion, remember initial LV names,
so after conversion is finished, correct original names are printed.
2017-10-23 11:58:27 +02:00
Heinz Mauelshagen
d6f4563103 test: remove 'should's from test to test target status race fix 2017-10-19 17:41:44 +02:00
Alasdair G Kergon
f3ae99dcc0 liblvm: Move lib code used exclusively into metadata-liblvm.c
Also remove some redundant function definitions from metadata.h.
2017-10-18 19:29:32 +01:00
Alasdair G Kergon
f1cc5b12fd tidy: Add missing underscores to statics. 2017-10-18 15:58:13 +01:00
Zdenek Kabelac
327d9d59be libdm: fix typo in libdevmapper.pc
Fixing name for RT libraries and using RT_LIBS.
2017-10-18 00:04:06 +02:00
David Teigland
1b319f39d6 lvmlockd: check error for sanlock access to lvmlock LV
When the sanlock daemon does not have permission to access
the lvmlock LV, make the error messages more helpful.
2017-10-17 13:45:53 -05:00
Alasdair G Kergon
146745ad88 device: Separate errors for dev not found and filtered.
Replaced the confusing device error message "not found (or ignored by
filtering)" by either "not found" or "excluded by a filter".
(Later we should be able to say which filter.)

Left the liblvm code paths alone.
2017-10-17 02:12:41 +01:00
Zdenek Kabelac
1f359f7558 tests: check external origin is monitored 2017-10-16 15:47:46 +02:00
Zdenek Kabelac
186a3da998 thin: monitor also external origin
Add missing monitoring for external origin LVs and add -real suffix
for UUID used for monitoring of external origin.
2017-10-16 15:47:46 +02:00
Marian Csontos
12aff59183 configure: autoreconf 2017-10-16 07:48:23 +02:00
David Teigland
6ac1e04b3a replicator: remove the code
It has not been used in a long time and is not
expected to be used further.
2017-10-13 16:20:42 -05:00
Marian Csontos
e14c0cabd9 Update WHATS_NEW 2017-10-13 13:11:01 +02:00
Heinz Mauelshagen
cf13a30eaa lvcreate: allow 100%FREE creation of "--type mirror" to work
Fixes the following case with 3PVs and 3 legs "mirror" LV:

# lvcreate -l100%FREE --type mirror -m2 vg3
  Insufficient free space for log allocation for logical volume .
  Unable to allocate extents for mirror log.

Related: rhbz1269533
2017-10-12 17:43:24 +02:00
Marian Csontos
ae55b1b20a test: "Disable" lvconvert-raid-reshape
...when running from ramdisk. This causes test failure, so it is kept on
eyes.
2017-10-12 10:55:02 +02:00
Ondrej Kozina
71261ae374 test: update fsadm-crypt to pass with legacy cryptsetup 2017-10-11 14:39:33 +02:00
Ondrej Kozina
f5beb58561 fsadm: implement 'check' for a fs on top of LUKS 2017-10-11 14:39:22 +02:00
Ondrej Kozina
c795a3b37f fsadm: fail 'check' explicitly on unsupported fs type 2017-10-11 14:39:15 +02:00
Ondrej Kozina
74fd0dd6c4 fsadm: respect DM_DEV_DIR variable 2017-10-11 14:39:07 +02:00
Zdenek Kabelac
e02e5b0c5b activation: fix activation lock
Activation lock has a primary purpose to serialize locking of individual
LV in case there is no other protecting mechanism for parallel
execution.

However in the case an activated LV is composed from several other LVs,
no one should be able to manipulate those LVs as well.

This patch adds a very 'naive' global VG activation locking in this case.
In the future we may introduce smarter function detecting minimal closed
graph components if this will appear as bottleneck

Patch checks if the  VG Write lock is held - in this case we do not
need any more locking - command has exclusive access to VG.

In case we have clustered VG and we are activating an LV which does not
need other LVs - we also do not need any more locks.

In all other cases take respective lock - for single LV - use lvid,
for complex LVs  use vgname.
2017-10-11 14:24:28 +02:00
Zdenek Kabelac
9bd7615fef activation: fix locking resource name for activation lock
Avoid cutting away 1st. character for activation lock.
Unlike with VG name locks like #orphan we should not cut-off 1st.
character from resource name.
2017-10-11 14:24:28 +02:00
Ondrej Kozina
e61313843f test: fsadm-crypt.sh fixes
use conservative 'luksOpen' command instead
fix reiserfs tests on plain crypt devices
2017-10-10 15:47:04 +02:00
Ondrej Kozina
59145715f1 fsadm: minor fixes for crypt support
drop useless asterisk expansion
enforce matching string prefix by adding ^
2017-10-10 15:21:35 +02:00
Ondrej Kozina
27ef503b35 test: add fsadm crypt test 2017-10-10 14:19:07 +02:00
Ondrej Kozina
30293baaa0 fsadm: add support for crypt devices 2017-10-10 14:18:57 +02:00
Alasdair G Kergon
22789563de thin: Improve overprovisioning and repair warnings. 2017-10-09 19:48:00 +01:00
Heinz Mauelshagen
3a639d8144 raid: cleanup raid4/5/6/10 validation check 2017-10-09 16:13:45 +02:00
Heinz Mauelshagen
44275c763c raid: fix validation check for raid0 segment data_offset member
Commit 2f754b73ff missed one.
2017-10-09 16:03:35 +02:00
Heinz Mauelshagen
5f13e33d54 lvcreate: fix region size on striped RaidLVs
Creating striped RaidLVs with lv size not divisible by region size
caused the region size to be adjusted:

# lvcreate   --type raid5 -n region_check.32.00m_3 -i 3 -L 1g --nosync -R 32.00m raid_sanity
  Using default stripesize 64.00 KiB.
  Rounding size 1.00 GiB (256 extents) up to stripe boundary size <1.01 GiB(258 extents).
  WARNING: New raid5 won't be synchronised. Don't read what you didn't write!
  Using reduced mirror region size of 8.00 MiB
  Logical volume region_check.32.00m_3 created.

Fix by not imposing "mirror" constraints on "raid".

Resolves: rhbz1404007
2017-10-09 14:35:06 +02:00
Heinz Mauelshagen
2f754b73ff raid: fix validation checks for segment data_offset member
Commit 222e1e3ace was not
valuing special case of data_offset member equal to 1.
2017-10-09 14:01:23 +02:00
Heinz Mauelshagen
554a761db2 raid: return previous reshape space allocation properly
Fix returning previous allocation of reshape space.
2017-10-09 13:55:01 +02:00
Alasdair G Kergon
f005a6e792 man: regenerate 2017-10-06 23:10:02 +01:00
Alasdair G Kergon
a994fc5e50 post-release 2017-10-06 22:16:58 +01:00
Alasdair G Kergon
73b72b8331 pre-release 2017-10-06 22:10:48 +01:00
Alasdair G Kergon
486ed10848 vgmerge: Fix intermediate metadata corruption
vgmerge suffers from a similar problem to the one fixed in commit
8146548d25 ("vgsplit: Fix intermediate
metadata corruption.")

When merging, splitting or renaming VGs, use a new PV status flag
PV_MOVED_VG to mark the PVs that hold metadata with the old VG name and
use this to provide PV-level granularity instead of incorrectly assuming
all PVs in the VG are the same.
2017-10-06 02:20:45 +01:00
Thomas Lamprecht
a781b1c178 dmeventd: ensure systemd service gets stopped on shutdown
Add these for dmeventd systemd unit (dm-event.service):

  Before: shutdown.target
  Conflicts: shutdown.target

This will cause the dmeventd to be properly stopped at shutdown (after
all the dmeventd clients unregistered their devices from monitoring)
with dm-event.service's stop action (there's no direct action defined
for the "stop" so systemd sends SIGTERM instead).

Before, we let dmeventd to get killed only as part of the very last
SIGTERM/SIGKILL for all the remaining processes late in the shutdown
sequence so we may have missed some logs if dmeventd encountered an
error during its shutdown (logging facilities are already off at this
late time in shutdown sequence).

Ref: https://www.redhat.com/archives/lvm-devel/2017-October/msg00000.html
2017-10-05 13:01:55 +02:00
Zdenek Kabelac
3ae8adce92 fsadm: add --help
Newer version of blockdev tool needs --help as the tool is no longer
printing help without this option like it used to in past.
2017-10-05 10:23:20 +02:00
Zdenek Kabelac
9940c2f754 dmeventd: schedule exit on break
When dmeventd receives SIGTERM/INT/HUP/QUIT it validates if exit is possible.
If there was any device still monitored, such exit request used to
be ignored/refused. This 'usually' worked reasonably well, however if there
is very short time period between last device is unmonitored and signal
reception - there was possibility such EXIT was ignored, as dmeventd has
not yet got into idle state even commands like 'vgchange -an' has already
finished.

This patch changes logic towards scheduling EXIT to the nearest
point when there is no monitored device.

EXIT is never forgotten.

NOTE: if there is only a single monitored device and someone sends
SIGTERM and later someone uses i.e. 'lvchange --refresh' after
unmonitoring dmeventd will exit and new instance needs to be
started.
2017-10-05 10:19:21 +02:00
Heinz Mauelshagen
a95f656d0d raid: enhance conversion rejection message
Related: rhbz1439399
2017-10-04 17:05:59 +02:00
Tony Asleson
32c87d56b1 lvmdbusd: thread stacks dump support
If you send a SIGUSR1 (10) to the daemon it will dump all the
threads current stacks to stdout.  This will be useful when the
daemon is apparently hung and not processing requests.

eg.
$ sudo kill -10 <daemon pid>
2017-09-27 07:45:00 -05:00
Tony Asleson
60e3dbd6d5 lvmdbusd: Give threads names
This will allow easier debug.
2017-09-27 07:45:00 -05:00
Tony Asleson
2074094e77 lvmdbusd: Main thread exception logging
Make sure that any and all code that executes in the main thread is
wrapped with a try/except block to ensure that at the very least
we log when things are going wrong.
2017-09-27 07:45:00 -05:00
Marian Csontos
090db98828 pvmove: Change error message
Change error message to match previously used one used by tests.
2017-09-27 13:20:25 +02:00
Alasdair G Kergon
8146548d25 vgsplit: Fix intermediate metadata corruption.
Changing the VG of a PV uses the same on-disk mechanism as vgrename.
This relies on recognising both the old and new VG names.  Prior to this
patch the vgsplit code incorrectly provided the new VG name twice
instead of the old and new ones.  This led the low-level mechanism not
to recognise the device as already belonging to a VG and so paying no
attention to the location of its existing metadata, sometimes partly
overwriting it and then later trying to read the corrupt metadata and
issuing a checksum error.
2017-09-22 18:34:34 +01:00
Peter Rajnoha
b0f4e0fcec blkdeactivate: cleanup
Use a different variable for translated name in dm/md_deactivate fn
Fix dmraidoption/dmraidoptions in man page.
2017-09-22 11:10:57 +02:00
Tony Asleson
bdccab07f9 lvmdbusd: Improve args comparison 2017-09-21 14:35:36 -05:00
Tony Asleson
7a6e438df8 lvmdbusd: Ensure vg_uuid is present
In some cases we are seeing where there are no VGs, but the data returned from
lvm shows that the PVs have the following for the VG:

"vg_name":"[unknown]", "vg_uuid":""

The code was only checking for the existence of the VG name and we called into
the function get_object_path_by_uuid_lvm_id which requires both the VG name and
the LV name to exist (asserts this) which results in the following stack trace:

Traceback (most recent call last):
  File "/home/tasleson/lvm2/daemons/lvmdbusd/utils.py", line 563, in runner
    obj._run()
  File "/home/tasleson/lvm2/daemons/lvmdbusd/utils.py", line 584, in _run
    self.rc = self.f(*self.args)
  File "/home/tasleson/lvm2/daemons/lvmdbusd/fetch.py", line 26, in
		_main_thread_load
    cache_refresh=False)[1]
  File "/home/tasleson/lvm2/daemons/lvmdbusd/pv.py", line 48, in load_pvs
    emit_signal, cache_refresh)
  File "/home/tasleson/lvm2/daemons/lvmdbusd/loader.py", line 37, in common
    objects = retrieve(search_keys, cache_refresh=False)
  File "/home/tasleson/lvm2/daemons/lvmdbusd/pv.py", line 40, in
		pvs_state_retrieve
    p["pv_attr"], p["pv_tags"], p["vg_name"], p["vg_uuid"]))
  File "/home/tasleson/lvm2/daemons/lvmdbusd/pv.py", line 84, in __init__
    vg_uuid, vg_name, vg_obj_path_generate)
  File "/home/tasleson/lvm2/daemons/lvmdbusd/objectmanager.py", line 318,
		in get_object_path_by_uuid_lvm_id
    assert uuid
AssertionError
2017-09-21 14:35:36 -05:00
Tony Asleson
e3965d392c lvmdbusd: Fix hang in MThreadRunner
When executing in the main thread, if we encounter an exception we
will bypass the notify_all call on the condition and the calling thread
never wakes up.

@staticmethod
    def runner(obj):
        # noinspection PyProtectedMember
Exception thrown here
 ----> obj._run()
So the following code doesn't run, which causes calling thread to hang
	with obj.cond:
            obj.function_complete = True
            obj.cond.notify_all()

Additionally for some unknown reason the stderr is lost.
Best guess is it's something to do with scheduling a python function
into the GLib.idle_add.  That made finding issue quite difficult.
2017-09-21 14:35:36 -05:00
Peter Rajnoha
096fcb5a6e blkdeactivate: also try to unmount /boot on blkdeactivate -u if on top of supported device
There's nothing special about /boot other than it's used during boot.
But when blkdeactivate is called either on all devices or including a
device where the /boot is on top, we should also include this mount
point when doing unmount before deactivation of supported devices.
2017-09-21 17:15:48 +02:00
Peter Rajnoha
584b4ae38b blk-availability: use blkdeactivate -r wait in blk-availability systemd service/initscript 2017-09-21 17:10:15 +02:00
Peter Rajnoha
95087c8f96 blkdeactivate: add blkdeactivate -r wait option to wait for MD resync/recovery/reshape
The new blkdeactivate -r|mdraidoption wait causes blkdeactivate to wait
for any resync/recovery/reshape that is currently in progress before
deactivating the device.

If this option is used, blkdeactivate calls mdadm -W|--wait before
mdadm -S|--stop.
2017-09-21 17:10:15 +02:00
Peter Rajnoha
7fa0d52262 blkdeactivate: fix blkdeactivate regression with failing DM/MD devs deactivation
Revert dc50f2f4a0.

We're canonicalizing/escaping the names here and we're reusing the
variable name so the code doesn't need to use extra variables and
further assignments that may confuse us. Let's keep the code simple.

The

   local name=(...$name)

is not the same as

   local name
   name=(...$name)

(I know various code-checking tools fuss about this and recommend
the 2nd way, but let's ignore those tools' nitpicking here please.)
2017-09-21 17:10:15 +02:00
Peter Rajnoha
9f34125d5d blkdeactivate: fix --{dm,lvm,mpath}options option name recognition
There was a typo in blkdeactivate --dmoption/--lvmoption/mpathoption,
it had missing "s" at the end and it was not recognized properly, only
short names for the options (-d/-l/-m).
2017-09-21 17:08:44 +02:00
David Teigland
0ab9e4b6a7 improve error messages when command rules fail
When certain cmd def RULE's fail, the error messages can
sometimes be confusing.  This expands the error messages
to help clarify why the rule failed, especially in cases
where options are used incorrectly.
2017-09-20 11:10:35 -05:00
David Teigland
f2ee0e7aca pvmove: require LV name in a shared VG
In a shared VG, only allow pvmove with a named LV,
so that only PE's used by the LV will be moved.
The LV is then activated exclusively, ensuring that
the PE's being moved are not used from another host.

Previously, pvmove was mistakenly allowed on a full PV.
This won't work when LVs using that PV are active on
other hosts.
2017-09-20 09:56:51 -05:00
David Teigland
518a8e8cfb lvmlockd: activate mirror LVs in shared mode with cmirrord
Previously lvmlockd disallowed mirror LVs to be activated
in shared mode.
2017-09-20 09:55:34 -05:00
Zdenek Kabelac
d9f9ce1268 tests: use portable df output
For awk parsing more 'predictable' format of output line is needed.
2017-09-20 15:34:02 +02:00
Zdenek Kabelac
a15b796146 tests: repair of cachepool 2017-09-20 15:14:16 +02:00
Zdenek Kabelac
2fd2b197ab tests: check for free space in test dir
Avoid starting test, when test dir has less than 50M of free space.
Better to crash early before letting the machine die on weird crash
in OOM cases...

Also show free disk space when test starts
2017-09-20 15:14:16 +02:00
Zdenek Kabelac
a02db1c45a libdm: fix parentheses in assignment + comparison
As reported, fix incorrect placement of parentheses.
TODO: add testing code.
2017-09-20 15:14:16 +02:00
Zdenek Kabelac
a65649b45d lvconvert: support repair of cache/cachepool
Extend repair for cache and cachepool target
and use 'lvconvert_repair' routine name.
2017-09-20 15:14:16 +02:00
Zdenek Kabelac
aeb4f2bf3d lvconvert: repair supports poolmetadataspare switch
Enable handling of  --poolmetadataspare so the user can prevent
creation of _pmspare volume during --repair operation (just
like during actual lvcreate or lvconvert) for pool volumes.
2017-09-20 15:14:16 +02:00
Marian Csontos
03bcd29481 configure: Add warning for D-Bus w/o notifications
Building D-Bus support without notifications is not recommended.
2017-09-20 13:07:06 +02:00
Marian Csontos
0c46f9cf0c configure: Update configure 2017-09-20 13:07:06 +02:00
Marian Csontos
8493df8edd liblvm: Add deprecation warning to configure 2017-09-20 13:07:06 +02:00
Marian Csontos
4cbacf6bac liblvm: Deprecate library and bindings
It is recommended to use D-Bus API instead of liblvm2app or python
bindings.
2017-09-20 13:07:06 +02:00
David Teigland
5407327bc6 toollib: fix parentheses in assignment + comparison 2017-09-19 09:19:24 -05:00
David Teigland
200793edc6 man pvscan: just write --activate in description
rather than -a|--activate, because web pages on
man7.org are not printing the | character.
2017-09-18 10:56:39 -05:00
Alasdair G Kergon
d73eddcdc0 dmsetup: Process -S to filter input device lists.
The following commands now pass the device list through a
--select|-S filter before processing:

suspend resume clear wipe_table remove deps status table
2017-09-14 23:41:17 +01:00
David Teigland
8e8755319c lvcreate: use cmd defs to deny unsupported lockd cases
In a shared VG, lvconvert must be used to create thin pools
and cache pools, not the lvcreate variants of those commands.
Deny these cases early in lvcreate using the new command defs.
Denying these cases deeper in the code was missing some
cleanup of the partially completed command.
2017-09-14 12:28:48 -05:00
David Teigland
d93a2bb741 revert tidy: prefer not using else after return
Revert the lvmlockd.c changes from:
  commit 0bf836aa14
  "tidy: prefer not using else after return"

The commit introduced at least one regression, which broke
lvcreate of a thin pool in a shared VG.
2017-09-14 12:28:48 -05:00
Alasdair G Kergon
29834b6e91 kernel: Refresh dm-ioctl.h
Adds DM_DEV_ARM_POLL
2017-09-14 17:50:56 +01:00
Alasdair G Kergon
71dbe0fe26 post-release 2017-09-13 20:46:25 +01:00
Alasdair G Kergon
918f0a92da pre-release 2017-09-13 20:38:29 +01:00
Alasdair G Kergon
901c919d22 README: Include new github links
We've decided to mirror the upstream code onto github for the
convenience of people already using that platform.
2017-09-13 20:20:04 +01:00
Zdenek Kabelac
03efec2712 deamonize: restore detection of errors
Keep forked environment for daemon more strict and check even
for nearly impossible to happen errors.
2017-09-06 11:47:53 +02:00
David Teigland
3071837e21 lvmlockd: always disallow mirror splitting
lv_raid_split() was correctly prevented in a shared VG,
but lv_raid_split_and_track() was missing that check.
2017-09-05 10:28:33 -05:00
David Teigland
09c792c206 lvmlockd: fix check for no running lock manager
In some cases it was reporting there was no running
lock manager when there is.
2017-08-29 15:18:12 -05:00
270 changed files with 8592 additions and 5834 deletions

13
README
View File

@@ -8,10 +8,15 @@ There is no warranty - see COPYING and COPYING.LIB.
Tarballs are available from:
ftp://sourceware.org/pub/lvm2/
ftp://sources.redhat.com/pub/lvm2/
https://github.com/lvmteam/lvm2/releases
The source code is stored in git:
https://sourceware.org/git/?p=lvm2.git
git clone git://sourceware.org/git/lvm2.git
mirrored to:
https://github.com/lvmteam/lvm2
git clone https://github.com/lvmteam/lvm2.git
git clone git@github.com:lvmteam/lvm2.git
Mailing list for general discussion related to LVM2:
linux-lvm@redhat.com
@@ -29,6 +34,14 @@ and multipath-tools:
dm-devel@redhat.com
Subscribe from https://www.redhat.com/mailman/listinfo/dm-devel
Website:
https://sourceware.org/lvm2/
Report upstream bugs at:
https://bugzilla.redhat.com/enter_bug.cgi?product=LVM%20and%20device-mapper
or open issues at:
https://github.com/lvmteam/lvm2/issues
The source code repository used until 7th June 2012 is accessible here:
http://sources.redhat.com/cgi-bin/cvsweb.cgi/LVM2/?cvsroot=lvm2.

62
TESTING Normal file
View File

@@ -0,0 +1,62 @@
LVM2 Test Suite
===============
The codebase contains many tests in the test subdirectory.
Before running tests
--------------------
Keep in mind the testsuite MUST run under root user.
It is recommended not to use LVM on the test machine, especially when running
tests with udev (`make check_system`.)
You MUST disable (or mask) any LVM daemons:
- lvmetad
- dmeventd
- lvmpolld
- lvmdbusd
- lvmlockd
- clvmd
- cmirrord
For running cluster tests, we are using singlenode locking. Pass
`--with-clvmd=singlenode` to configure.
NOTE: This is useful only for testing, and should not be used in production
code.
To run D-Bus daemon tests, existing D-Bus session is required.
Running tests
-------------
As root run:
make check
To run only tests matching a string:
make check T=test
To skip tests matching a string:
make check S=test
There are other targets and many environment variables can be used to tweak the
testsuite - for full list and description run `make -C test help`.
Installing testsuite
--------------------
It is possible to install and run a testsuite against installed LVM. Run the
following:
make -C test install
Then lvm2-testsuite binary can be executed to test installed binaries.
See `lvm2-testsuite --help` for options. The same environment variables can be
used as with `make check`.

View File

@@ -1 +1 @@
2.02.174(2)-git (2017-07-20)
2.02.178(2)-git (2017-12-18)

View File

@@ -1 +1 @@
1.02.143-git (2017-07-20)
1.02.147-git (2017-12-18)

111
WHATS_NEW
View File

@@ -1,5 +1,112 @@
Version 2.02.174 -
=================================
Version 2.02.178 -
=====================================
Add devices/use_aio, aio_max, aio_memory to configure AIO limits.
Support asynchronous I/O when scanning devices.
Detect asynchronous I/O capability in configure or accept --disable-aio.
Add AIO_SUPPORTED_CODE_PATH to indicate whether AIO may be used.
Configure ensures /usr/sbin dir is checked for dmpd tools.
Restore pvmove support for wide-clustered active volumes (2.02.177).
Avoid non-exclusive activation of exclusive segment types.
Fix trimming sibling PVs when doing a pvmove of raid subLVs.
Preserve exclusive activation during thin snapshot merge.
Suppress some repeated reads of the same disk data at the device layer.
Avoid exceeding array bounds in allocation tag processing.
Refactor metadata reading code to use callback functions.
Move memory allocation for the key dev_reads into the device layer.
Version 2.02.177 - 18th December 2017
=====================================
When writing text metadata content, use complete 4096 byte blocks.
Change text format metadata alignment from 512 to 4096 bytes.
When writing metadata, consistently skip mdas marked as failed.
Refactor and adjust text format metadata alignment calculation.
Fix python3 path in lvmdbusd to use value detected by configure.
Reduce checks for active LVs in vgchange before background polling.
Ensure _node_send_message always uses clean status of thin pool.
Fix lvmlockd to use pool lock when accessing _tmeta volume.
Report expected sanlock_convert errors only when retries fail.
Avoid blocking in sanlock_convert on SH to EX lock conversion.
Deactivate missing raid LV legs (_rimage_X-missing_Y_Z) on deactivation.
Skip read-modify-write when entire block is replaced.
Categorise I/O with reason annotations in debug messages.
Allow extending of raid LVs created with --nosync after a failed repair.
Command will lock memory only when suspending volumes.
Merge segments when pvmove is finished.
Remove label_verify that has never been used.
Ensure very large numbers used as arguments are not cast to lower values.
Enhance reading and validation of options stripes and stripes_size.
Fix printing of default stripe size when user is not using stripes.
Activation code for pvmove automatically discovers holding LVs for resume.
Make a pvmove LV locking holder.
Do not change critical section counter on resume path without real resume.
Enhance activation code to automatically suspend pvmove participants.
Prevent conversion of thin volumes to snapshot origin when lvmlockd is used.
Correct the steps to change lock type in lvmlockd man page.
Retry lock acquisition on recognized sanlock errors.
Fix lock manager error codes in lvmlockd.
Remove unnecessary single read from lvmdiskscan.
Check raid reshape flags in vg_validate().
Add support for pvmove of cache and snapshot origins.
Avoid using precommitted metadata for suspending pvmove tree.
Enhance pvmove locking.
Deactivate activated LVs on error path when pvmove activation fails.
Add "io" to log/debug_classes for logging low-level I/O.
Eliminate redundant nested VG metadata in VG struct.
Avoid importing persistent filter in vgscan/pvscan/vgrename.
Fix memleak of string buffer when vgcfgbackup runs in secure mode.
Do not print error when clvmd cannot find running clvmd.
Prevent start of new merge of snapshot if origin is already being merged.
Fix offered type for raid6_n_6 to raid5 conversion (raid5_n).
Deactivate sub LVs when removing unused cache-pool.
Do not take backup with suspended devices.
Avoid RAID4 activation on incompatible kernels under all circumstances.
Reject conversion request to striped/raid0 on 2-legged raid4/5.
Version 2.02.176 - 3rd November 2017
====================================
Keep Install section only in lvm2-{lvmetad,lvmpolld}.socket systemd unit.
Fix segfault in lvm_pv_remove in liblvm. (2.02.173)
Do not allow storing VG metadata with LV without any segment.
Fix printed message when thin snapshot was already merged.
Remove created spare LV when creation of thin-pool failed.
Avoid reading ignored metadata when mda gets used again.
Fix detection of moved PVs in vgsplit. (2.02.175)
Ignore --stripes/--stripesize on RAID takeover.
Improve used paths for generated systemd units and init shells.
Disallow creation of snapshot of mirror/raid subLV (was never supported).
Fix regression in more advanced vgname extraction in lvconvert (2.02.169).
Allow lvcreate to be used for caching of _tdata LV.
Avoid internal error when resizing cache type _tdata LV (not yet supported).
Show original converted names when lvconverting LV to pool volume.
Move lib code used only by liblvm into metadata-liblvm.c.
Distinguish between device not found and excluded by filter.
Monitor external origin LVs.
Remove the replicator code, including configure --with-replicators.
Allow lvcreate --type mirror to work with 100%FREE.
Improve selection of resource name for complex volume activation lock.
Avoid cutting first character of resource name for activation lock.
Support for encrypted devices in fsadm.
Improve thin pool overprovisioning and repair warning messages.
Fix incorrect adjustment of region size on striped RaidLVs.
Version 2.02.175 - 6th October 2017
===================================
Use --help with blockdev when checking for --getsize64 support in fsadm.
Dump lvmdbusd debug information with SIGUSR1.
Fix metadata corruption in vgsplit and vgmerge intermediate states.
Add PV_MOVED_VG PV status flag to mark PVs moving between VGs.
Fix lvmdbus hang and recognise unknown VG correctly.
Improve error messages when command rules fail.
Require LV name with pvmove in a shared VG.
Allow shared active mirror LVs with lvmlockd, dlm, and cmirrord.
Support lvconvert --repair with cache and cachepool volumes.
lvconvert --repair respects --poolmetadataspare option.
Mark that we don't plan to develop liblvm2app and python bindings any further.
Fix thin pool creation in shared VG. (2.02.173)
Version 2.02.174 - 13th September 2017
======================================
Prevent raid1 split with trackchanges in a shared VG.
Avoid double unlocking of client & lockspace mutexes in lvmlockd.
Fix leaking of file descriptor for non-blocking filebased locking.
Fix check for 2nd mda at end of disk fits if using pvcreate --restorefile.

View File

@@ -1,5 +1,35 @@
Version 1.02.143 -
=================================
Version 1.02.147 -
=====================================
Parsing mirror status accepts 'userspace' keyword in status.
Introduce dm_malloc_aligned for page alignment of buffers.
Version 1.02.146 - 18th December 2017
=====================================
Activation tree of thin pool skips duplicated check of pool status.
Remove code supporting replicator target.
Do not ignore failure of _info_by_dev().
Propagate delayed resume for pvmove subvolumes.
Suppress integrity encryption keys in 'table' output unless --showkeys supplied.
Version 1.02.145 - 3rd November 2017
====================================
Keep Install section only in dm-event.socket systemd unit.
Issue a specific error with dmsetup status if device is unknown.
Fix RT_LIBS reference in generated libdevmapper.pc for pkg-config.
Version 1.02.144 - 6th October 2017
===================================
Schedule exit when received SIGTERM in dmeventd.
Also try to unmount /boot on blkdeactivate -u if on top of supported device.
Use blkdeactivate -r wait in blk-availability systemd service/initscript.
Add blkdeactivate -r wait option to wait for MD resync/recovery/reshape.
Fix blkdeactivate regression with failing DM/MD devs deactivation (1.02.142).
Fix typo in blkdeactivate's '--{dm,lvm,mpath}options' option name.
Correct return value testing when getting reserved values for reporting.
Take -S with dmsetup suspend/resume/clear/wipe_table/remove/deps/status/table.
Version 1.02.143 - 13th September 2017
======================================
Restore umask when creation of node fails.
Add --concise to dmsetup create for many devices with tables in one command.
Accept minor number without major in library when it knows dm major number.

View File

@@ -59,6 +59,22 @@ devices {
# This configuration option is advanced.
scan = [ "/dev" ]
# Configuration option devices/use_aio.
# Use linux asynchronous I/O for parallel device access where possible.
# This configuration option has an automatic default value.
# use_aio = 1
# Configuration option devices/aio_max.
# Maximum number of asynchronous I/Os to issue concurrently.
# This configuration option has an automatic default value.
# aio_max = 128
# Configuration option devices/aio_memory.
# Approximate maximum total amount of memory (in MB) used
# for asynchronous I/O buffers.
# This configuration option has an automatic default value.
# aio_memory = 10
# Configuration option devices/obtain_device_list_from_udev.
# Obtain the list of available devices from udev.
# This avoids opening or using any inapplicable non-block devices or
@@ -611,9 +627,9 @@ log {
# Select log messages by class.
# Some debugging messages are assigned to a class and only appear in
# debug output if the class is listed here. Classes currently
# available: memory, devices, activation, allocation, lvmetad,
# available: memory, devices, io, activation, allocation, lvmetad,
# metadata, cache, locking, lvmpolld. Use "all" to see everything.
debug_classes = [ "memory", "devices", "activation", "allocation", "lvmetad", "metadata", "cache", "locking", "lvmpolld", "dbus" ]
debug_classes = [ "memory", "devices", "io", "activation", "allocation", "lvmetad", "metadata", "cache", "locking", "lvmpolld", "dbus" ]
}
# Configuration section backup.

199
configure vendored
View File

@@ -643,6 +643,7 @@ LVMETAD_PIDFILE
DMEVENTD_PIDFILE
WRITE_INSTALL
VALGRIND_POOL
USRSBINDIR
USE_TRACKING
UDEV_HAS_BUILTIN_BLKID
UDEV_RULE_EXEC_DETECTION
@@ -656,8 +657,10 @@ TESTING
STATIC_LINK
STATICDIR
SNAPSHOTS
SYSCONFDIR
SELINUX_PC
SELINUX_LIBS
SBINDIR
REPLICATORS
READLINE_LIBS
RT_LIBS
@@ -704,12 +707,15 @@ FSADM
ELDFLAGS
DM_LIB_PATCHLEVEL
DMEVENTD_PATH
AIO_LIBS
DL_LIBS
AIO
DEVMAPPER
DEFAULT_USE_LVMLOCKD
DEFAULT_USE_LVMPOLLD
DEFAULT_USE_LVMETAD
DEFAULT_USE_BLKID_WIPING
DEFAULT_SYS_LOCK_DIR
DEFAULT_SYS_DIR
DEFAULT_SPARSE_SEGTYPE
DEFAULT_RUN_DIR
@@ -920,7 +926,6 @@ with_mirrors
with_raid
with_default_mirror_segtype
with_default_raid10_segtype
with_replicators
with_default_sparse_segtype
with_thin
with_thin_check
@@ -951,6 +956,7 @@ enable_profiling
enable_testing
enable_valgrind_pool
enable_devmapper
enable_aio
enable_lvmetad
enable_lvmpolld
enable_lvmlockd_sanlock
@@ -1689,6 +1695,7 @@ Optional Features:
--enable-testing enable testing targets in the makefile
--enable-valgrind-pool enable valgrind awareness of pools
--disable-devmapper disable LVM2 device-mapper interaction
--disable-aio disable asynchronous I/O
--enable-lvmetad enable the LVM Metadata Daemon
--enable-lvmpolld enable the LVM Polling Daemon
--enable-lvmlockd-sanlock
@@ -1755,7 +1762,6 @@ Optional Packages:
default mirror segtype: raid1/mirror [raid1]
--with-default-raid10-segtype=TYPE
default mirror segtype: raid10/mirror [raid10]
--with-replicators=TYPE replicator support: internal/shared/none [none]
--with-default-sparse-segtype=TYPE
default sparse segtype: thin/snapshot [thin]
--with-thin=TYPE thin provisioning support: internal/shared/none
@@ -3177,6 +3183,7 @@ case "$host_os" in
LDDEPS="$LDDEPS .export.sym"
LIB_SUFFIX=so
DEVMAPPER=yes
AIO=yes
BUILD_LVMETAD=no
BUILD_LVMPOLLD=no
LOCKDSANLOCK=no
@@ -3196,6 +3203,7 @@ case "$host_os" in
CLDNOWHOLEARCHIVE=
LIB_SUFFIX=dylib
DEVMAPPER=yes
AIO=no
ODIRECT=no
DM_IOCTLS=no
SELINUX=no
@@ -4367,6 +4375,7 @@ ac_compiler_gnu=$ac_cv_c_compiler_gnu
CFLAGS=$save_CFLAGS
CXXFLAGS=$save_CXXFLAGS
PATH_SBIN="$PATH:/usr/sbin:/sbin"
ac_ext=c
@@ -8240,9 +8249,14 @@ $as_echo "$ac_cv_flag_HAVE_FULL_RELRO" >&6; }
################################################################################
if test "$prefix" = NONE; then
datarootdir=${ac_default_prefix}/share
fi
################################################################################
test "$exec_prefix" = NONE -a "$prefix" = NONE && exec_prefix=""
test "x$prefix" = xNONE && prefix=$ac_default_prefix
# Let make expand exec_prefix.
test "x$exec_prefix" = xNONE && exec_prefix='${prefix}'
################################################################################
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking file owner" >&5
@@ -8566,28 +8580,6 @@ _ACEOF
################################################################################
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether to include replicators" >&5
$as_echo_n "checking whether to include replicators... " >&6; }
# Check whether --with-replicators was given.
if test "${with_replicators+set}" = set; then :
withval=$with_replicators; REPLICATORS=$withval
else
REPLICATORS=none
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $REPLICATORS" >&5
$as_echo "$REPLICATORS" >&6; }
case "$REPLICATORS" in
none|shared) ;;
internal)
$as_echo "#define REPLICATOR_INTERNAL 1" >>confdefs.h
;;
*) as_fn_error $? "--with-replicators parameter invalid ($REPLICATORS)" "$LINENO" 5 ;;
esac
# Check whether --with-default-sparse-segtype was given.
if test "${with_default_sparse_segtype+set}" = set; then :
@@ -8687,7 +8679,7 @@ else
;;
*)
as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
for as_dir in $PATH
for as_dir in $PATH_SBIN
do
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
@@ -8730,7 +8722,7 @@ else
;;
*)
as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
for as_dir in $PATH
for as_dir in $PATH_SBIN
do
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
@@ -8811,7 +8803,7 @@ else
;;
*)
as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
for as_dir in $PATH
for as_dir in $PATH_SBIN
do
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
@@ -8854,7 +8846,7 @@ else
;;
*)
as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
for as_dir in $PATH
for as_dir in $PATH_SBIN
do
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
@@ -8918,7 +8910,7 @@ else
;;
*)
as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
for as_dir in $PATH
for as_dir in $PATH_SBIN
do
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
@@ -8961,7 +8953,7 @@ else
;;
*)
as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
for as_dir in $PATH
for as_dir in $PATH_SBIN
do
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
@@ -9025,7 +9017,7 @@ else
;;
*)
as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
for as_dir in $PATH
for as_dir in $PATH_SBIN
do
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
@@ -9068,7 +9060,7 @@ else
;;
*)
as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
for as_dir in $PATH
for as_dir in $PATH_SBIN
do
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
@@ -9236,7 +9228,7 @@ else
;;
*)
as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
for as_dir in $PATH
for as_dir in $PATH_SBIN
do
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
@@ -9279,7 +9271,7 @@ else
;;
*)
as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
for as_dir in $PATH
for as_dir in $PATH_SBIN
do
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
@@ -9372,7 +9364,7 @@ else
;;
*)
as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
for as_dir in $PATH
for as_dir in $PATH_SBIN
do
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
@@ -9415,7 +9407,7 @@ else
;;
*)
as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
for as_dir in $PATH
for as_dir in $PATH_SBIN
do
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
@@ -9479,7 +9471,7 @@ else
;;
*)
as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
for as_dir in $PATH
for as_dir in $PATH_SBIN
do
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
@@ -9522,7 +9514,7 @@ else
;;
*)
as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
for as_dir in $PATH
for as_dir in $PATH_SBIN
do
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
@@ -9586,7 +9578,7 @@ else
;;
*)
as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
for as_dir in $PATH
for as_dir in $PATH_SBIN
do
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
@@ -9629,7 +9621,7 @@ else
;;
*)
as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
for as_dir in $PATH
for as_dir in $PATH_SBIN
do
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
@@ -11840,6 +11832,67 @@ $as_echo "#define DEVMAPPER_SUPPORT 1" >>confdefs.h
fi
################################################################################
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether to use asynchronous I/O" >&5
$as_echo_n "checking whether to asynchronous I/O... " >&6; }
# Check whether --enable-aio was given.
if test "${enable_aio+set}" = set; then :
enableval=$enable_aio; AIO=$enableval
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $AIO" >&5
$as_echo "$AIO" >&6; }
if test "$AIO" = yes; then
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for io_setup in -laio" >&5
$as_echo_n "checking for io_setup in -laio... " >&6; }
if ${ac_cv_lib_aio_io_setup+:} false; then :
$as_echo_n "(cached) " >&6
else
ac_check_lib_save_LIBS=$LIBS
LIBS="-laio $LIBS"
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
/* Override any GCC internal prototype to avoid an error.
Use char because int might match the return type of a GCC
builtin and then its argument prototype would still apply. */
#ifdef __cplusplus
extern "C"
#endif
char io_setup ();
int
main ()
{
return io_setup ();
;
return 0;
}
_ACEOF
if ac_fn_c_try_link "$LINENO"; then :
ac_cv_lib_aio_io_setup=yes
else
ac_cv_lib_aio_io_setup=no
fi
rm -f core conftest.err conftest.$ac_objext \
conftest$ac_exeext conftest.$ac_ext
LIBS=$ac_check_lib_save_LIBS
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_aio_io_setup" >&5
$as_echo "$ac_cv_lib_aio_io_setup" >&6; }
if test "x$ac_cv_lib_aio_io_setup" = xyes; then :
$as_echo "#define AIO_SUPPORT 1" >>confdefs.h
AIO_LIBS="-laio"
AIO_SUPPORT=yes
else
AIO_LIBS=
AIO_SUPPORT=no
fi
fi
################################################################################
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether to build LVMetaD" >&5
$as_echo_n "checking whether to build LVMetaD... " >&6; }
@@ -12791,6 +12844,10 @@ $as_echo "$APPLIB" >&6; }
test "$APPLIB" = yes \
&& LVM2APP_LIB=-llvm2app \
|| LVM2APP_LIB=
if test "$APPLIB"; then :
{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: liblvm2app is deprecated. Use D-Bus API" >&5
$as_echo "$as_me: WARNING: liblvm2app is deprecated. Use D-Bus API" >&2;}
fi
################################################################################
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether to compile liblvm2cmd.so" >&5
@@ -12821,6 +12878,10 @@ fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $BUILD_LVMDBUSD" >&5
$as_echo "$BUILD_LVMDBUSD" >&6; }
if test "$NOTIFYDBUS_SUPPORT" = yes && test "BUILD_LVMDBUSD" = yes; then :
{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: Building D-Bus support without D-Bus notifications." >&5
$as_echo "$as_me: WARNING: Building D-Bus support without D-Bus notifications." >&2;}
fi
################################################################################
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether to build Python wrapper for liblvm2app.so" >&5
@@ -12906,7 +12967,7 @@ if ${am_cv_pathless_PYTHON+:} false; then :
$as_echo_n "(cached) " >&6
else
for am_cv_pathless_PYTHON in python python2 python3 python3.3 python3.2 python3.1 python3.0 python2.7 python2.6 python2.5 python2.4 python2.3 python2.2 python2.1 python2.0 none; do
for am_cv_pathless_PYTHON in python python2 python3 python3.5 python3.4 python3.3 python3.2 python3.1 python3.0 python2.7 python2.6 python2.5 python2.4 python2.3 python2.2 python2.1 python2.0 none; do
test "$am_cv_pathless_PYTHON" = none && break
prog="import sys
# split strings by '.' and convert to numeric. Append some zeros
@@ -13474,7 +13535,7 @@ if ${am_cv_pathless_PYTHON+:} false; then :
$as_echo_n "(cached) " >&6
else
for am_cv_pathless_PYTHON in python python2 python3 python3.3 python3.2 python3.1 python3.0 python2.7 python2.6 python2.5 python2.4 python2.3 python2.2 python2.1 python2.0 none; do
for am_cv_pathless_PYTHON in python python2 python3 python3.5 python3.4 python3.3 python3.2 python3.1 python3.0 python2.7 python2.6 python2.5 python2.4 python2.3 python2.2 python2.1 python2.0 none; do
test "$am_cv_pathless_PYTHON" = none && break
prog="import sys
# split strings by '.' and convert to numeric. Append some zeros
@@ -13798,7 +13859,7 @@ fi
PYTHON3_INCDIRS=`"$PYTHON3_CONFIG" --includes`
PYTHON3_LIBDIRS=`"$PYTHON3_CONFIG" --libs`
PYTHON3DIR=$pythondir
PYTHON_BINDINGS=yes
test "$PYTHON3_BINDINGS" = yes && PYTHON_BINDINGS=yes
fi
if test "$BUILD_LVMDBUSD" = yes; then
@@ -13868,6 +13929,8 @@ $as_echo "no" >&6; }
fi
if test "$PYTHON_BINDINGS" = yes -o "$PYTHON2_BINDINGS" = yes -o "$PYTHON3_BINDINGS" = yes; then
{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: Python bindings are deprecated. Use D-Bus API" >&5
$as_echo "$as_me: WARNING: Python bindings are deprecated. Use D-Bus API" >&2;}
test "$APPLIB" != yes && as_fn_error $? "Python_bindings require --enable-applib" "$LINENO" 5
fi
@@ -14027,9 +14090,6 @@ $as_echo "#define HAVE_CANONICALIZE_FILE_NAME 1" >>confdefs.h
fi
################################################################################
test "$exec_prefix" = NONE -a "$prefix" = NONE && exec_prefix=""
################################################################################
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for dlopen in -ldl" >&5
$as_echo_n "checking for dlopen in -ldl... " >&6; }
@@ -15318,7 +15378,7 @@ else
;;
*)
as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
for as_dir in $PATH
for as_dir in $PATH_SBIN
do
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
@@ -15361,7 +15421,7 @@ else
;;
*)
as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
for as_dir in $PATH
for as_dir in $PATH_SBIN
do
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
@@ -15411,28 +15471,25 @@ _ACEOF
fi
SYSCONFDIR="$(eval echo $(eval echo $sysconfdir))"
lvm_exec_prefix=$exec_prefix
test "$lvm_exec_prefix" = NONE && lvm_exec_prefix=$prefix
test "$lvm_exec_prefix" = NONE && lvm_exec_prefix=$ac_default_prefix
LVM_PATH="$lvm_exec_prefix/sbin/lvm"
SBINDIR="$(eval echo $(eval echo $sbindir))"
LVM_PATH="$SBINDIR/lvm"
cat >>confdefs.h <<_ACEOF
#define LVM_PATH "$LVM_PATH"
_ACEOF
clvmd_prefix=$ac_default_prefix
test "$prefix" != NONE && clvmd_prefix=$prefix
CLVMD_PATH="$clvmd_prefix/sbin/clvmd"
USRSBINDIR="$(eval echo $(eval echo $usrsbindir))"
CLVMD_PATH="$USRSBINDIR/clvmd"
cat >>confdefs.h <<_ACEOF
#define CLVMD_PATH "$CLVMD_PATH"
_ACEOF
FSADM_PATH="$lvm_exec_prefix/sbin/fsadm"
FSADM_PATH="$SBINDIR/fsadm"
cat >>confdefs.h <<_ACEOF
#define FSADM_PATH "$FSADM_PATH"
@@ -15462,7 +15519,7 @@ if test "$BUILD_DMEVENTD" = yes; then
if test "${with_dmeventd_path+set}" = set; then :
withval=$with_dmeventd_path; DMEVENTD_PATH=$withval
else
DMEVENTD_PATH="$lvm_exec_prefix/sbin/dmeventd"
DMEVENTD_PATH="$SBINDIR/dmeventd"
fi
@@ -15543,6 +15600,11 @@ cat >>confdefs.h <<_ACEOF
_ACEOF
# Select default system locking dir, prefer /run/lock over /var/lock
DEFAULT_SYS_LOCK_DIR="$RUN_DIR/lock"
test -d "$DEFAULT_SYS_LOCK_DIR" || DEFAULT_SYS_LOCK_DIR="/var/lock"
# Support configurable locking subdir for lvm
# Check whether --with-default-locking-dir was given.
if test "${with_default_locking_dir+set}" = set; then :
@@ -15550,8 +15612,7 @@ if test "${with_default_locking_dir+set}" = set; then :
else
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for default lock directory" >&5
$as_echo_n "checking for default lock directory... " >&6; }
DEFAULT_LOCK_DIR="$RUN_DIR/lock/lvm"
test -d "$RUN_DIR/lock" || DEFAULT_LOCK_DIR="/var/lock/lvm"
DEFAULT_LOCK_DIR="$DEFAULT_SYS_LOCK_DIR/lvm"
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $DEFAULT_LOCK_DIR" >&5
$as_echo "$DEFAULT_LOCK_DIR" >&6; }
fi
@@ -15773,6 +15834,12 @@ _ACEOF
@@ -15781,7 +15848,7 @@ _ACEOF
################################################################################
ac_config_files="$ac_config_files Makefile make.tmpl daemons/Makefile daemons/clvmd/Makefile daemons/cmirrord/Makefile daemons/dmeventd/Makefile daemons/dmeventd/libdevmapper-event.pc daemons/dmeventd/plugins/Makefile daemons/dmeventd/plugins/lvm2/Makefile daemons/dmeventd/plugins/raid/Makefile daemons/dmeventd/plugins/mirror/Makefile daemons/dmeventd/plugins/snapshot/Makefile daemons/dmeventd/plugins/thin/Makefile daemons/dmfilemapd/Makefile daemons/lvmdbusd/Makefile daemons/lvmdbusd/path.py daemons/lvmetad/Makefile daemons/lvmpolld/Makefile daemons/lvmlockd/Makefile conf/Makefile conf/example.conf conf/lvmlocal.conf conf/command_profile_template.profile conf/metadata_profile_template.profile include/.symlinks include/Makefile lib/Makefile lib/format1/Makefile lib/format_pool/Makefile lib/locking/Makefile lib/mirror/Makefile lib/replicator/Makefile include/lvm-version.h lib/raid/Makefile lib/snapshot/Makefile lib/thin/Makefile lib/cache_segtype/Makefile libdaemon/Makefile libdaemon/client/Makefile libdaemon/server/Makefile libdm/Makefile libdm/libdevmapper.pc liblvm/Makefile liblvm/liblvm2app.pc man/Makefile po/Makefile python/Makefile python/setup.py scripts/blkdeactivate.sh scripts/blk_availability_init_red_hat scripts/blk_availability_systemd_red_hat.service scripts/clvmd_init_red_hat scripts/cmirrord_init_red_hat scripts/com.redhat.lvmdbus1.service scripts/dm_event_systemd_red_hat.service scripts/dm_event_systemd_red_hat.socket scripts/lvm2_cluster_activation_red_hat.sh scripts/lvm2_cluster_activation_systemd_red_hat.service scripts/lvm2_clvmd_systemd_red_hat.service scripts/lvm2_cmirrord_systemd_red_hat.service scripts/lvm2_lvmdbusd_systemd_red_hat.service scripts/lvm2_lvmetad_init_red_hat scripts/lvm2_lvmetad_systemd_red_hat.service scripts/lvm2_lvmetad_systemd_red_hat.socket scripts/lvm2_lvmpolld_init_red_hat scripts/lvm2_lvmpolld_systemd_red_hat.service scripts/lvm2_lvmpolld_systemd_red_hat.socket scripts/lvm2_lvmlockd_systemd_red_hat.service 
scripts/lvm2_lvmlocking_systemd_red_hat.service scripts/lvm2_monitoring_init_red_hat scripts/lvm2_monitoring_systemd_red_hat.service scripts/lvm2_pvscan_systemd_red_hat@.service scripts/lvm2_tmpfiles_red_hat.conf scripts/lvmdump.sh scripts/Makefile test/Makefile test/api/Makefile test/unit/Makefile tools/Makefile udev/Makefile unit-tests/datastruct/Makefile unit-tests/regex/Makefile unit-tests/mm/Makefile"
ac_config_files="$ac_config_files Makefile make.tmpl daemons/Makefile daemons/clvmd/Makefile daemons/cmirrord/Makefile daemons/dmeventd/Makefile daemons/dmeventd/libdevmapper-event.pc daemons/dmeventd/plugins/Makefile daemons/dmeventd/plugins/lvm2/Makefile daemons/dmeventd/plugins/raid/Makefile daemons/dmeventd/plugins/mirror/Makefile daemons/dmeventd/plugins/snapshot/Makefile daemons/dmeventd/plugins/thin/Makefile daemons/dmfilemapd/Makefile daemons/lvmdbusd/Makefile daemons/lvmdbusd/lvmdbusd daemons/lvmdbusd/lvmdb.py daemons/lvmdbusd/lvm_shell_proxy.py daemons/lvmdbusd/path.py daemons/lvmetad/Makefile daemons/lvmpolld/Makefile daemons/lvmlockd/Makefile conf/Makefile conf/example.conf conf/lvmlocal.conf conf/command_profile_template.profile conf/metadata_profile_template.profile include/.symlinks include/Makefile lib/Makefile lib/format1/Makefile lib/format_pool/Makefile lib/locking/Makefile lib/mirror/Makefile include/lvm-version.h lib/raid/Makefile lib/snapshot/Makefile lib/thin/Makefile lib/cache_segtype/Makefile libdaemon/Makefile libdaemon/client/Makefile libdaemon/server/Makefile libdm/Makefile libdm/libdevmapper.pc liblvm/Makefile liblvm/liblvm2app.pc man/Makefile po/Makefile python/Makefile python/setup.py scripts/blkdeactivate.sh scripts/blk_availability_init_red_hat scripts/blk_availability_systemd_red_hat.service scripts/clvmd_init_red_hat scripts/cmirrord_init_red_hat scripts/com.redhat.lvmdbus1.service scripts/dm_event_systemd_red_hat.service scripts/dm_event_systemd_red_hat.socket scripts/lvm2_cluster_activation_red_hat.sh scripts/lvm2_cluster_activation_systemd_red_hat.service scripts/lvm2_clvmd_systemd_red_hat.service scripts/lvm2_cmirrord_systemd_red_hat.service scripts/lvm2_lvmdbusd_systemd_red_hat.service scripts/lvm2_lvmetad_init_red_hat scripts/lvm2_lvmetad_systemd_red_hat.service scripts/lvm2_lvmetad_systemd_red_hat.socket scripts/lvm2_lvmpolld_init_red_hat scripts/lvm2_lvmpolld_systemd_red_hat.service 
scripts/lvm2_lvmpolld_systemd_red_hat.socket scripts/lvm2_lvmlockd_systemd_red_hat.service scripts/lvm2_lvmlocking_systemd_red_hat.service scripts/lvm2_monitoring_init_red_hat scripts/lvm2_monitoring_systemd_red_hat.service scripts/lvm2_pvscan_systemd_red_hat@.service scripts/lvm2_tmpfiles_red_hat.conf scripts/lvmdump.sh scripts/Makefile test/Makefile test/api/Makefile test/unit/Makefile tools/Makefile udev/Makefile unit-tests/datastruct/Makefile unit-tests/regex/Makefile unit-tests/mm/Makefile"
cat >confcache <<\_ACEOF
# This file is a shell script that caches the results of configure
@@ -16491,6 +16558,9 @@ do
"daemons/dmeventd/plugins/thin/Makefile") CONFIG_FILES="$CONFIG_FILES daemons/dmeventd/plugins/thin/Makefile" ;;
"daemons/dmfilemapd/Makefile") CONFIG_FILES="$CONFIG_FILES daemons/dmfilemapd/Makefile" ;;
"daemons/lvmdbusd/Makefile") CONFIG_FILES="$CONFIG_FILES daemons/lvmdbusd/Makefile" ;;
"daemons/lvmdbusd/lvmdbusd") CONFIG_FILES="$CONFIG_FILES daemons/lvmdbusd/lvmdbusd" ;;
"daemons/lvmdbusd/lvmdb.py") CONFIG_FILES="$CONFIG_FILES daemons/lvmdbusd/lvmdb.py" ;;
"daemons/lvmdbusd/lvm_shell_proxy.py") CONFIG_FILES="$CONFIG_FILES daemons/lvmdbusd/lvm_shell_proxy.py" ;;
"daemons/lvmdbusd/path.py") CONFIG_FILES="$CONFIG_FILES daemons/lvmdbusd/path.py" ;;
"daemons/lvmetad/Makefile") CONFIG_FILES="$CONFIG_FILES daemons/lvmetad/Makefile" ;;
"daemons/lvmpolld/Makefile") CONFIG_FILES="$CONFIG_FILES daemons/lvmpolld/Makefile" ;;
@@ -16507,7 +16577,6 @@ do
"lib/format_pool/Makefile") CONFIG_FILES="$CONFIG_FILES lib/format_pool/Makefile" ;;
"lib/locking/Makefile") CONFIG_FILES="$CONFIG_FILES lib/locking/Makefile" ;;
"lib/mirror/Makefile") CONFIG_FILES="$CONFIG_FILES lib/mirror/Makefile" ;;
"lib/replicator/Makefile") CONFIG_FILES="$CONFIG_FILES lib/replicator/Makefile" ;;
"include/lvm-version.h") CONFIG_FILES="$CONFIG_FILES include/lvm-version.h" ;;
"lib/raid/Makefile") CONFIG_FILES="$CONFIG_FILES lib/raid/Makefile" ;;
"lib/snapshot/Makefile") CONFIG_FILES="$CONFIG_FILES lib/snapshot/Makefile" ;;

View File

@@ -39,6 +39,7 @@ case "$host_os" in
LDDEPS="$LDDEPS .export.sym"
LIB_SUFFIX=so
DEVMAPPER=yes
AIO=yes
BUILD_LVMETAD=no
BUILD_LVMPOLLD=no
LOCKDSANLOCK=no
@@ -58,6 +59,7 @@ case "$host_os" in
CLDNOWHOLEARCHIVE=
LIB_SUFFIX=dylib
DEVMAPPER=yes
AIO=no
ODIRECT=no
DM_IOCTLS=no
SELINUX=no
@@ -77,6 +79,7 @@ AC_PROG_CC
AC_PROG_CXX
CFLAGS=$save_CFLAGS
CXXFLAGS=$save_CXXFLAGS
PATH_SBIN="$PATH:/usr/sbin:/sbin"
dnl probably no longer needed in 2008, but...
AC_PROG_GCC_TRADITIONAL
@@ -191,9 +194,15 @@ AC_SUBST(HAVE_FULL_RELRO)
################################################################################
dnl -- Prefix is /usr by default, the exec_prefix default is setup later
AC_PREFIX_DEFAULT(/usr)
if test "$prefix" = NONE; then
datarootdir=${ac_default_prefix}/share
fi
################################################################################
dnl -- Clear default exec_prefix - install into /sbin rather than /usr/sbin
test "$exec_prefix" = NONE -a "$prefix" = NONE && exec_prefix=""
test "x$prefix" = xNONE && prefix=$ac_default_prefix
# Let make expand exec_prefix.
test "x$exec_prefix" = xNONE && exec_prefix='${prefix}'
################################################################################
dnl -- Setup the ownership of the files
@@ -408,22 +417,6 @@ AC_DEFINE_UNQUOTED([DEFAULT_RAID10_SEGTYPE], ["$DEFAULT_RAID10_SEGTYPE"],
[Default segtype used for raid10 volumes.])
################################################################################
dnl -- asynchronous volume replicator inclusion type
AC_MSG_CHECKING(whether to include replicators)
AC_ARG_WITH(replicators,
AC_HELP_STRING([--with-replicators=TYPE],
[replicator support: internal/shared/none [none]]),
REPLICATORS=$withval, REPLICATORS=none)
AC_MSG_RESULT($REPLICATORS)
case "$REPLICATORS" in
none|shared) ;;
internal) AC_DEFINE([REPLICATOR_INTERNAL], 1,
[Define to 1 to include built-in support for replicators.]) ;;
*) AC_MSG_ERROR([--with-replicators parameter invalid ($REPLICATORS)]) ;;
esac
AC_ARG_WITH(default-sparse-segtype,
AC_HELP_STRING([--with-default-sparse-segtype=TYPE],
[default sparse segtype: thin/snapshot [thin]]),
@@ -478,7 +471,7 @@ case "$THIN" in
internal|shared)
# Empty means a config way to ignore thin checking
if test "$THIN_CHECK_CMD" = "autodetect"; then
AC_PATH_TOOL(THIN_CHECK_CMD, thin_check)
AC_PATH_TOOL(THIN_CHECK_CMD, thin_check, [], [$PATH_SBIN])
if test -z "$THIN_CHECK_CMD"; then
AC_MSG_WARN([thin_check not found in path $PATH])
THIN_CHECK_CMD=/usr/sbin/thin_check
@@ -502,7 +495,7 @@ case "$THIN" in
fi
# Empty means a config way to ignore thin dumping
if test "$THIN_DUMP_CMD" = "autodetect"; then
AC_PATH_TOOL(THIN_DUMP_CMD, thin_dump)
AC_PATH_TOOL(THIN_DUMP_CMD, thin_dump, [], [$PATH_SBIN])
test -z "$THIN_DUMP_CMD" && {
AC_MSG_WARN(thin_dump not found in path $PATH)
THIN_DUMP_CMD=/usr/sbin/thin_dump
@@ -511,7 +504,7 @@ case "$THIN" in
fi
# Empty means a config way to ignore thin repairing
if test "$THIN_REPAIR_CMD" = "autodetect"; then
AC_PATH_TOOL(THIN_REPAIR_CMD, thin_repair)
AC_PATH_TOOL(THIN_REPAIR_CMD, thin_repair, [], [$PATH_SBIN])
test -z "$THIN_REPAIR_CMD" && {
AC_MSG_WARN(thin_repair not found in path $PATH)
THIN_REPAIR_CMD=/usr/sbin/thin_repair
@@ -520,7 +513,7 @@ case "$THIN" in
fi
# Empty means a config way to ignore thin restoring
if test "$THIN_RESTORE_CMD" = "autodetect"; then
AC_PATH_TOOL(THIN_RESTORE_CMD, thin_restore)
AC_PATH_TOOL(THIN_RESTORE_CMD, thin_restore, [], [$PATH_SBIN])
test -z "$THIN_RESTORE_CMD" && {
AC_MSG_WARN(thin_restore not found in path $PATH)
THIN_RESTORE_CMD=/usr/sbin/thin_restore
@@ -592,7 +585,7 @@ case "$CACHE" in
internal|shared)
# Empty means a config way to ignore cache checking
if test "$CACHE_CHECK_CMD" = "autodetect"; then
AC_PATH_TOOL(CACHE_CHECK_CMD, cache_check)
AC_PATH_TOOL(CACHE_CHECK_CMD, cache_check, [], [$PATH_SBIN])
if test -z "$CACHE_CHECK_CMD"; then
AC_MSG_WARN([cache_check not found in path $PATH])
CACHE_CHECK_CMD=/usr/sbin/cache_check
@@ -627,7 +620,7 @@ case "$CACHE" in
fi
# Empty means a config way to ignore cache dumping
if test "$CACHE_DUMP_CMD" = "autodetect"; then
AC_PATH_TOOL(CACHE_DUMP_CMD, cache_dump)
AC_PATH_TOOL(CACHE_DUMP_CMD, cache_dump, [], [$PATH_SBIN])
test -z "$CACHE_DUMP_CMD" && {
AC_MSG_WARN(cache_dump not found in path $PATH)
CACHE_DUMP_CMD=/usr/sbin/cache_dump
@@ -636,7 +629,7 @@ case "$CACHE" in
fi
# Empty means a config way to ignore cache repairing
if test "$CACHE_REPAIR_CMD" = "autodetect"; then
AC_PATH_TOOL(CACHE_REPAIR_CMD, cache_repair)
AC_PATH_TOOL(CACHE_REPAIR_CMD, cache_repair, [], [$PATH_SBIN])
test -z "$CACHE_REPAIR_CMD" && {
AC_MSG_WARN(cache_repair not found in path $PATH)
CACHE_REPAIR_CMD=/usr/sbin/cache_repair
@@ -645,7 +638,7 @@ case "$CACHE" in
fi
# Empty means a config way to ignore cache restoring
if test "$CACHE_RESTORE_CMD" = "autodetect"; then
AC_PATH_TOOL(CACHE_RESTORE_CMD, cache_restore)
AC_PATH_TOOL(CACHE_RESTORE_CMD, cache_restore, [], [$PATH_SBIN])
test -z "$CACHE_RESTORE_CMD" && {
AC_MSG_WARN(cache_restore not found in path $PATH)
CACHE_RESTORE_CMD=/usr/sbin/cache_restore
@@ -1131,6 +1124,24 @@ if test "$DEVMAPPER" = yes; then
AC_DEFINE([DEVMAPPER_SUPPORT], 1, [Define to 1 to enable LVM2 device-mapper interaction.])
fi
################################################################################
dnl -- Disable aio
AC_MSG_CHECKING(whether to use asynchronous I/O)
AC_ARG_ENABLE(aio,
AC_HELP_STRING([--disable-aio],
[disable asynchronous I/O]),
AIO=$enableval)
AC_MSG_RESULT($AIO)
if test "$AIO" = yes; then
AC_CHECK_LIB(aio, io_setup,
[AC_DEFINE([AIO_SUPPORT], 1, [Define to 1 if aio is available.])
AIO_LIBS="-laio"
AIO_SUPPORT=yes],
[AIO_LIBS=
AIO_SUPPORT=no ])
fi
################################################################################
dnl -- Build lvmetad
AC_MSG_CHECKING(whether to build LVMetaD)
@@ -1464,6 +1475,8 @@ AC_SUBST([LVM2APP_LIB])
test "$APPLIB" = yes \
&& LVM2APP_LIB=-llvm2app \
|| LVM2APP_LIB=
dnl -- Warn only when liblvm2app is really enabled. A bare test on the
dnl -- variable succeeds for every non-empty value - even for no.
AS_IF([test "$APPLIB" = yes],
      [AC_MSG_WARN([liblvm2app is deprecated. Use D-Bus API])])
################################################################################
dnl -- Enable cmdlib
@@ -1484,6 +1497,8 @@ AC_ARG_ENABLE(dbus-service,
AC_HELP_STRING([--enable-dbus-service], [install D-Bus support]),
BUILD_LVMDBUSD=$enableval, BUILD_LVMDBUSD=no)
AC_MSG_RESULT($BUILD_LVMDBUSD)
dnl -- Warn when the D-Bus service is built while D-Bus notifications are not.
dnl -- The variable BUILD_LVMDBUSD must be expanded - comparing the bare word
dnl -- against yes can never match.
AS_IF([test "$NOTIFYDBUS_SUPPORT" != yes && test "$BUILD_LVMDBUSD" = yes],
      [AC_MSG_WARN([Building D-Bus support without D-Bus notifications.])])
################################################################################
dnl -- Enable Python liblvm2app bindings
@@ -1536,7 +1551,7 @@ if test "$PYTHON3_BINDINGS" = yes -o "$BUILD_LVMDBUSD" = yes; then
PYTHON3_INCDIRS=`"$PYTHON3_CONFIG" --includes`
PYTHON3_LIBDIRS=`"$PYTHON3_CONFIG" --libs`
PYTHON3DIR=$pythondir
PYTHON_BINDINGS=yes
test "$PYTHON3_BINDINGS" = yes && PYTHON_BINDINGS=yes
fi
if test "$BUILD_LVMDBUSD" = yes; then
@@ -1546,6 +1561,7 @@ if test "$BUILD_LVMDBUSD" = yes; then
fi
if test "$PYTHON_BINDINGS" = yes -o "$PYTHON2_BINDINGS" = yes -o "$PYTHON3_BINDINGS" = yes; then
AC_MSG_WARN([Python bindings are deprecated. Use D-Bus API])
test "$APPLIB" != yes && AC_MSG_ERROR([Python_bindings require --enable-applib])
fi
@@ -1609,10 +1625,6 @@ AC_CHECK_LIB(c, canonicalize_file_name,
AC_DEFINE([HAVE_CANONICALIZE_FILE_NAME], 1,
[Define to 1 if canonicalize_file_name is available.]))
################################################################################
dnl -- Clear default exec_prefix - install into /sbin rather than /usr/sbin
test "$exec_prefix" = NONE -a "$prefix" = NONE && exec_prefix=""
################################################################################
dnl -- Check for dlopen
AC_CHECK_LIB(dl, dlopen,
@@ -1879,26 +1891,23 @@ if test "$BUILD_DMFILEMAPD" = yes; then
fi
################################################################################
AC_PATH_TOOL(MODPROBE_CMD, modprobe)
AC_PATH_TOOL(MODPROBE_CMD, modprobe, [], [$PATH_SBIN])
if test -n "$MODPROBE_CMD"; then
AC_DEFINE_UNQUOTED([MODPROBE_CMD], ["$MODPROBE_CMD"], [The path to 'modprobe', if available.])
fi
SYSCONFDIR="$(eval echo $(eval echo $sysconfdir))"
lvm_exec_prefix=$exec_prefix
test "$lvm_exec_prefix" = NONE && lvm_exec_prefix=$prefix
test "$lvm_exec_prefix" = NONE && lvm_exec_prefix=$ac_default_prefix
LVM_PATH="$lvm_exec_prefix/sbin/lvm"
SBINDIR="$(eval echo $(eval echo $sbindir))"
LVM_PATH="$SBINDIR/lvm"
AC_DEFINE_UNQUOTED(LVM_PATH, ["$LVM_PATH"], [Path to lvm binary.])
clvmd_prefix=$ac_default_prefix
test "$prefix" != NONE && clvmd_prefix=$prefix
CLVMD_PATH="$clvmd_prefix/sbin/clvmd"
USRSBINDIR="$(eval echo $(eval echo $usrsbindir))"
CLVMD_PATH="$USRSBINDIR/clvmd"
AC_DEFINE_UNQUOTED(CLVMD_PATH, ["$CLVMD_PATH"], [Path to clvmd binary.])
FSADM_PATH="$lvm_exec_prefix/sbin/fsadm"
FSADM_PATH="$SBINDIR/fsadm"
AC_DEFINE_UNQUOTED(FSADM_PATH, ["$FSADM_PATH"], [Path to fsadm binary.])
################################################################################
@@ -1918,7 +1927,7 @@ if test "$BUILD_DMEVENTD" = yes; then
AC_HELP_STRING([--with-dmeventd-path=PATH],
[dmeventd path [EPREFIX/sbin/dmeventd]]),
DMEVENTD_PATH=$withval,
DMEVENTD_PATH="$lvm_exec_prefix/sbin/dmeventd")
DMEVENTD_PATH="$SBINDIR/dmeventd")
AC_DEFINE_UNQUOTED(DMEVENTD_PATH, ["$DMEVENTD_PATH"],
[Path to dmeventd binary.])
fi
@@ -1961,13 +1970,17 @@ AC_ARG_WITH(default-cache-subdir,
AC_DEFINE_UNQUOTED(DEFAULT_CACHE_SUBDIR, ["$DEFAULT_CACHE_SUBDIR"],
[Name of default metadata cache subdirectory.])
# Select default system locking dir, prefer /run/lock over /var/lock
DEFAULT_SYS_LOCK_DIR="$RUN_DIR/lock"
test -d "$DEFAULT_SYS_LOCK_DIR" || DEFAULT_SYS_LOCK_DIR="/var/lock"
# Support configurable locking subdir for lvm
AC_ARG_WITH(default-locking-dir,
AC_HELP_STRING([--with-default-locking-dir=DIR],
[default locking directory [autodetect_lock_dir/lvm]]),
DEFAULT_LOCK_DIR=$withval,
[AC_MSG_CHECKING(for default lock directory)
DEFAULT_LOCK_DIR="$RUN_DIR/lock/lvm"
test -d "$RUN_DIR/lock" || DEFAULT_LOCK_DIR="/var/lock/lvm"
DEFAULT_LOCK_DIR="$DEFAULT_SYS_LOCK_DIR/lvm"
AC_MSG_RESULT($DEFAULT_LOCK_DIR)])
AC_DEFINE_UNQUOTED(DEFAULT_LOCK_DIR, ["$DEFAULT_LOCK_DIR"],
[Name of default locking directory.])
@@ -2062,14 +2075,17 @@ AC_SUBST(DEFAULT_RAID10_SEGTYPE)
AC_SUBST(DEFAULT_RUN_DIR)
AC_SUBST(DEFAULT_SPARSE_SEGTYPE)
AC_SUBST(DEFAULT_SYS_DIR)
AC_SUBST(DEFAULT_SYS_LOCK_DIR)
AC_SUBST(DEFAULT_USE_BLKID_WIPING)
AC_SUBST(DEFAULT_USE_LVMETAD)
AC_SUBST(DEFAULT_USE_LVMPOLLD)
AC_SUBST(DEFAULT_USE_LVMLOCKD)
AC_SUBST(DEVMAPPER)
AC_SUBST(AIO)
AC_SUBST(DLM_CFLAGS)
AC_SUBST(DLM_LIBS)
AC_SUBST(DL_LIBS)
AC_SUBST(AIO_LIBS)
AC_SUBST(DMEVENTD_PATH)
AC_SUBST(DM_LIB_PATCHLEVEL)
AC_SUBST(ELDFLAGS)
@@ -2126,8 +2142,10 @@ AC_SUBST(SACKPT_CFLAGS)
AC_SUBST(SACKPT_LIBS)
AC_SUBST(SALCK_CFLAGS)
AC_SUBST(SALCK_LIBS)
AC_SUBST(SBINDIR)
AC_SUBST(SELINUX_LIBS)
AC_SUBST(SELINUX_PC)
AC_SUBST(SYSCONFDIR)
AC_SUBST(SYSTEMD_LIBS)
AC_SUBST(SNAPSHOTS)
AC_SUBST(STATICDIR)
@@ -2150,6 +2168,7 @@ AC_SUBST(UDEV_SYSTEMD_BACKGROUND_JOBS)
AC_SUBST(UDEV_RULE_EXEC_DETECTION)
AC_SUBST(UDEV_HAS_BUILTIN_BLKID)
AC_SUBST(USE_TRACKING)
AC_SUBST(USRSBINDIR)
AC_SUBST(VALGRIND_POOL)
AC_SUBST(WRITE_INSTALL)
AC_SUBST(DMEVENTD_PIDFILE)
@@ -2190,6 +2209,9 @@ daemons/dmeventd/plugins/snapshot/Makefile
daemons/dmeventd/plugins/thin/Makefile
daemons/dmfilemapd/Makefile
daemons/lvmdbusd/Makefile
daemons/lvmdbusd/lvmdbusd
daemons/lvmdbusd/lvmdb.py
daemons/lvmdbusd/lvm_shell_proxy.py
daemons/lvmdbusd/path.py
daemons/lvmetad/Makefile
daemons/lvmpolld/Makefile
@@ -2206,7 +2228,6 @@ lib/format1/Makefile
lib/format_pool/Makefile
lib/locking/Makefile
lib/mirror/Makefile
lib/replicator/Makefile
include/lvm-version.h
lib/raid/Makefile
lib/snapshot/Makefile

View File

@@ -1111,12 +1111,18 @@ static void be_daemon(int timeout)
}
/* Detach ourself from the calling environment */
(void) dup2(devnull, STDIN_FILENO);
(void) dup2(devnull, STDOUT_FILENO);
(void) dup2(devnull, STDERR_FILENO);
if ((dup2(devnull, STDIN_FILENO) == -1) ||
(dup2(devnull, STDOUT_FILENO) == -1) ||
(dup2(devnull, STDERR_FILENO) == -1)) {
perror("Error setting terminal FDs to /dev/null");
log_error("Error setting terminal FDs to /dev/null: %m");
exit(5);
}
if (devnull > STDERR_FILENO)
(void) close(devnull);
if ((devnull > STDERR_FILENO) && close(devnull)) {
log_sys_error("close", "/dev/null");
exit(7);
}
if (chdir("/")) {
log_error("Error setting current directory to /: %m");
@@ -2145,6 +2151,14 @@ static int add_to_lvmqueue(struct local_client *client, struct clvm_header *msg,
}
/* Return 0 if we can talk to an existing clvmd */
/*
* FIXME:
*
* This function returns only -1 or 0, but there are
* different levels of errors, some of them should stop
* further execution of clvmd thus another state is needed
* and some error messages need to be only informational.
*/
static int check_local_clvmd(void)
{
int local_socket;
@@ -2164,7 +2178,11 @@ static int check_local_clvmd(void)
if (connect(local_socket,(struct sockaddr *) &sockaddr,
sizeof(sockaddr))) {
log_sys_error("connect", "local socket");
/* connection failure is expected state */
if (errno == ENOENT)
log_sys_debug("connect", "local socket");
else
log_sys_error("connect", "local socket");
ret = -1;
}

View File

@@ -62,6 +62,8 @@
#include <syslog.h>
#define DM_SIGNALED_EXIT 1
#define DM_SCHEDULED_EXIT 2
static volatile sig_atomic_t _exit_now = 0; /* set to '1' when signal is given to exit */
/* List (un)link macros. */
@@ -752,6 +754,7 @@ static void *_timeout_thread(void *unused __attribute__((unused)))
struct thread_status *thread;
struct timespec timeout;
time_t curr_time;
int ret;
DEBUGLOG("Timeout thread starting.");
pthread_cleanup_push(_exit_timeout, NULL);
@@ -773,7 +776,10 @@ static void *_timeout_thread(void *unused __attribute__((unused)))
} else {
DEBUGLOG("Sending SIGALRM to Thr %x for timeout.",
(int) thread->thread);
pthread_kill(thread->thread, SIGALRM);
ret = pthread_kill(thread->thread, SIGALRM);
if (ret && (ret != ESRCH))
log_error("Unable to wakeup Thr %x for timeout: %s.",
(int) thread->thread, strerror(ret));
}
_unlock_mutex();
}
@@ -863,6 +869,7 @@ static int _event_wait(struct thread_status *thread)
* This is so that you can break out of waiting on an event,
* either for a timeout event, or to cancel the thread.
*/
sigemptyset(&old);
sigemptyset(&set);
sigaddset(&set, SIGALRM);
if (pthread_sigmask(SIG_UNBLOCK, &set, &old) != 0) {
@@ -1750,7 +1757,7 @@ static void _init_thread_signals(void)
*/
static void _exit_handler(int sig __attribute__((unused)))
{
_exit_now = 1;
_exit_now = DM_SIGNALED_EXIT;
}
#ifdef __linux__
@@ -2248,6 +2255,8 @@ int main(int argc, char *argv[])
for (;;) {
if (_idle_since) {
if (_exit_now) {
if (_exit_now == DM_SCHEDULED_EXIT)
break; /* Only prints shutdown message */
log_info("dmeventd detected break while being idle "
"for %ld second(s), exiting.",
(long) (time(NULL) - _idle_since));
@@ -2264,15 +2273,14 @@ int main(int argc, char *argv[])
break;
}
}
} else if (_exit_now) {
_exit_now = 0;
} else if (_exit_now == DM_SIGNALED_EXIT) {
_exit_now = DM_SCHEDULED_EXIT;
/*
* When '_exit_now' is set, signal has been received,
* but can not simply exit unless all
* threads are done processing.
*/
log_warn("WARNING: There are still devices being monitored.");
log_warn("WARNING: Refusing to exit.");
log_info("dmeventd received break, scheduling exit.");
}
_process_request(&fifos);
_cleanup_unused_threads();

View File

@@ -62,8 +62,6 @@ struct dso_state {
DM_EVENT_LOG_FN("thin")
#define UUID_PREFIX "LVM-"
static int _run_command(struct dso_state *state)
{
char val[3][36];

View File

@@ -1 +1,4 @@
path.py
lvmdbusd
lvmdb.py
lvm_shell_proxy.py

View File

@@ -26,9 +26,7 @@ LVMDBUS_SRCDIR_FILES = \
__init__.py \
job.py \
loader.py \
lvmdb.py \
main.py \
lvm_shell_proxy.py \
lv.py \
manager.py \
objectmanager.py \
@@ -40,14 +38,19 @@ LVMDBUS_SRCDIR_FILES = \
vg.py
LVMDBUS_BUILDDIR_FILES = \
lvmdb.py \
lvm_shell_proxy.py \
path.py
LVMDBUSD = $(srcdir)/lvmdbusd
LVMDBUSD = lvmdbusd
include $(top_builddir)/make.tmpl
.PHONY: install_lvmdbusd
all:
test -x $(LVMDBUSD) || chmod 755 $(LVMDBUSD)
install_lvmdbusd:
$(INSTALL_DIR) $(sbindir)
$(INSTALL_SCRIPT) $(LVMDBUSD) $(sbindir)
@@ -63,4 +66,5 @@ install_lvm2: install_lvmdbusd
install: install_lvm2
DISTCLEAN_TARGETS+= \
$(LVMDBUS_BUILDDIR_FILES)
$(LVMDBUS_BUILDDIR_FILES) \
$(LVMDBUSD)

View File

@@ -158,5 +158,6 @@ def _run_cmd(req):
def cmd_runner(request):
t = threading.Thread(target=_run_cmd, args=(request,))
t = threading.Thread(target=_run_cmd, args=(request,),
name="cmd_runner %s" % str(request.method))
t.start()

View File

@@ -152,7 +152,8 @@ class StateUpdate(object):
load(refresh=False, emit_signal=False, need_main_thread=False)
self.thread = threading.Thread(target=StateUpdate.update_thread,
args=(self,))
args=(self,),
name="StateUpdate.update_thread")
def load(self, refresh=True, emit_signal=True, cache_refresh=True,
log=True, need_main_thread=True):

View File

@@ -8,7 +8,7 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from .automatedproperties import AutomatedProperties
from .utils import job_obj_path_generate, mt_async_result, mt_run_no_wait
from .utils import job_obj_path_generate, mt_async_call
from . import cfg
from .cfg import JOB_INTERFACE
import dbus
@@ -30,7 +30,7 @@ class WaitingClient(object):
# Remove ourselves from waiting client
wc.job_state.remove_waiting_client(wc)
wc.timer_id = -1
mt_async_result(wc.cb, wc.job_state.Complete)
mt_async_call(wc.cb, wc.job_state.Complete)
wc.job_state = None
def __init__(self, job_state, tmo, cb, cbe):
@@ -55,7 +55,7 @@ class WaitingClient(object):
GLib.source_remove(self.timer_id)
self.timer_id = -1
mt_async_result(self.cb, self.job_state.Complete)
mt_async_call(self.cb, self.job_state.Complete)
self.job_state = None
@@ -188,7 +188,7 @@ class Job(AutomatedProperties):
@Complete.setter
def Complete(self, value):
self.state.Complete = value
mt_run_no_wait(Job._signal_complete, self)
mt_async_call(Job._signal_complete, self)
@property
def GetError(self):

View File

@@ -232,7 +232,6 @@ class LvState(State):
@utils.dbus_property(LV_COMMON_INTERFACE, 'Attr', 's')
@utils.dbus_property(LV_COMMON_INTERFACE, 'DataPercent', 'u')
@utils.dbus_property(LV_COMMON_INTERFACE, 'SnapPercent', 'u')
@utils.dbus_property(LV_COMMON_INTERFACE, 'DataPercent', 'u')
@utils.dbus_property(LV_COMMON_INTERFACE, 'MetaDataPercent', 'u')
@utils.dbus_property(LV_COMMON_INTERFACE, 'CopyPercent', 'u')
@utils.dbus_property(LV_COMMON_INTERFACE, 'SyncPercent', 'u')

View File

@@ -1,4 +1,4 @@
#!/usr/bin/env python3
#!@PYTHON3@
# Copyright (C) 2015-2016 Red Hat, Inc. All rights reserved.
#

View File

@@ -1,4 +1,4 @@
#!/usr/bin/env python3
#!@PYTHON3@
# Copyright (C) 2015-2016 Red Hat, Inc. All rights reserved.
#

View File

@@ -1,4 +1,4 @@
#!/usr/bin/env python3
#!@PYTHON3@
# Copyright (C) 2015-2016 Red Hat, Inc. All rights reserved.
#

View File

@@ -63,6 +63,24 @@ def check_bb_size(value):
return v
def install_signal_handlers():
    """Route SIGHUP, SIGINT and SIGUSR1 to utils.handler through GLib.

    Logs an error when GLib offers neither unix_signal_add nor
    unix_signal_add_full.
    """
    # With the glib main loop in use the python signal handler code is
    # apparently not usable, so the glib calls are used instead
    if hasattr(GLib, 'unix_signal_add'):
        register = GLib.unix_signal_add
    elif hasattr(GLib, 'unix_signal_add_full'):
        register = GLib.unix_signal_add_full
    else:
        register = None

    if not register:
        log_error("GLib.unix_signal_[add|add_full] are NOT available!")
        return

    # The signal number is also handed over as the handler's argument
    for sig in (signal.SIGHUP, signal.SIGINT, signal.SIGUSR1):
        register(GLib.PRIORITY_HIGH, sig, utils.handler, sig)
def main():
start = time.time()
# Add simple command line handling
@@ -112,12 +130,7 @@ def main():
# List of threads that we start up
thread_list = []
# Install signal handlers
for s in [signal.SIGHUP, signal.SIGINT]:
try:
signal.signal(s, utils.handler)
except RuntimeError:
pass
install_signal_handlers()
dbus.mainloop.glib.DBusGMainLoop(set_as_default=True)
dbus.mainloop.glib.threads_init()
@@ -138,7 +151,8 @@ def main():
# Using a thread to process requests, we cannot hang the dbus library
# thread that is handling the dbus interface
thread_list.append(threading.Thread(target=process_request))
thread_list.append(threading.Thread(target=process_request,
name='process_request'))
# Have a single thread handling updating lvm and the dbus model so we
# don't have multiple threads doing this as the same time
@@ -176,5 +190,7 @@ def main():
for thread in thread_list:
thread.join()
except KeyboardInterrupt:
utils.handler(signal.SIGINT, None)
# If we are unable to register signal handler, we will end up here when
# the service gets a ^C or a kill -2 <parent pid>
utils.handler(signal.SIGINT)
return 0

View File

@@ -79,7 +79,9 @@ class PvState(State):
self.lv = self._lv_object_list(vg_name)
if vg_name:
# It's possible to have a vg_name and no uuid with the main example
# being when the vg_name == '[unknown]'
if vg_uuid and vg_name:
self.vg_path = cfg.om.get_object_path_by_uuid_lvm_id(
vg_uuid, vg_name, vg_obj_path_generate)
else:

View File

@@ -13,7 +13,7 @@ from gi.repository import GLib
from .job import Job
from . import cfg
import traceback
from .utils import log_error, mt_async_result
from .utils import log_error, mt_async_call
class RequestEntry(object):
@@ -116,9 +116,9 @@ class RequestEntry(object):
if error_rc == 0:
if self.cb:
if self._return_tuple:
mt_async_result(self.cb, (result, '/'))
mt_async_call(self.cb, (result, '/'))
else:
mt_async_result(self.cb, result)
mt_async_call(self.cb, result)
else:
if self.cb_error:
if not error_exception:
@@ -129,7 +129,7 @@ class RequestEntry(object):
else:
error_exception = Exception(error_msg)
mt_async_result(self.cb_error, error_exception)
mt_async_call(self.cb_error, error_exception)
else:
# We have a job and it's complete, indicate that it's done.
self._job.Complete = True

View File

@@ -20,7 +20,8 @@ from lvmdbusd import cfg
# noinspection PyUnresolvedReferences
from gi.repository import GLib
import threading
import traceback
import signal
STDOUT_TTY = os.isatty(sys.stdout.fileno())
@@ -281,12 +282,47 @@ def log_error(msg, *attributes):
_common_log(msg, *attributes)
def dump_threads_stackframe():
    """Log, via log_error, the current stack of every thread that has a frame."""
    # Lookup table from thread identifier to thread object, used to label
    # each stack with the thread's name
    threads_by_ident = {}
    for t in threading.enumerate():
        threads_by_ident[t.ident] = t

    report = []
    for ident, frame in sys._current_frames().items():
        formatted = traceback.format_list(traceback.extract_stack(frame))

        # A thread may have been created after the enumeration above, so the
        # lookup table can be incomplete; fall back to a placeholder name
        if ident in threads_by_ident:
            name = threads_by_ident[ident].name
        else:
            name = "unknown"

        report.append("Thread: %s" % (name))
        report.append("".join(formatted))

    log_error("Dumping thread stack frames!\n" + "\n".join(report))
# noinspection PyUnusedLocal
def handler(signum, frame):
cfg.run.value = 0
log_debug('Signal handler called with signal %d' % signum)
if cfg.loop is not None:
cfg.loop.quit()
def handler(signum):
try:
if signum == signal.SIGUSR1:
dump_threads_stackframe()
else:
cfg.run.value = 0
log_debug('Exiting daemon with signal %d' % signum)
if cfg.loop is not None:
cfg.loop.quit()
except:
st = traceback.format_exc()
log_error("signal handler: exception (logged, not reported!) \n %s" % st)
# It's important we report that we handled the exception for the exception
# handler to continue to work, especially for signal 10 (SIGUSR1)
return True
def pv_obj_path_generate():
@@ -534,21 +570,27 @@ def add_no_notify(cmdline):
# ensure all dbus library interaction is done from the same thread!
def _async_result(call_back, results):
log_debug('Results = %s' % str(results))
call_back(results)
def _async_handler(call_back, parameters):
    """Call call_back with the unpacked parameters, logging any exception.

    Exceptions raised by the callback are written to the error log together
    with their traceback and are not propagated to the caller.
    """
    rendered = ", ".join(str(p) for p in parameters)
    log_debug('Main thread execution, callback = %s, parameters = (%s)' %
              (str(call_back), rendered))

    try:
        if not parameters:
            call_back()
        else:
            call_back(*parameters)
    except:
        trace = traceback.format_exc()
        log_error("mt_async_call: exception (logged, not reported!) \n %s" % trace)
# Return result in main thread
def mt_async_result(call_back, results):
GLib.idle_add(_async_result, call_back, results)
# Execute the function on the main thread with the provided parameters, do
# not return *any* value or wait for the execution to complete!
def mt_async_call(function_call_back, *parameters):
GLib.idle_add(_async_handler, function_call_back, parameters)
# Take the supplied function and run it on the main thread and not wait for
# a result!
def mt_run_no_wait(function, param):
GLib.idle_add(function, param)
# Run the supplied function and arguments on the main thread and wait for them
# to complete while allowing the ability to get the return value too.
#
@@ -568,6 +610,7 @@ class MThreadRunner(object):
def __init__(self, function, *args):
self.f = function
self.rc = None
self.exception = None
self.args = args
self.function_complete = False
self.cond = threading.Condition(threading.Lock())
@@ -577,13 +620,21 @@ class MThreadRunner(object):
with self.cond:
if not self.function_complete:
self.cond.wait()
if self.exception:
raise self.exception
return self.rc
def _run(self):
if len(self.args):
self.rc = self.f(*self.args)
else:
self.rc = self.f()
try:
if self.args:
self.rc = self.f(*self.args)
else:
self.rc = self.f()
except BaseException as be:
self.exception = be
st = traceback.format_exc()
log_error("MThreadRunner: exception \n %s" % st)
log_error("Exception will be raised in calling thread!")
def _remove_objects(dbus_objects_rm):

View File

@@ -48,5 +48,7 @@ static inline void lvmlockd_close(daemon_handle h)
#define EVGKILLED 217 /* sanlock lost access to leases and VG is killed. */
#define ELOCKIO 218 /* sanlock io errors during lock op, may be transient. */
#define EREMOVED 219
#define EDEVOPEN 220 /* sanlock failed to open lvmlock LV */
#define ELMERR 221
#endif /* _LVM_LVMLOCKD_CLIENT_H */

View File

@@ -1389,12 +1389,11 @@ static int res_convert(struct lockspace *ls, struct resource *r,
}
rv = lm_convert(ls, r, act->mode, act, r_version);
if (rv < 0) {
log_error("S %s R %s res_convert lm error %d", ls->name, r->name, rv);
return rv;
}
log_debug("S %s R %s res_convert lm done", ls->name, r->name);
log_debug("S %s R %s res_convert rv %d", ls->name, r->name, rv);
if (rv < 0)
return rv;
if (lk->mode == LD_LK_EX && act->mode == LD_LK_SH) {
r->sh_count = 1;
@@ -3676,7 +3675,17 @@ static int client_send_result(struct client *cl, struct action *act)
if (!gl_lsname_dlm[0])
strcat(result_flags, "NO_GL_LS,");
} else {
strcat(result_flags, "NO_GL_LS,NO_LM");
int found_lm = 0;
if (lm_support_dlm() && lm_is_running_dlm())
found_lm++;
if (lm_support_sanlock() && lm_is_running_sanlock())
found_lm++;
if (!found_lm)
strcat(result_flags, "NO_GL_LS,NO_LM");
else
strcat(result_flags, "NO_GL_LS");
}
}

View File

@@ -508,7 +508,7 @@ lockrv:
}
if (rv < 0) {
log_error("S %s R %s lock_dlm acquire error %d errno %d", ls->name, r->name, rv, errno);
return rv;
return -ELMERR;
}
if (rdd->vb) {
@@ -581,6 +581,7 @@ int lm_convert_dlm(struct lockspace *ls, struct resource *r,
}
if (rv < 0) {
log_error("S %s R %s convert_dlm error %d", ls->name, r->name, rv);
rv = -ELMERR;
}
return rv;
}
@@ -654,6 +655,7 @@ int lm_unlock_dlm(struct lockspace *ls, struct resource *r,
0, NULL, NULL, NULL);
if (rv < 0) {
log_error("S %s R %s unlock_dlm error %d", ls->name, r->name, rv);
rv = -ELMERR;
}
return rv;

View File

@@ -356,12 +356,19 @@ int lm_init_vg_sanlock(char *ls_name, char *vg_name, uint32_t flags, char *vg_ar
log_debug("sanlock daemon version %08x proto %08x",
daemon_version, daemon_proto);
align_size = sanlock_align(&disk);
if (align_size <= 0) {
log_error("S %s init_vg_san bad disk align size %d %s",
ls_name, align_size, disk.path);
return -EARGS;
}
rv = sanlock_align(&disk);
if (rv <= 0) {
if (rv == -EACCES) {
log_error("S %s init_vg_san sanlock error -EACCES: no permission to access %s",
ls_name, disk.path);
return -EDEVOPEN;
} else {
log_error("S %s init_vg_san sanlock error %d trying to get align size of %s",
ls_name, rv, disk.path);
return -EARGS;
}
} else
align_size = rv;
strncpy(ss.name, ls_name, SANLK_NAME_LEN);
memcpy(ss.host_id_disk.path, disk.path, SANLK_PATH_LEN);
@@ -1453,6 +1460,12 @@ int lm_lock_sanlock(struct lockspace *ls, struct resource *r, int ld_mode,
rv = sanlock_acquire(lms->sock, -1, flags, 1, &rs, &opt);
/*
* errors: translate the sanlock error number to an lvmlockd error.
* We don't want to return an sanlock-specific error number from
* this function to code that doesn't recognize sanlock error numbers.
*/
if (rv == -EAGAIN) {
/*
* It appears that sanlock_acquire returns EAGAIN when we request
@@ -1521,6 +1534,26 @@ int lm_lock_sanlock(struct lockspace *ls, struct resource *r, int ld_mode,
return -EAGAIN;
}
if (rv == SANLK_AIO_TIMEOUT) {
/*
* sanlock got an i/o timeout when trying to acquire the
* lease on disk.
*/
log_debug("S %s R %s lock_san acquire mode %d rv %d", ls->name, r->name, ld_mode, rv);
*retry = 0;
return -EAGAIN;
}
if (rv == SANLK_DBLOCK_LVER || rv == SANLK_DBLOCK_MBAL) {
/*
* There was contention with another host for the lease,
* and we lost.
*/
log_debug("S %s R %s lock_san acquire mode %d rv %d", ls->name, r->name, ld_mode, rv);
*retry = 0;
return -EAGAIN;
}
if (rv == SANLK_ACQUIRE_OWNED_RETRY) {
/*
* The lock is held by a failed host, and will eventually
@@ -1571,15 +1604,25 @@ int lm_lock_sanlock(struct lockspace *ls, struct resource *r, int ld_mode,
if (rv == -ENOSPC)
rv = -ELOCKIO;
return rv;
/*
* generic error number for sanlock errors that we are not
* catching above.
*/
return -ELMERR;
}
/*
* sanlock acquire success (rv 0)
*/
if (rds->vb) {
rv = sanlock_get_lvb(0, rs, (char *)&vb, sizeof(vb));
if (rv < 0) {
log_error("S %s R %s lock_san get_lvb error %d", ls->name, r->name, rv);
memset(rds->vb, 0, sizeof(struct val_blk));
memset(vb_out, 0, sizeof(struct val_blk));
/* the lock is still acquired, the vb values considered invalid */
rv = 0;
goto out;
}
@@ -1632,6 +1675,7 @@ int lm_convert_sanlock(struct lockspace *ls, struct resource *r,
if (rv < 0) {
log_error("S %s R %s convert_san set_lvb error %d",
ls->name, r->name, rv);
return -ELMERR;
}
}
@@ -1644,14 +1688,35 @@ int lm_convert_sanlock(struct lockspace *ls, struct resource *r,
if (daemon_test)
return 0;
/*
* Don't block waiting for a failed lease to expire since it causes
* sanlock_convert to block for a long time, which would prevent this
* thread from processing other lock requests.
*
* FIXME: SANLK_CONVERT_OWNER_NOWAIT is the same as SANLK_ACQUIRE_OWNER_NOWAIT.
* Change to use the CONVERT define when the latest sanlock version has it.
*/
flags |= SANLK_ACQUIRE_OWNER_NOWAIT;
rv = sanlock_convert(lms->sock, -1, flags, rs);
if (rv == -EAGAIN) {
/* FIXME: When could this happen? Should something different be done? */
log_error("S %s R %s convert_san EAGAIN", ls->name, r->name);
if (!rv)
return 0;
switch (rv) {
case -EAGAIN:
case SANLK_ACQUIRE_IDLIVE:
case SANLK_ACQUIRE_OWNED:
case SANLK_ACQUIRE_OWNED_RETRY:
case SANLK_ACQUIRE_OTHER:
case SANLK_AIO_TIMEOUT:
case SANLK_DBLOCK_LVER:
case SANLK_DBLOCK_MBAL:
/* expected errors from known/normal cases like lock contention or io timeouts */
log_debug("S %s R %s convert_san error %d", ls->name, r->name, rv);
return -EAGAIN;
}
if (rv < 0) {
default:
log_error("S %s R %s convert_san convert error %d", ls->name, r->name, rv);
rv = -ELMERR;
}
return rv;
@@ -1688,6 +1753,7 @@ static int release_rename(struct lockspace *ls, struct resource *r)
rv = sanlock_release(lms->sock, -1, SANLK_REL_RENAME, 2, res_args);
if (rv < 0) {
log_error("S %s R %s unlock_san release rename error %d", ls->name, r->name, rv);
rv = -ELMERR;
}
free(res_args);
@@ -1744,6 +1810,7 @@ int lm_unlock_sanlock(struct lockspace *ls, struct resource *r,
if (rv < 0) {
log_error("S %s R %s unlock_san set_lvb error %d",
ls->name, r->name, rv);
return -ELMERR;
}
}
@@ -1762,6 +1829,8 @@ int lm_unlock_sanlock(struct lockspace *ls, struct resource *r,
if (rv == -EIO)
rv = -ELOCKIO;
else if (rv < 0)
rv = -ELMERR;
return rv;
}

215
doc/aio_design.txt Normal file
View File

@@ -0,0 +1,215 @@
Introducing asynchronous I/O to LVM
===================================
Issuing I/O asynchronously means instructing the kernel to perform specific
I/O and return immediately without waiting for it to complete. The data
is collected from the kernel later.
Advantages
----------
A1. While waiting for the I/O to happen, the program could perform other
operations.
A2. When LVM is searching for its Physical Volumes, it issues a small amount of
I/O to a large number of disks. If this were issued in parallel, the overall
runtime might be shorter, while there should be little effect on the CPU time.
A3. If more than one timeout occurs when accessing any devices, these can be
taken in parallel, again reducing the runtime. This applies globally,
not just while the code is searching for Physical Volumes, so reading,
writing and committing the metadata may occasionally benefit to some
extent too, and there are probably maintenance advantages in using the same
method of I/O throughout the main body of the code.
A4. By introducing a simple callback function mechanism, the conversion can be
performed largely incrementally by first refactoring and continuing to
use synchronous I/O with the callbacks performed immediately. This allows the
callbacks to be introduced without changing the running sequence of the code
initially. Future projects could refactor some of the calling sites to
simplify the code structure and even eliminate some of the nesting.
This allows each part of what might ultimately amount to a large change to be
introduced and tested independently.
Disadvantages
-------------
D1. The resulting code may be more complex with more failure modes to
handle. Mitigate by thorough auditing and testing, rolling out
gradually, and offering a simple switch to revert to the old behaviour.
D2. The Linux asynchronous I/O implementation is less mature than
its synchronous I/O implementation and might show up problems that
depend on the version of the kernel or library used. Fixes or
workarounds for some of these might require kernel changes. For
example, there are suggestions that despite being supposedly async,
there are still cases where system calls can block. There might be
resource dependencies on other processes running on the system that make
it unsuitable for use while any devices are suspended. Mitigation
as for D1.
D3. The error handling within callbacks becomes more complicated.
However we know that existing call paths can already sometimes discard
errors, sometimes deliberately, sometimes not, so this aspect is in need
of a complete review anyway and the new approach will make the error
handling more transparent. Aim initially for overall behaviour that is
no worse than that of the existing code, then work on improving it
later.
D4. The work will take a few weeks to code and test. This leads to a
significant opportunity cost when compared against other enhancements
that could be achieved in that time. However, the proof-of-concept work
performed while writing this design has satisfied me that the work could
proceed and be committed incrementally as a background task.
Observations regarding LVM's I/O Architecture
---------------------------------------------
H1. All device, metadata and config file I/O is constrained to pass through a
single route in lib/device.
H2. The first step of the analysis was to instrument this code path with
log_debug messages. I/O is split into the following categories:
"dev signatures",
"PV labels",
"VG metadata header",
"VG metadata content",
"extra VG metadata header",
"extra VG metadata content",
"LVM1 metadata",
"pool metadata",
"LV content",
"logging",
H3. A bounce buffer is used for most I/O.
H4. Most callers finish using the supplied data before any further I/O is
issued. The few that don't could be converted trivially to do so.
H5. There is one stream of I/O per metadata area on each device.
H6. Some reads fall at offsets close to immediately preceding reads, so it's
possible to avoid these by caching one "block" per metadata area I/O stream.
H7. Simple analysis suggests a minimum aligned read size of 8k would deliver
immediate gains from this caching. A larger size might perform worse because
almost all the time the extra data read would not be used, but this can be
re-examined and tuned after the code is in place.
Proposal
--------
P1. Retain the "single I/O path" but offer an asynchronous option.
P2. Eliminate the bounce buffer in most cases by improving alignment.
P3. Reduce the number of reads by always reading a minimum of an aligned
8k block.
P4. Eliminate repeated reads by caching the last block read and changing
the lib/device interface to return a pointer to read-only data within
this block.
P5. Only perform these interface changes for code on the critical path
for now by converting other code sites to use wrappers around the new
interface.
P6. Treat asynchronous I/O as the interface of choice and optimise only
for this case.
P7. Convert the callers on the critical path to pass callback functions
to the device layer. These functions will be called later with the
read-only data, a context pointer and a success/failure indicator.
Where an existing function performs a sequence of I/O, this has the
advantage of breaking up the large function into smaller ones and
wrapping the parameters used into structures. While this might look
rather messy and ad-hoc in the short-term, it's a first step towards
breaking up confusingly long functions into component parts and wrapping
the existing long parameter lists into more appropriate structures and
refactoring these parts of the code.
P8. Limit the resources used by the asynchronous I/O by using two
tunable parameters, one limiting the number of outstanding I/Os issued
and another limiting the total amount of memory used.
P9. Provide a fallback option if asynchronous I/O is unavailable by
sharing the code paths but issuing the I/O synchronously and calling the
callback immediately.
P10. Only allocate the buffer for the I/O at the point where the I/O is
about to be issued.
P11. If the thresholds are exceeded, add the request to a simple queue,
and process it later after some I/O has completed.
Future work
-----------
F1. Perform a complete review of the error tracking so that device
failures are handled and reported more cleanly, extending the existing
basic error counting mechanism.
F2. Consider whether some of the nested callbacks can be eliminated,
which would allow for additional simplifications.
F3. Adjust the contents of the ad hoc context structs into more logical
arrangements and use them more widely.
F4. Perform wider refactoring of these areas of code.
Testing considerations
----------------------
T1. The changes touch code on the device path, so a thorough re-test of
the device layer is required. The new code needs a full audit down
through the library layer into the kernel to check that all the error
conditions that are currently implemented (such as EAGAIN) are handled
sensibly. (LVM's I/O layer needs to remain as solid as we can make it.)
T2. The current test suite provides a reasonably broad range of coverage
of this area but is far from comprehensive.
Acceptance criteria
-------------------
A1. The current test suite should pass to the same extent as before the
changes.
A2. When all debugging and logging is disabled, strace -c must show
improvements, e.g. the expected reduction in the number of reads.
A3. Running a range of commands under valgrind must not reveal any
new leaks due to the changes.
A4. All new coverity reports from the change must be addressed.
A5. CPU time should be similar to that before, as the same work
is being done overall, just in a different order.
A6. Tests need to show improved behaviour in targeted areas. For example,
if several devices are slow and time out, the delays should occur
in parallel and the elapsed time should be less than before.
Release considerations
----------------------
R1. Async I/O should be widely available and largely reliable on Linux
nowadays (even though parts of its interface and implementation remain a
matter of controversy) so we should try to make its use the default
wherever it is supported. If certain types of systems have problems we
should try to detect those cases and disable it automatically there.
R2. Because the implications of an unexpected problem in the new code
could be severe for the people affected, the roll out needs to be gentle
without a deadline to allow us plenty of time to gain confidence in the
new code. Our own testing will only be able to cover a tiny fraction of
the different setups our users have, so we need to look out for problems
caused by this proactively and encourage people to test it on their own
systems and report back. It must go into the tree near the start of a
release cycle rather than at the end to provide time for our confidence
in it to grow.

85
doc/vdo.md Normal file
View File

@@ -0,0 +1,85 @@
# VDO - Compression and deduplication.
Currently device stacking looks like this:
Physical x [multipath] x [partition] x [mdadm] x [LUKS] x [LVS] x [LUKS] x [FS|Database|...]
Adding VDO:
Physical x [multipath] x [partition] x [mdadm] x [LUKS] x [LVS] x [LUKS] x VDO x [LVS] x [FS|Database|...]
## Where VDO fits (and where it does not):
### Backing devices for VDO volumes:
1. Physical x [multipath] x [partition] x [mdadm],
2. LUKS over (1) - full disk encryption.
3. LVs (raids|mirror|stripe|linear) x [cache] over (1).
4. LUKS over (3) - especially when using raids.
Usual limitations apply:
- Never layer LUKS over another LUKS - it makes no sense.
- LUKS is better over the raids, than under.
### Using VDO as a PV:
1. under tpool
- The best fit - it will deduplicate additional redundancies among all
snapshots and will reduce the footprint.
- Risks: Resize! dmeventd will not be able to handle resizing of tpool ATM.
2. under corig
- Cache fits better under VDO device - it will reduce amount of data, and
deduplicate, so there should be more hits.
- This is useful to keep the most frequently used data in cache
uncompressed (if that happens to be a bottleneck.)
3. under (multiple) linear LVs - e.g. used for VMs.
### And where VDO does not fit:
- *never* use VDO under LUKS volumes
- these are random data and do not compress nor deduplicate well,
- *never* use VDO under cmeta and tmeta LVs
- these are random data and do not compress nor deduplicate well,
- under raids
- raid{4,5,6} scrambles data, so they do not deduplicate well,
- raid{1,4,5,6,10} also causes the amount of data to grow, so more (duplicate in
case of raid{1,10}) work has to be done in order to find fewer duplicates.
### And where it could be useful:
- under snapshot CoW device - when there are multiple of those it could deduplicate
### Things to decide
- under integrity devices - it should work - mostly for data
- hash is not compressible and unique - it makes sense to have separate imeta and idata volumes for integrity devices
### Future Integration of VDO into LVM:
One issue is using both LUKS and RAID under VDO. We have two options:
- use mdadm x LUKS x VDO+LV
- use LV RAID x LUKS x VDO+LV - still requiring recursive LVs.
Another issue is duality of VDO - it is a top level LV but it can be seen as a "pool" for multiple devices.
- This is one use case which cannot be handled by LVM at the moment.
- Size of the VDO is its physical size and virtual size - just like tpool.
- same problems with virtual vs physical size - it can get full, without exposing it to a FS
Another possible RFE is to split data and metadata:
- e.g. keep data on HDD and metadata on SSD
## Issues / Testing
- fstrim/discard pass down - does it work with VDO?
- VDO can run in synchronous vs. asynchronous mode
- synchronous for devices where write is safe after it is confirmed. Some devices are lying.
- asynchronous for devices requiring flush
- multiple devices under VDO - need to find common options
- pvmove - changing characteristics of underlying device
- autoactivation during boot
- Q: can we use VDO for RootFS?

View File

@@ -1,5 +1,8 @@
/* include/configure.h.in. Generated from configure.in by autoheader. */
/* Define to 1 if aio is available. */
#undef AIO_SUPPORT
/* Define to 1 to use libblkid detection of signatures when wiping. */
#undef BLKID_WIPING_SUPPORT
@@ -685,9 +688,6 @@
/* Define to 1 to include the LVM readline shell. */
#undef READLINE_SUPPORT
/* Define to 1 to include built-in support for replicators. */
#undef REPLICATOR_INTERNAL
/* Define as the return type of signal handlers (`int' or `void'). */
#undef RETSIGTYPE

View File

@@ -36,10 +36,6 @@ ifeq ("@RAID@", "shared")
SUBDIRS += raid
endif
ifeq ("@REPLICATORS@", "shared")
SUBDIRS += replicator
endif
ifeq ("@THIN@", "shared")
SUBDIRS += thin
endif
@@ -100,13 +96,13 @@ SOURCES =\
metadata/lv_manip.c \
metadata/merge.c \
metadata/metadata.c \
metadata/metadata-liblvm.c \
metadata/mirror.c \
metadata/pool_manip.c \
metadata/pv.c \
metadata/pv_manip.c \
metadata/pv_map.c \
metadata/raid_manip.c \
metadata/replicator_manip.c \
metadata/segtype.c \
metadata/snapshot_manip.c \
metadata/thin_manip.c \
@@ -165,10 +161,6 @@ ifeq ("@RAID@", "internal")
SOURCES += raid/raid.c
endif
ifeq ("@REPLICATORS@", "internal")
SOURCES += replicator/replicator.c
endif
ifeq ("@THIN@", "internal")
SOURCES += thin/thin.c
endif
@@ -215,7 +207,6 @@ ifeq ($(MAKECMDGOALS),distclean)
mirror \
notify \
raid \
replicator \
thin \
cache_segtype \
locking

View File

@@ -323,12 +323,6 @@ int lvs_in_vg_opened(const struct volume_group *vg)
{
return 0;
}
/******
int lv_suspend(struct cmd_context *cmd, const char *lvid_s)
{
return 1;
}
*******/
int lv_suspend_if_active(struct cmd_context *cmd, const char *lvid_s, unsigned origin_only, unsigned exclusive,
const struct logical_volume *lv, const struct logical_volume *lv_pre)
{
@@ -1707,7 +1701,7 @@ static char *_build_target_uuid(struct cmd_context *cmd, const struct logical_vo
if (lv_is_thin_pool(lv))
layer = "tpool"; /* Monitor "tpool" for the "thin pool". */
else if (lv_is_origin(lv))
else if (lv_is_origin(lv) || lv_is_external_origin(lv))
layer = "real"; /* Monitor "real" for "snapshot-origin". */
else
layer = NULL;
@@ -1949,6 +1943,13 @@ int monitor_dev_for_events(struct cmd_context *cmd, const struct logical_volume
r = 0;
}
if (seg->external_lv &&
!monitor_dev_for_events(cmd, seg->external_lv,
(!monitor) ? laopts : NULL, monitor)) {
stack;
r = 0;
}
if (seg->metadata_lv &&
!monitor_dev_for_events(cmd, seg->metadata_lv, NULL, monitor)) {
stack;
@@ -2082,12 +2083,16 @@ static int _lv_suspend(struct cmd_context *cmd, const char *lvid_s,
const struct logical_volume *pvmove_lv = NULL;
const struct logical_volume *lv_to_free = NULL;
const struct logical_volume *lv_pre_to_free = NULL;
struct logical_volume *lv_pre_tmp;
struct logical_volume *lv_pre_tmp, *lv_tmp;
struct seg_list *sl;
struct lv_segment *snap_seg;
struct lvinfo info;
int r = 0, lockfs = 0, flush_required = 0;
struct detached_lv_data detached;
struct dm_pool *mem = NULL;
struct dm_list suspend_lvs;
struct lv_list *lvl;
int found;
if (!activation())
return 1;
@@ -2125,9 +2130,6 @@ static int _lv_suspend(struct cmd_context *cmd, const char *lvid_s,
goto out;
}
if (!lv_read_replicator_vgs(lv))
goto_out;
lv_calculate_readahead(lv, NULL);
/*
@@ -2157,6 +2159,12 @@ static int _lv_suspend(struct cmd_context *cmd, const char *lvid_s,
}
if (!_lv_preload(lv_pre_tmp, laopts, &flush_required))
goto_out;
/* Suspending 1st. LV above PVMOVE suspends whole tree */
dm_list_iterate_items(sl, &pvmove_lv->segs_using_this_lv) {
lv = sl->seg->lv;
break;
}
} else {
if (!_lv_preload(lv_pre, laopts, &flush_required))
/* FIXME Revert preloading */
@@ -2194,7 +2202,7 @@ static int _lv_suspend(struct cmd_context *cmd, const char *lvid_s,
* NOTE: Mirror repair requires noflush for proper repair!
* TODO: Relax this limiting condition further */
if (!flush_required &&
(lv_is_pvmove(lv) ||
(lv_is_pvmove(lv) || pvmove_lv ||
(!lv_is_mirror(lv) && !lv_is_thin_pool(lv) && !lv_is_thin_volume(lv)))) {
log_debug("Requiring flush for LV %s.", display_lvname(lv));
flush_required = 1;
@@ -2204,10 +2212,6 @@ static int _lv_suspend(struct cmd_context *cmd, const char *lvid_s,
/* FIXME Consider aborting here */
stack;
critical_section_inc(cmd, "suspending");
if (pvmove_lv)
critical_section_inc(cmd, "suspending pvmove LV");
if (!laopts->origin_only &&
(lv_is_origin(lv_pre) || lv_is_cow(lv_pre)))
lockfs = 1;
@@ -2219,40 +2223,68 @@ static int _lv_suspend(struct cmd_context *cmd, const char *lvid_s,
if (laopts->origin_only && lv_is_thin_volume(lv) && lv_is_thin_volume(lv_pre))
lockfs = 1;
/*
* Suspending an LV directly above a PVMOVE LV also
* suspends other LVs using that same PVMOVE LV.
* FIXME Remove this and delay the 'clear node' until
* after the code knows whether there's a different
* inactive table to load or not instead so lv_suspend
* can be called separately for each LV safely.
*/
if ((lv_pre->vg->status & PRECOMMITTED) &&
lv_is_locked(lv_pre) && find_pvmove_lv_in_lv(lv_pre)) {
if (!_lv_suspend_lv(lv_pre, laopts, lockfs, flush_required)) {
critical_section_dec(cmd, "failed precommitted suspend");
if (pvmove_lv)
critical_section_dec(cmd, "failed precommitted suspend (pvmove)");
critical_section_inc(cmd, "suspending");
if (!lv_is_locked(lv) && lv_is_locked(lv_pre) &&
(pvmove_lv = find_pvmove_lv_in_lv(lv_pre))) {
/*
* When starting PVMOVE, suspend participating LVs first
* with committed metadata by looking at precommitted pvmove list.
* In committed metadata these LVs are not connected in any way.
*
* TODO: prepare list of LVs needed to be suspended and pass them
* via 'struct laopts' directly to _lv_suspend_lv() and handle this
* with a single 'dmtree' call.
*/
if (!(mem = dm_pool_create("suspend_lvs", 128)))
goto_out;
/* Prepare list of all LVs for suspend ahead */
dm_list_init(&suspend_lvs);
dm_list_iterate_items(sl, &pvmove_lv->segs_using_this_lv) {
lv_tmp = sl->seg->lv;
if (lv_is_cow(lv_tmp))
/* Never suspend COW, always has to be origin */
lv_tmp = origin_from_cow(lv_tmp);
found = 0;
dm_list_iterate_items(lvl, &suspend_lvs)
if (strcmp(lvl->lv->name, lv_tmp->name) == 0) {
found = 1;
break;
}
if (found)
continue; /* LV is already in the list */
if (!(lvl = dm_pool_alloc(mem, sizeof(*lvl)))) {
log_error("lv_list alloc failed.");
goto out;
}
/* Look for precommitted LV name in committed VG */
if (!(lvl->lv = find_lv(lv->vg, lv_tmp->name))) {
log_error(INTERNAL_ERROR "LV %s missing from preload metadata.",
display_lvname(lv_tmp));
goto out;
}
dm_list_add(&suspend_lvs, &lvl->list);
}
} else {
/* Normal suspend */
dm_list_iterate_items(lvl, &suspend_lvs)
if (!_lv_suspend_lv(lvl->lv, laopts, lockfs, 1)) {
critical_section_dec(cmd, "failed suspend");
goto_out; /* FIXME: resume on recovery path? */
}
} else /* Standard suspend */
if (!_lv_suspend_lv(lv, laopts, lockfs, flush_required)) {
critical_section_dec(cmd, "failed suspend");
if (pvmove_lv)
critical_section_dec(cmd, "failed suspend (pvmove)");
goto_out;
}
}
r = 1;
out:
if (mem)
dm_pool_destroy(mem);
if (lv_pre_to_free)
release_vg(lv_pre_to_free->vg);
if (lv_to_free) {
lv_release_replicator_vgs(lv_to_free);
if (lv_to_free)
release_vg(lv_to_free->vg);
}
return r;
}
@@ -2274,12 +2306,29 @@ int lv_suspend_if_active(struct cmd_context *cmd, const char *lvid_s, unsigned o
return _lv_suspend(cmd, lvid_s, &laopts, 0, lv, lv_pre);
}
static int _check_suspended_lv(struct logical_volume *lv, void *data)
{
struct lvinfo info;
if (lv_info(lv->vg->cmd, lv, 0, &info, 0, 0) && info.exists && info.suspended) {
log_debug("Found suspended LV %s in critical section().", display_lvname(lv));
return 0; /* There is suspended subLV in the tree */
}
if (lv_layer(lv) && lv_info(lv->vg->cmd, lv, 1, &info, 0, 0) && info.exists && info.suspended) {
log_debug("Found suspended layered LV %s in critical section().", display_lvname(lv));
return 0; /* There is suspended subLV in the tree */
}
return 1;
}
static int _lv_resume(struct cmd_context *cmd, const char *lvid_s,
struct lv_activate_opts *laopts, int error_if_not_active,
const struct logical_volume *lv)
{
const struct logical_volume *lv_to_free = NULL;
struct dm_list *snh;
struct lvinfo info;
int r = 0;
@@ -2313,12 +2362,28 @@ static int _lv_resume(struct cmd_context *cmd, const char *lvid_s,
if (!info.exists || !info.suspended) {
if (error_if_not_active)
goto_out;
r = 1;
if (!info.suspended)
critical_section_dec(cmd, "already resumed");
goto out;
}
/* ATM only thin-pool with origin-only suspend does not really suspend anything
* it's used only for message passing to thin-pool */
if (laopts->origin_only && lv_is_thin_pool(lv))
critical_section_dec(cmd, "resumed");
if (!info.suspended && critical_section()) {
/* Validation check if any subLV is suspended */
if (!laopts->origin_only && lv_is_origin(lv)) {
/* Check all snapshots for this origin LV */
dm_list_iterate(snh, &lv->snapshot_segs)
if (!_check_suspended_lv(dm_list_struct_base(snh, struct lv_segment, origin_list)->cow, NULL))
goto needs_resume; /* Found suspended snapshot */
}
if ((r = for_each_sub_lv((struct logical_volume *)lv, &_check_suspended_lv, NULL)))
goto out; /* Nothing was found suspended */
} else {
r = 1;
goto out;
}
}
needs_resume:
laopts->read_only = _passes_readonly_filter(cmd, lv);
laopts->resuming = 1;
@@ -2436,14 +2501,21 @@ int lv_deactivate(struct cmd_context *cmd, const char *lvid_s, const struct logi
goto_out;
}
if (!lv_read_replicator_vgs(lv))
goto_out;
if (!monitor_dev_for_events(cmd, lv, &laopts, 0))
stack;
critical_section_inc(cmd, "deactivating");
r = _lv_deactivate(lv);
/*
* Remove any transiently activated error
* devices which aren't used any more.
*/
if (r && lv_is_raid(lv) && !lv_deactivate_any_missing_subdevs(lv)) {
log_error("Failed to remove temporary SubLVs from %s",
display_lvname(lv));
r = 0;
}
critical_section_dec(cmd, "deactivated");
if (!lv_info(cmd, lv, 0, &info, 0, 0) || info.exists) {
@@ -2453,10 +2525,8 @@ int lv_deactivate(struct cmd_context *cmd, const char *lvid_s, const struct logi
r = 0;
}
out:
if (lv_to_free) {
lv_release_replicator_vgs(lv_to_free);
if (lv_to_free)
release_vg(lv_to_free->vg);
}
return r;
}
@@ -2504,6 +2574,15 @@ static int _lv_activate(struct cmd_context *cmd, const char *lvid_s,
if (!lv && !(lv_to_free = lv = lv_from_lvid(cmd, lvid_s, 0)))
goto out;
if (!laopts->exclusive &&
(lv_is_origin(lv) ||
seg_only_exclusive(first_seg(lv)))) {
log_error(INTERNAL_ERROR "Trying non-exlusive activation of %s with "
"a volume type %s requiring exclusive activation.",
display_lvname(lv), lvseg_name(first_seg(lv)));
return 0;
}
if (filter && !_passes_activation_filter(cmd, lv)) {
log_verbose("Not activating %s since it does not pass "
"activation filter.", display_lvname(lv));
@@ -2571,9 +2650,6 @@ static int _lv_activate(struct cmd_context *cmd, const char *lvid_s,
goto out;
}
if (!lv_read_replicator_vgs(lv))
goto_out;
lv_calculate_readahead(lv, NULL);
critical_section_inc(cmd, "activating");
@@ -2585,10 +2661,8 @@ static int _lv_activate(struct cmd_context *cmd, const char *lvid_s,
stack;
out:
if (lv_to_free) {
lv_release_replicator_vgs(lv_to_free);
if (lv_to_free)
release_vg(lv_to_free->vg);
}
return r;
}
@@ -2680,10 +2754,8 @@ static int _lv_remove_any_missing_subdevs(struct logical_volume *lv)
struct lv_segment *seg;
dm_list_iterate_items(seg, &lv->segments) {
if (seg->area_count != 1)
return_0;
if (dm_snprintf(name, sizeof(name), "%s-%s-missing_%u_0", seg->lv->vg->name, seg->lv->name, seg_no) < 0)
return 0;
return_0;
if (!_remove_dm_dev_by_name(name))
return 0;

View File

@@ -1717,6 +1717,114 @@ static uint16_t _get_udev_flags(struct dev_manager *dm, const struct logical_vol
return udev_flags;
}
static int _add_lv_to_dtree(struct dev_manager *dm, struct dm_tree *dtree,
const struct logical_volume *lv, int origin_only);
/*
 * Examine one "dm-<minor>" entry of a device's sysfs holders directory.
 *
 * d_name is expected to start with "dm-"; the minor number is parsed from
 * the characters following that 3-character prefix.  'major' is the major
 * number used when querying the holder device.
 *
 * When the holder exists, its UUID (after skipping the common UUID prefix)
 * starts with the id of lv's VG and no node with that UUID is present in
 * dtree yet, the matching LV is looked up in lv's VG and added to dtree.
 * A snapshot COW holder is replaced by its origin before being added.
 *
 * Returns 1 on success (also when nothing needed to be added), 0 on error.
 */
static int _check_holder(struct dev_manager *dm, struct dm_tree *dtree,
			 const struct logical_volume *lv, uint32_t major,
			 const char *d_name)
{
	const char *default_uuid_prefix = dm_uuid_prefix();
	const size_t default_uuid_prefix_len = strlen(default_uuid_prefix);
	const char *name;
	const char *uuid;
	char *endptr;
	long long parsed;
	struct dm_info info;
	struct dm_task *dmt;
	struct logical_volume *lv_det;
	union lvid id;
	int dev, r = 0;

	/*
	 * strtoll() only sets errno for out-of-range input, so the end pointer
	 * must be checked as well: reject names without any digit, names with
	 * trailing characters, negative numbers and values not fitting 'int'
	 * (~0U >> 1 is the largest value an int can hold).
	 */
	errno = 0;
	parsed = strtoll(d_name + 3, &endptr, 10);
	if (errno || (endptr == d_name + 3) || *endptr ||
	    (parsed < 0) || (parsed > (long long) (~0U >> 1))) {
		log_error("Failed to parse dm device minor number from %s.", d_name);
		return 0;
	}
	dev = (int) parsed;

	if (!(dmt = _setup_task_run(DM_DEVICE_INFO, &info, NULL, NULL, NULL,
				    major, dev, 0, 0, 0)))
		return_0;

	if (info.exists) {
		uuid = dm_task_get_uuid(dmt);
		name = dm_task_get_name(dmt);

		log_debug_activation("Checking holder of %s %s (" FMTu32 ":" FMTu32 ") %s.",
				     display_lvname(lv), uuid, info.major, info.minor,
				     name);

		/* Skip common uuid prefix */
		if (!strncmp(default_uuid_prefix, uuid, default_uuid_prefix_len))
			uuid += default_uuid_prefix_len;

		/* Only holders from the same VG which are not yet in dtree are of interest */
		if (!strncmp(uuid, (char*)&lv->vg->id, sizeof(lv->vg->id)) &&
		    !dm_tree_find_node_by_uuid(dtree, uuid)) {
			dm_strncpy((char*)&id, uuid, 2 * sizeof(struct id) + 1);

			/* If UUID is not yet in dtree, look for matching LV */
			if (!(lv_det = find_lv_in_vg_by_lvid(lv->vg, &id))) {
				log_error("Cannot find holder with device name %s in VG %s.",
					  name, lv->vg->name);
				goto out;
			}

			/* A COW is handled through its origin */
			if (lv_is_cow(lv_det))
				lv_det = origin_from_cow(lv_det);

			log_debug_activation("Found holder %s of %s.",
					     display_lvname(lv_det),
					     display_lvname(lv));

			if (!_add_lv_to_dtree(dm, dtree, lv_det, 0))
				goto_out;
		}
	}

	r = 1;
out:
	/* The task is released on both success and failure paths */
	dm_task_destroy(dmt);

	return r;
}
/*
 * Add existing devices which hold the given LV device open.
 * This is used when the metadata no longer contains this information,
 * i.e. PVMOVE is being finished and the final table is going to be resumed.
 *
 * The holders are listed from "<sysfs>block/dm-<minor>/holders" of the
 * device described by 'info'; every entry whose name starts with "dm-"
 * is passed to _check_holder().
 *
 * Returns 1 when all entries were processed, 0 on any failure.
 */
static int _add_holders_to_dtree(struct dev_manager *dm, struct dm_tree *dtree,
				 const struct logical_volume *lv, struct dm_info *info)
{
	char holders_path[PATH_MAX];
	struct dirent *entry;
	DIR *holders;
	int ret = 0;

	/* Sysfs path of holders */
	if (dm_snprintf(holders_path, sizeof(holders_path),
			"%sblock/dm-" FMTu32 "/holders",
			dm_sysfs_dir(), info->minor) < 0) {
		log_error("sysfs_path dm_snprintf failed.");
		return 0;
	}

	holders = opendir(holders_path);
	if (!holders) {
		log_sys_error("opendir", holders_path);
		return 0;
	}

	for (entry = readdir(holders); entry; entry = readdir(holders)) {
		/* Expects minor is added to 'dm-' prefix */
		if (strncmp(entry->d_name, "dm-", 3))
			continue;

		if (!_check_holder(dm, dtree, lv, info->major, entry->d_name))
			goto_out;
	}

	ret = 1;
out:
	/* The directory handle is closed on both success and failure paths */
	if (closedir(holders))
		log_sys_debug("closedir", "holders");

	return ret;
}
static int _add_dev_to_dtree(struct dev_manager *dm, struct dm_tree *dtree,
const struct logical_volume *lv, const char *layer)
{
@@ -1771,83 +1879,14 @@ static int _add_dev_to_dtree(struct dev_manager *dm, struct dm_tree *dtree,
return_0;
}
return 1;
}
/*
* Add replicator devices
*
* Using _add_dev_to_dtree() directly instead of _add_lv_to_dtree()
* to avoid extra checks with extensions.
*/
static int _add_partial_replicator_to_dtree(struct dev_manager *dm,
struct dm_tree *dtree,
const struct logical_volume *lv)
{
struct logical_volume *rlv = first_seg(lv)->replicator;
struct replicator_device *rdev;
struct replicator_site *rsite;
struct dm_tree_node *rep_node, *rdev_node;
const char *uuid;
if (!lv_is_active_replicator_dev(lv)) {
if (!_add_dev_to_dtree(dm, dtree, lv->rdevice->lv,
NULL))
/*
* Find holders of existing active LV where name starts with 'pvmove',
* but it's not anymore PVMOVE LV and also it's not PVMOVE _mimage
*/
if (info.exists && !lv_is_pvmove(lv) &&
!strchr(lv->name, '_') && !strncmp(lv->name, "pvmove", 6))
if (!_add_holders_to_dtree(dm, dtree, lv, &info))
return_0;
return 1;
}
/* Add _rlog and replicator device */
if (!_add_dev_to_dtree(dm, dtree, first_seg(rlv)->rlog_lv, NULL))
return_0;
if (!_add_dev_to_dtree(dm, dtree, rlv, NULL))
return_0;
if (!(uuid = build_dm_uuid(dm->mem, rlv, NULL)))
return_0;
rep_node = dm_tree_find_node_by_uuid(dtree, uuid);
/* Add all related devices for replicator */
dm_list_iterate_items(rsite, &rlv->rsites)
dm_list_iterate_items(rdev, &rsite->rdevices) {
if (rsite->state == REPLICATOR_STATE_ACTIVE) {
/* Add _rimage LV */
if (!_add_dev_to_dtree(dm, dtree, rdev->lv, NULL))
return_0;
/* Add replicator-dev LV, except of the already added one */
if ((lv != rdev->replicator_dev->lv) &&
!_add_dev_to_dtree(dm, dtree,
rdev->replicator_dev->lv, NULL))
return_0;
/* If replicator exists - try connect existing heads */
if (rep_node) {
uuid = build_dm_uuid(dm->mem,
rdev->replicator_dev->lv,
NULL);
if (!uuid)
return_0;
rdev_node = dm_tree_find_node_by_uuid(dtree, uuid);
if (rdev_node)
dm_tree_node_set_presuspend_node(rdev_node,
rep_node);
}
}
if (!rdev->rsite->vg_name)
continue;
if (!_add_dev_to_dtree(dm, dtree, rdev->lv, NULL))
return_0;
if (rdev->slog &&
!_add_dev_to_dtree(dm, dtree, rdev->slog, NULL))
return_0;
}
return 1;
}
@@ -1865,7 +1904,7 @@ struct pool_cb_data {
static int _pool_callback(struct dm_tree_node *node,
dm_node_callback_t type, void *cb_data)
{
int ret, status, fd;
int ret, status = 0, fd;
const struct dm_config_node *cn;
const struct dm_config_value *cv;
const struct pool_cb_data *data = cb_data;
@@ -1873,12 +1912,45 @@ static int _pool_callback(struct dm_tree_node *node,
const struct logical_volume *mlv = first_seg(pool_lv)->metadata_lv;
long buf[64 / sizeof(long)]; /* buffer for short disk header (64B) */
int args = 0;
char *mpath;
const char *argv[19] = { /* Max supported 15 args */
find_config_tree_str_allow_empty(pool_lv->vg->cmd, data->exec, NULL) /* argv[0] */
find_config_tree_str_allow_empty(pool_lv->vg->cmd, data->exec, NULL)
};
if (!*argv[0])
return 1; /* Checking disabled */
if (!*argv[0]) /* *_check tool is unconfigured/disabled with "" setting */
return 1;
if (!(mpath = lv_dmpath_dup(data->dm->mem, mlv))) {
log_error("Failed to build device path for checking pool metadata %s.",
display_lvname(mlv));
return 0;
}
if (data->skip_zero) {
if ((fd = open(mpath, O_RDONLY)) < 0) {
log_sys_error("open", mpath);
return 0;
}
/* let's assume there is no problem to read 64 bytes */
if (read(fd, buf, sizeof(buf)) < (int)sizeof(buf)) {
log_sys_error("read", mpath);
if (close(fd))
log_sys_error("close", mpath);
return 0;
}
for (ret = 0; ret < (int) DM_ARRAY_SIZE(buf); ++ret)
if (buf[ret])
break;
if (close(fd))
log_sys_error("close", mpath);
if (ret == (int) DM_ARRAY_SIZE(buf)) {
log_debug_activation("Metadata checking skipped, detected empty disk header on %s.",
mpath);
return 1;
}
}
if (!(cn = find_config_tree_array(mlv->vg->cmd, data->opts, NULL))) {
log_error(INTERNAL_ERROR "Unable to find configuration for pool check options.");
@@ -1900,36 +1972,7 @@ static int _pool_callback(struct dm_tree_node *node,
return 0;
}
if (!(argv[++args] = lv_dmpath_dup(data->dm->mem, mlv))) {
log_error("Failed to build pool metadata path.");
return 0;
}
if (data->skip_zero) {
if ((fd = open(argv[args], O_RDONLY)) < 0) {
log_sys_error("open", argv[args]);
return 0;
}
/* let's assume there is no problem to read 64 bytes */
if (read(fd, buf, sizeof(buf)) < (int)sizeof(buf)) {
log_sys_error("read", argv[args]);
if (close(fd))
log_sys_error("close", argv[args]);
return 0;
}
for (ret = 0; ret < (int) DM_ARRAY_SIZE(buf); ++ret)
if (buf[ret])
break;
if (close(fd))
log_sys_error("close", argv[args]);
if (ret == (int) DM_ARRAY_SIZE(buf)) {
log_debug_activation("%s skipped, detect empty disk header on %s.",
argv[0], argv[args]);
return 1;
}
}
argv[++args] = mpath;
if (!(ret = exec_cmd(pool_lv->vg->cmd, (const char * const *)argv,
&status, 0))) {
@@ -2017,6 +2060,10 @@ static int _add_lv_to_dtree(struct dev_manager *dm, struct dm_tree *dtree,
struct lv_segment *seg;
struct dm_tree_node *node;
const char *uuid;
const struct logical_volume *plv;
if (lv_is_pvmove(lv) && (dm->track_pvmove_deps == 2))
return 1; /* Avoid rechecking of already seen pvmove LV */
if (lv_is_cache_pool(lv)) {
if (!dm_list_empty(&lv->segs_using_this_lv)) {
@@ -2137,11 +2184,14 @@ static int _add_lv_to_dtree(struct dev_manager *dm, struct dm_tree *dtree,
return_0;
/* Add any LVs referencing a PVMOVE LV unless told not to. */
if (dm->track_pvmove_deps && lv_is_pvmove(lv)) {
dm->track_pvmove_deps = 0;
dm_list_iterate_items(sl, &lv->segs_using_this_lv)
if (!_add_lv_to_dtree(dm, dtree, sl->seg->lv, origin_only))
if ((dm->track_pvmove_deps == 1) && lv_is_pvmove(lv)) {
dm->track_pvmove_deps = 2; /* Mark as already seen */
dm_list_iterate_items(sl, &lv->segs_using_this_lv) {
/* If LV is snapshot COW - whole snapshot needs reload */
plv = lv_is_cow(sl->seg->lv) ? origin_from_cow(sl->seg->lv) : sl->seg->lv;
if (!_add_lv_to_dtree(dm, dtree, plv, 0))
return_0;
}
dm->track_pvmove_deps = 1;
}
@@ -2156,11 +2206,6 @@ static int _add_lv_to_dtree(struct dev_manager *dm, struct dm_tree *dtree,
}
}
/* Adding LV head of replicator adds all other related devs */
if (lv_is_replicator_dev(lv) &&
!_add_partial_replicator_to_dtree(dm, dtree, lv))
return_0;
/* Add any LVs used by segments in this LV */
dm_list_iterate_items(seg, &lv->segments) {
if (seg->external_lv && dm->track_external_lv_deps &&
@@ -2524,64 +2569,6 @@ static int _add_new_lv_to_dtree(struct dev_manager *dm, struct dm_tree *dtree,
struct lv_activate_opts *laopts,
const char *layer);
/*
 * Add all replicators' LVs.
 *
 * Adds to dtree, through _add_new_lv_to_dtree(), the LVs that a
 * replicator-dev segment depends on.
 *
 * Returns 1 on success.  Every failure path leaves through return_0.
 */
static int _add_replicator_dev_target_to_dtree(struct dev_manager *dm,
					       struct dm_tree *dtree,
					       struct lv_segment *seg,
					       struct lv_activate_opts *laopts)
{
	struct replicator_device *rdev;
	struct replicator_site *rsite;

	/* For inactive replicator add linear mapping */
	if (!lv_is_active_replicator_dev(seg->lv)) {
		/* Only the LV behind this segment's rdevice is added in this case. */
		if (!_add_new_lv_to_dtree(dm, dtree, seg->lv->rdevice->lv, laopts, NULL))
			return_0;
		return 1;
	}

	/*
	 * Add rlog and replicator nodes.
	 * A missing replicator LV or a missing rlog LV is treated as failure;
	 * the rlog LV is added before the replicator LV itself.
	 */
	if (!seg->replicator ||
	    !first_seg(seg->replicator)->rlog_lv ||
	    !_add_new_lv_to_dtree(dm, dtree,
				  first_seg(seg->replicator)->rlog_lv,
				  laopts, NULL) ||
	    !_add_new_lv_to_dtree(dm, dtree, seg->replicator, laopts, NULL))
		return_0;

	/*
	 * Activation of one replicator_dev node activates all other nodes.
	 * First pass: for every device of every site, add its lv and its slog
	 * when they are set (either may be NULL and is then skipped).
	 */
	dm_list_iterate_items(rsite, &seg->replicator->rsites) {
		dm_list_iterate_items(rdev, &rsite->rdevices) {
			if (rdev->lv &&
			    !_add_new_lv_to_dtree(dm, dtree, rdev->lv,
						  laopts, NULL))
				return_0;

			if (rdev->slog &&
			    !_add_new_lv_to_dtree(dm, dtree, rdev->slog,
						  laopts, NULL))
				return_0;
		}
	}

	/* Add remaining replicator-dev nodes in the second loop
	 * to avoid multiple retries for inserting all elements */
	dm_list_iterate_items(rsite, &seg->replicator->rsites) {
		/* Sites that are not in the ACTIVE state contribute nothing here. */
		if (rsite->state != REPLICATOR_STATE_ACTIVE)
			continue;

		dm_list_iterate_items(rdev, &rsite->rdevices) {
			/* The LV of the segment being processed is not re-added. */
			if (rdev->replicator_dev->lv == seg->lv)
				continue;

			/* Unlike the first pass, a missing LV is a failure here. */
			if (!rdev->replicator_dev->lv ||
			    !_add_new_lv_to_dtree(dm, dtree,
						  rdev->replicator_dev->lv,
						  laopts, NULL))
				return_0;
		}
	}

	return 1;
}
static int _add_new_external_lv_to_dtree(struct dev_manager *dm,
struct dm_tree *dtree,
struct logical_volume *external_lv,
@@ -2682,11 +2669,6 @@ static int _add_segment_to_dtree(struct dev_manager *dm,
lv_layer(seg->pool_lv)))
return_0;
if (seg_is_replicator_dev(seg)) {
if (!_add_replicator_dev_target_to_dtree(dm, dtree, seg, laopts))
return_0;
}
/* Add any LVs used by this segment */
for (s = 0; s < seg->area_count; ++s) {
if ((seg_type(seg, s) == AREA_LV) &&
@@ -3156,8 +3138,6 @@ static int _tree_action(struct dev_manager *dm, const struct logical_volume *lv,
if (!dm_tree_preload_children(root, dlid, DLID_SIZE))
goto_out;
//if (action == PRELOAD) { log_debug("SLEEP"); sleep(7); }
if ((dm_tree_node_size_changed(root) < 0))
dm->flush_required = 1;
/* Currently keep the code require flush for any

61
lib/cache/lvmcache.c vendored
View File

@@ -141,6 +141,8 @@ void lvmcache_seed_infos_from_lvmetad(struct cmd_context *cmd)
/* Volume Group metadata cache functions */
static void _free_cached_vgmetadata(struct lvmcache_vginfo *vginfo)
{
struct lvmcache_info *info;
if (!vginfo || !vginfo->vgmetadata)
return;
@@ -154,7 +156,11 @@ static void _free_cached_vgmetadata(struct lvmcache_vginfo *vginfo)
vginfo->cft = NULL;
}
log_debug_cache("Metadata cache: VG %s wiped.", vginfo->vgname);
/* Invalidate any cached device buffers */
dm_list_iterate_items(info, &vginfo->infos)
devbufs_release(info->dev);
log_debug_cache("lvmcache: VG %s wiped.", vginfo->vgname);
release_vg(vginfo->cached_vg);
}
@@ -197,7 +203,7 @@ static void _store_metadata(struct volume_group *vg, unsigned precommitted)
return;
}
log_debug_cache("Metadata cache: VG %s (%s) stored (%" PRIsize_t " bytes%s).",
log_debug_cache("lvmcache: VG %s (%s) stored (%" PRIsize_t " bytes%s).",
vginfo->vgname, uuid, size,
precommitted ? ", precommitted" : "");
}
@@ -289,7 +295,7 @@ void lvmcache_commit_metadata(const char *vgname)
return;
if (vginfo->precommitted) {
log_debug_cache("Precommitted metadata cache: VG %s upgraded to committed.",
log_debug_cache("lvmcache: Upgraded pre-committed VG %s metadata to committed.",
vginfo->vgname);
vginfo->precommitted = 0;
}
@@ -542,7 +548,6 @@ const struct format_type *lvmcache_fmt_from_vgname(struct cmd_context *cmd,
{
struct lvmcache_vginfo *vginfo;
struct lvmcache_info *info;
struct label *label;
struct dm_list *devh, *tmp;
struct dm_list devs;
struct device_list *devl;
@@ -587,7 +592,7 @@ const struct format_type *lvmcache_fmt_from_vgname(struct cmd_context *cmd,
dm_list_iterate_safe(devh, tmp, &devs) {
devl = dm_list_item(devh, struct device_list);
(void) label_read(devl->dev, &label, UINT64_C(0));
(void) label_read(devl->dev, NULL, UINT64_C(0));
dm_list_del(&devl->list);
dm_free(devl);
}
@@ -616,7 +621,7 @@ struct lvmcache_vginfo *lvmcache_vginfo_from_vgid(const char *vgid)
id[ID_LEN] = '\0';
if (!(vginfo = dm_hash_lookup(_vgid_hash, id))) {
log_debug_cache("Metadata cache has no info for vgid \"%s\"", id);
log_debug_cache("lvmcache has no info for vgid \"%s\"", id);
return NULL;
}
@@ -770,10 +775,8 @@ char *lvmcache_vgname_from_pvid(struct cmd_context *cmd, const char *pvid)
static void _rescan_entry(struct lvmcache_info *info)
{
struct label *label;
if (info->status & CACHE_INVALID)
(void) label_read(info->dev, &label, UINT64_C(0));
(void) label_read(info->dev, NULL, UINT64_C(0));
}
static int _scan_invalid(void)
@@ -1095,17 +1098,31 @@ next:
goto next;
}
/*
 * Track the number of outstanding label reads.
 *
 * Completion callback handed to label_read_callback(): context points at
 * the caller's int counter, which is decremented once per completed read.
 * A call arriving while the counter is already zero is reported as an
 * internal error and leaves the counter untouched.
 *
 * failed, ioflags and data are currently unused.
 */
/* FIXME Switch to struct and also track failed */
static void _process_label_data(int failed, unsigned ioflags, void *context, const void *data)
{
	int *outstanding = context;

	if (*outstanding) {
		--*outstanding;
		return;
	}

	log_error(INTERNAL_ERROR "_process_label_data called too many times");
}
int lvmcache_label_scan(struct cmd_context *cmd)
{
struct dm_list del_cache_devs;
struct dm_list add_cache_devs;
struct lvmcache_info *info;
struct device_list *devl;
struct label *label;
struct dev_iter *iter;
struct device *dev;
struct format_type *fmt;
int dev_count = 0;
int nr_labels_outstanding = 0;
int r = 0;
@@ -1144,13 +1161,22 @@ int lvmcache_label_scan(struct cmd_context *cmd)
_destroy_duplicate_device_list(&_found_duplicate_devs);
while ((dev = dev_iter_get(iter))) {
(void) label_read(dev, &label, UINT64_C(0));
log_debug_io("Scanning device %s", dev_name(dev));
nr_labels_outstanding++;
if (!label_read_callback(dev, UINT64_C(0), AIO_SUPPORTED_CODE_PATH, _process_label_data, &nr_labels_outstanding))
nr_labels_outstanding--;
dev_count++;
}
dev_iter_destroy(iter);
log_very_verbose("Scanned %d device labels", dev_count);
while (nr_labels_outstanding) {
log_very_verbose("Scanned %d device labels (%d outstanding)", dev_count, nr_labels_outstanding);
if (!dev_async_getevents())
return_0;
}
log_very_verbose("Scanned %d device labels (%d outstanding)", dev_count, nr_labels_outstanding);
/*
* _choose_preferred_devs() returns:
@@ -1184,7 +1210,7 @@ int lvmcache_label_scan(struct cmd_context *cmd)
dm_list_iterate_items(devl, &add_cache_devs) {
log_debug_cache("Rescan preferred device %s for lvmcache", dev_name(devl->dev));
(void) label_read(devl->dev, &label, UINT64_C(0));
(void) label_read(devl->dev, NULL, UINT64_C(0));
}
dm_list_splice(&_unused_duplicate_devs, &del_cache_devs);
@@ -1204,7 +1230,7 @@ int lvmcache_label_scan(struct cmd_context *cmd)
*/
if (_force_label_scan && cmd->is_long_lived &&
cmd->dump_filter && cmd->full_filter && cmd->full_filter->dump &&
!cmd->full_filter->dump(cmd->full_filter, 0))
!cmd->full_filter->dump(cmd->full_filter, cmd->mem, 0))
stack;
r = 1;
@@ -1505,7 +1531,6 @@ const char *lvmcache_pvid_from_devname(struct cmd_context *cmd,
const char *devname)
{
struct device *dev;
struct label *label;
if (!(dev = dev_cache_get(devname, cmd->filter))) {
log_error("%s: Couldn't find device. Check your filters?",
@@ -1513,7 +1538,7 @@ const char *lvmcache_pvid_from_devname(struct cmd_context *cmd,
return NULL;
}
if (!(label_read(dev, &label, UINT64_C(0))))
if (!(label_read(dev, NULL, UINT64_C(0))))
return NULL;
return dev->pvid;
@@ -1870,7 +1895,7 @@ static int _lvmcache_update_vgname(struct lvmcache_info *info,
vginfo->vgid[0] ? vginfo->vgid : "",
vginfo->vgid[0] ? ")" : "", mdabuf);
} else
log_debug_cache("lvmcache initialised VG %s.", vgname);
log_debug_cache("lvmcache: Initialised VG %s.", vgname);
return 1;
}
@@ -1979,7 +2004,7 @@ int lvmcache_add_orphan_vginfo(const char *vgname, struct format_type *fmt)
return _lvmcache_update_vgname(NULL, vgname, vgname, 0, "", fmt);
}
int lvmcache_update_vgname_and_id(struct lvmcache_info *info, struct lvmcache_vgsummary *vgsummary)
int lvmcache_update_vgname_and_id(struct lvmcache_info *info, const struct lvmcache_vgsummary *vgsummary)
{
const char *vgname = vgsummary->vgname;
const char *vgid = (char *)&vgsummary->vgid;

View File

@@ -85,7 +85,7 @@ void lvmcache_del(struct lvmcache_info *info);
/* Update things */
int lvmcache_update_vgname_and_id(struct lvmcache_info *info,
struct lvmcache_vgsummary *vgsummary);
const struct lvmcache_vgsummary *vgsummary);
int lvmcache_update_vg(struct volume_group *vg, unsigned precommitted);
void lvmcache_lock_vgname(const char *vgname, int read_only);

45
lib/cache/lvmetad.c vendored
View File

@@ -39,7 +39,7 @@ static int64_t _lvmetad_update_timeout;
static int _found_lvm1_metadata = 0;
static struct volume_group *lvmetad_pvscan_vg(struct cmd_context *cmd, struct volume_group *vg);
static struct volume_group *_lvmetad_pvscan_vg(struct cmd_context *cmd, struct volume_group *vg);
static uint64_t _monotonic_seconds(void)
{
@@ -551,6 +551,7 @@ static int _token_update(int *replaced_update)
daemon_reply reply;
const char *token_expected;
const char *prev_token;
const char *reply_str;
int update_pid;
int ending_our_update;
@@ -567,13 +568,14 @@ static int _token_update(int *replaced_update)
}
update_pid = (int)daemon_reply_int(reply, "update_pid", 0);
reply_str = daemon_reply_str(reply, "response", "");
/*
* A mismatch can only happen when this command attempts to set the
* token to filter:<hash> at the end of its update, but the update has
* been preempted in lvmetad by a new one (from update_pid).
*/
if (!strcmp(daemon_reply_str(reply, "response", ""), "token_mismatch")) {
if (!strcmp(reply_str, "token_mismatch")) {
token_expected = daemon_reply_str(reply, "expected", "");
ending_our_update = strcmp(_lvmetad_token, LVMETAD_TOKEN_UPDATE_IN_PROGRESS);
@@ -599,7 +601,7 @@ static int _token_update(int *replaced_update)
return 0;
}
if (strcmp(daemon_reply_str(reply, "response", ""), "OK")) {
if (strcmp(reply_str, "OK")) {
log_error("Failed response from lvmetad for token update.");
daemon_reply_destroy(reply);
return 0;
@@ -626,6 +628,7 @@ static int _lvmetad_handle_reply(daemon_reply reply, const char *id, const char
{
const char *token_expected;
const char *action;
const char *reply_str;
int action_modifies = 0;
int daemon_in_update;
int we_are_in_update;
@@ -670,8 +673,8 @@ static int _lvmetad_handle_reply(daemon_reply reply, const char *id, const char
/*
* Errors related to token mismatch.
*/
if (!strcmp(daemon_reply_str(reply, "response", ""), "token_mismatch")) {
reply_str = daemon_reply_str(reply, "response", "");
if (!strcmp(reply_str, "token_mismatch")) {
token_expected = daemon_reply_str(reply, "expected", "");
update_pid = (int)daemon_reply_int(reply, "update_pid", 0);
@@ -769,14 +772,14 @@ static int _lvmetad_handle_reply(daemon_reply reply, const char *id, const char
*/
/* All OK? */
if (!strcmp(daemon_reply_str(reply, "response", ""), "OK")) {
if (!strcmp(reply_str, "OK")) {
if (found)
*found = 1;
return 1;
}
/* Unknown device permitted? */
if (found && !strcmp(daemon_reply_str(reply, "response", ""), "unknown")) {
if (found && !strcmp(reply_str, "unknown")) {
log_very_verbose("Request to %s %s%sin lvmetad did not find any matching object.",
action, object, *object ? " " : "");
*found = 0;
@@ -784,7 +787,7 @@ static int _lvmetad_handle_reply(daemon_reply reply, const char *id, const char
}
/* Multiple VGs with the same name were found. */
if (found && !strcmp(daemon_reply_str(reply, "response", ""), "multiple")) {
if (found && !strcmp(reply_str, "multiple")) {
log_very_verbose("Request to %s %s%sin lvmetad found multiple matching objects.",
action, object, *object ? " " : "");
if (found)
@@ -1090,7 +1093,7 @@ struct volume_group *lvmetad_vg_lookup(struct cmd_context *cmd, const char *vgna
* invalidated the cached vg.
*/
if (rescan) {
if (!(vg2 = lvmetad_pvscan_vg(cmd, vg))) {
if (!(vg2 = _lvmetad_pvscan_vg(cmd, vg))) {
log_debug_lvmetad("VG %s from lvmetad not found during rescan.", vgname);
fid = NULL;
release_vg(vg);
@@ -1516,7 +1519,7 @@ int lvmetad_vg_list_to_lvmcache(struct cmd_context *cmd)
return 1;
}
struct _extract_dl_baton {
struct extract_dl_baton {
int i;
struct dm_config_tree *cft;
struct dm_config_node *pre_sib;
@@ -1524,7 +1527,7 @@ struct _extract_dl_baton {
static int _extract_mda(struct metadata_area *mda, void *baton)
{
struct _extract_dl_baton *b = baton;
struct extract_dl_baton *b = baton;
struct dm_config_node *cn;
char id[32];
@@ -1545,7 +1548,7 @@ static int _extract_mda(struct metadata_area *mda, void *baton)
static int _extract_disk_location(const char *name, struct disk_locn *dl, void *baton)
{
struct _extract_dl_baton *b = baton;
struct extract_dl_baton *b = baton;
struct dm_config_node *cn;
char id[32];
@@ -1580,7 +1583,7 @@ static int _extract_ba(struct disk_locn *ba, void *baton)
static int _extract_mdas(struct lvmcache_info *info, struct dm_config_tree *cft,
struct dm_config_node *pre_sib)
{
struct _extract_dl_baton baton = { .cft = cft };
struct extract_dl_baton baton = { .cft = cft };
if (!lvmcache_foreach_mda(info, &_extract_mda, &baton))
return 0;
@@ -1607,7 +1610,7 @@ int lvmetad_pv_found(struct cmd_context *cmd, const struct id *pvid, struct devi
struct dm_config_tree *pvmeta, *vgmeta;
const char *status = NULL, *vgname = NULL;
int64_t changed = 0;
int result;
int result, seqno_after;
if (!lvmetad_used() || test_mode())
return 1;
@@ -1672,10 +1675,12 @@ int lvmetad_pv_found(struct cmd_context *cmd, const struct id *pvid, struct devi
result = _lvmetad_handle_reply(reply, "pv_found", uuid, NULL);
if (vg && result &&
(daemon_reply_int(reply, "seqno_after", -1) != vg->seqno ||
daemon_reply_int(reply, "seqno_after", -1) != daemon_reply_int(reply, "seqno_before", -1)))
log_warn("WARNING: Inconsistent metadata found for VG %s", vg->name);
if (vg && result) {
seqno_after = daemon_reply_int(reply, "seqno_after", -1);
if ((seqno_after != vg->seqno) ||
(seqno_after != daemon_reply_int(reply, "seqno_before", -1)))
log_warn("WARNING: Inconsistent metadata found for VG %s", vg->name);
}
if (result && found_vgnames) {
status = daemon_reply_str(reply, "status", NULL);
@@ -1766,7 +1771,7 @@ static int _lvmetad_pvscan_single(struct metadata_area *mda, void *baton)
struct volume_group *vg;
if (mda_is_ignored(mda) ||
!(vg = mda->ops->vg_read(b->fid, "", mda, NULL, NULL, 1)))
!(vg = mda->ops->vg_read(b->fid, "", mda, NULL, NULL, 1, 0)))
return 1;
/* FIXME Also ensure contents match etc. */
@@ -1787,7 +1792,7 @@ static int _lvmetad_pvscan_single(struct metadata_area *mda, void *baton)
* the VG, and that PV may have been reused for another VG.
*/
static struct volume_group *lvmetad_pvscan_vg(struct cmd_context *cmd, struct volume_group *vg)
static struct volume_group *_lvmetad_pvscan_vg(struct cmd_context *cmd, struct volume_group *vg)
{
char pvid_s[ID_LEN + 1] __attribute__((aligned(8)));
char uuid[64] __attribute__((aligned(8)));

View File

@@ -54,7 +54,7 @@
# include <malloc.h>
#endif
static const size_t linebuffer_size = 4096;
static const size_t _linebuffer_size = 4096;
/*
* Copy the input string, removing invalid characters.
@@ -283,6 +283,8 @@ static int _parse_debug_classes(struct cmd_context *cmd)
debug_classes |= LOG_CLASS_LVMPOLLD;
else if (!strcasecmp(cv->v.str, "dbus"))
debug_classes |= LOG_CLASS_DBUS;
else if (!strcasecmp(cv->v.str, "io"))
debug_classes |= LOG_CLASS_IO;
else
log_verbose("Unrecognised value for log/debug_classes: %s", cv->v.str);
}
@@ -564,7 +566,7 @@ static int _process_config(struct cmd_context *cmd)
#ifdef DEVMAPPER_SUPPORT
dm_set_dev_dir(cmd->dev_dir);
if (!dm_set_uuid_prefix("LVM-"))
if (!dm_set_uuid_prefix(UUID_PREFIX))
return_0;
#endif
@@ -633,6 +635,16 @@ static int _process_config(struct cmd_context *cmd)
*/
cmd->default_settings.udev_fallback = udev_disabled ? 1 : -1;
#ifdef AIO_SUPPORT
cmd->use_aio = find_config_tree_bool(cmd, devices_use_aio_CFG, NULL);
#else
cmd->use_aio = 0;
#endif
if (cmd->use_aio && !dev_async_setup(cmd))
cmd->use_aio = 0;
log_debug_io("%ssing asynchronous I/O.", cmd->use_aio ? "U" : "Not u");
init_retry_deactivation(find_config_tree_bool(cmd, activation_retry_deactivation_CFG, NULL));
init_activation_checks(find_config_tree_bool(cmd, activation_checks_CFG, NULL));
@@ -1285,7 +1297,7 @@ int init_filters(struct cmd_context *cmd, unsigned load_persistent_cache)
lvm_stat_ctim(&ts, &st);
cts = config_file_timestamp(cmd->cft);
if (timespeccmp(&ts, &cts, >) &&
!persistent_filter_load(cmd->filter, NULL))
!persistent_filter_load(cmd->mem, cmd->filter, NULL))
log_verbose("Failed to load existing device cache from %s",
dev_cache);
}
@@ -1501,11 +1513,6 @@ static int _init_segtypes(struct cmd_context *cmd)
dm_list_add(&cmd->segtypes, &segtype->list);
}
#ifdef REPLICATOR_INTERNAL
if (!init_replicator_segtype(cmd, &seglib))
return 0;
#endif
#ifdef RAID_INTERNAL
if (!init_raid_segtypes(cmd, &seglib))
return 0;
@@ -1878,7 +1885,7 @@ struct cmd_context *create_toolcontext(unsigned is_long_lived,
/* Set in/out stream buffering before glibc */
if (set_buffering) {
/* Allocate 2 buffers */
if (!(cmd->linebuffer = dm_malloc(2 * linebuffer_size))) {
if (!(cmd->linebuffer = dm_malloc(2 * _linebuffer_size))) {
log_error("Failed to allocate line buffer.");
goto out;
}
@@ -1889,7 +1896,7 @@ struct cmd_context *create_toolcontext(unsigned is_long_lived,
(flags & O_ACCMODE) != O_WRONLY) {
if (!reopen_standard_stream(&stdin, "r"))
goto_out;
if (setvbuf(stdin, cmd->linebuffer, _IOLBF, linebuffer_size)) {
if (setvbuf(stdin, cmd->linebuffer, _IOLBF, _linebuffer_size)) {
log_sys_error("setvbuf", "");
goto out;
}
@@ -1900,8 +1907,8 @@ struct cmd_context *create_toolcontext(unsigned is_long_lived,
(flags & O_ACCMODE) != O_RDONLY) {
if (!reopen_standard_stream(&stdout, "w"))
goto_out;
if (setvbuf(stdout, cmd->linebuffer + linebuffer_size,
_IOLBF, linebuffer_size)) {
if (setvbuf(stdout, cmd->linebuffer + _linebuffer_size,
_IOLBF, _linebuffer_size)) {
log_sys_error("setvbuf", "");
goto out;
}
@@ -2141,6 +2148,8 @@ int refresh_toolcontext(struct cmd_context *cmd)
cmd->lib_dir = NULL;
label_init();
if (!_init_lvm_conf(cmd))
return_0;
@@ -2228,7 +2237,7 @@ void destroy_toolcontext(struct cmd_context *cmd)
int flags;
if (cmd->dump_filter && cmd->filter && cmd->filter->dump &&
!cmd->filter->dump(cmd->filter, 1))
!cmd->filter->dump(cmd->filter, cmd->mem, 1))
stack;
archive_exit(cmd);

View File

@@ -160,9 +160,11 @@ struct cmd_context {
unsigned lockd_vg_rescan:1;
unsigned lockd_vg_default_sh:1;
unsigned lockd_vg_enforce_sh:1;
unsigned lockd_lv_sh:1;
unsigned vg_notify:1;
unsigned lv_notify:1;
unsigned pv_notify:1;
unsigned use_aio:1;
/*
* Filtering.

View File

@@ -1,6 +1,6 @@
/*
* Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved.
* Copyright (C) 2004-2011 Red Hat, Inc. All rights reserved.
* Copyright (C) 2004-2018 Red Hat, Inc. All rights reserved.
*
* This file is part of LVM2.
*
@@ -279,7 +279,7 @@ struct dm_config_tree *config_file_open_and_read(const char *config_file,
}
log_very_verbose("Loading config file: %s", config_file);
if (!config_file_read(cft)) {
if (!config_file_read(cmd->mem, cft)) {
log_error("Failed to load config file %s", config_file);
goto bad;
}
@@ -489,32 +489,102 @@ int override_config_tree_from_profile(struct cmd_context *cmd,
return 0;
}
/*
 * Parameters carried from config_file_read_fd() to
 * _process_config_file_buffer(), which may run as a read-completion
 * callback.
 */
struct process_config_file_params {
	/* Config tree the buffer is parsed into. */
	struct dm_config_tree *cft;
	/* Device the data was read from; used for its name in error messages. */
	struct device *dev;
	/* Offset of the first part of the data; reported on checksum errors. */
	off_t offset;
	/* Length in bytes of the first part of the buffer. */
	size_t size;
	/* Offset of the second part; stored but not read by the buffer handler. */
	off_t offset2;
	/* Length in bytes of the second part, which follows the first in the buffer. */
	size_t size2;
	/* Checksum function; when NULL no checksum verification is done. */
	checksum_fn_t checksum_fn;
	/* Expected checksum over both parts of the buffer. */
	uint32_t checksum;
	/* Non-zero: verify the checksum only and skip parsing. */
	int checksum_only;
	/* Non-zero: parse with dm_config_parse_without_dup_node_check(). */
	int no_dup_node_check;
	/* Optional callback invoked once processing ends, with failed = !ret. */
	lvm_callback_fn_t config_file_read_fd_callback;
	/* Opaque context passed through to config_file_read_fd_callback. */
	void *config_file_read_fd_context;
	/* Result: set to 1 by config_file_read_fd(), cleared to 0 on any failure. */
	int ret;
};
static void _process_config_file_buffer(int failed, unsigned ioflags, void *context, const void *data)
{
struct process_config_file_params *pcfp = context;
const char *fb = data, *fe;
if (failed) {
pcfp->ret = 0;
goto_out;
}
if (pcfp->checksum_fn && pcfp->checksum !=
(pcfp->checksum_fn(pcfp->checksum_fn(INITIAL_CRC, (const uint8_t *)fb, pcfp->size),
(const uint8_t *)(fb + pcfp->size), pcfp->size2))) {
log_error("%s: Checksum error at offset %" PRIu64, dev_name(pcfp->dev), (uint64_t) pcfp->offset);
pcfp->ret = 0;
goto out;
}
if (!pcfp->checksum_only) {
fe = fb + pcfp->size + pcfp->size2;
if (pcfp->no_dup_node_check) {
if (!dm_config_parse_without_dup_node_check(pcfp->cft, fb, fe))
pcfp->ret = 0;
} else if (!dm_config_parse(pcfp->cft, fb, fe))
pcfp->ret = 0;
}
out:
if (pcfp->config_file_read_fd_callback)
pcfp->config_file_read_fd_callback(!pcfp->ret, ioflags, pcfp->config_file_read_fd_context, NULL);
}
/*
* When checksum_only is set, the checksum of buffer is only matched
* and function avoids parsing of mda into config tree which
* remains unmodified and should not be used.
*/
int config_file_read_fd(struct dm_config_tree *cft, struct device *dev,
int config_file_read_fd(struct dm_pool *mem, struct dm_config_tree *cft, struct device *dev, dev_io_reason_t reason,
off_t offset, size_t size, off_t offset2, size_t size2,
checksum_fn_t checksum_fn, uint32_t checksum,
int checksum_only, int no_dup_node_check)
int checksum_only, int no_dup_node_check, unsigned ioflags,
lvm_callback_fn_t config_file_read_fd_callback, void *config_file_read_fd_context)
{
char *fb, *fe;
char *fb;
int r = 0;
int use_mmap = 1;
off_t mmap_offset = 0;
char *buf = NULL;
int use_mmap = 1;
const char *buf = NULL;
unsigned circular = size2 ? 1 : 0; /* Wrapped around end of disk metadata buffer? */
struct config_source *cs = dm_config_get_custom(cft);
struct process_config_file_params *pcfp;
if (!_is_file_based_config_source(cs->type)) {
log_error(INTERNAL_ERROR "config_file_read_fd: expected file, special file "
"or profile config source, found %s config source.",
_config_source_names[cs->type]);
return 0;
goto bad;
}
if (!(pcfp = dm_pool_zalloc(mem, sizeof(*pcfp)))) {
log_debug("config_file_read_fd: process_config_file_params struct allocation failed");
goto bad;
}
pcfp->cft = cft;
pcfp->dev = dev;
pcfp->offset = offset;
pcfp->size = size;
pcfp->offset2 = offset2;
pcfp->size2 = size2;
pcfp->checksum_fn = checksum_fn;
pcfp->checksum = checksum;
pcfp->checksum_only = checksum_only;
pcfp->no_dup_node_check = no_dup_node_check;
pcfp->config_file_read_fd_callback = config_file_read_fd_callback;
pcfp->config_file_read_fd_context = config_file_read_fd_context;
pcfp->ret = 1;
/* Only use mmap with regular files */
if (!(dev->flags & DEV_REGULAR) || size2)
if (!(dev->flags & DEV_REGULAR) || circular)
use_mmap = 0;
if (use_mmap) {
@@ -524,56 +594,40 @@ int config_file_read_fd(struct dm_config_tree *cft, struct device *dev,
MAP_PRIVATE, dev_fd(dev), offset - mmap_offset);
if (fb == (caddr_t) (-1)) {
log_sys_error("mmap", dev_name(dev));
goto out;
goto bad;
}
fb = fb + mmap_offset;
} else {
if (!(buf = dm_malloc(size + size2))) {
log_error("Failed to allocate circular buffer.");
return 0;
}
if (!dev_read_circular(dev, (uint64_t) offset, size,
(uint64_t) offset2, size2, buf)) {
goto out;
}
fb = buf;
}
if (checksum_fn && checksum !=
(checksum_fn(checksum_fn(INITIAL_CRC, (const uint8_t *)fb, size),
(const uint8_t *)(fb + size), size2))) {
log_error("%s: Checksum error", dev_name(dev));
goto out;
}
if (!checksum_only) {
fe = fb + size + size2;
if (no_dup_node_check) {
if (!dm_config_parse_without_dup_node_check(cft, fb, fe))
goto_out;
} else {
if (!dm_config_parse(cft, fb, fe))
goto_out;
}
}
r = 1;
out:
if (!use_mmap)
dm_free(buf);
else {
_process_config_file_buffer(0, ioflags, pcfp, fb + mmap_offset);
r = pcfp->ret;
/* unmap the file */
if (munmap(fb - mmap_offset, size + mmap_offset)) {
if (munmap(fb, size + mmap_offset)) {
log_sys_error("munmap", dev_name(dev));
r = 0;
}
} else {
if (circular) {
if (!(buf = dev_read_circular(dev, (uint64_t) offset, size, (uint64_t) offset2, size2, reason)))
goto_out;
_process_config_file_buffer(0, ioflags, pcfp, buf);
dm_free((void *)buf);
} else {
dev_read_callback(dev, (uint64_t) offset, size, reason, ioflags, _process_config_file_buffer, pcfp);
if (config_file_read_fd_callback)
return 1;
}
r = pcfp->ret;
}
out:
return r;
bad:
if (config_file_read_fd_callback)
config_file_read_fd_callback(1, ioflags, config_file_read_fd_context, NULL);
return 0;
}
int config_file_read(struct dm_config_tree *cft)
int config_file_read(struct dm_pool *mem, struct dm_config_tree *cft)
{
const char *filename = NULL;
struct config_source *cs = dm_config_get_custom(cft);
@@ -601,8 +655,8 @@ int config_file_read(struct dm_config_tree *cft)
}
}
r = config_file_read_fd(cft, cf->dev, 0, (size_t) info.st_size, 0, 0,
(checksum_fn_t) NULL, 0, 0, 0);
r = config_file_read_fd(mem, cft, cf->dev, DEV_IO_MDA_CONTENT, 0, (size_t) info.st_size, 0, 0,
(checksum_fn_t) NULL, 0, 0, 0, 0, NULL, NULL);
if (!cf->keep_open) {
if (!dev_close(cf->dev))

View File

@@ -17,12 +17,12 @@
#define _LVM_CONFIG_H
#include "libdevmapper.h"
#include "device.h"
/* 16 bits: 3 bits for major, 4 bits for minor, 9 bits for patchlevel */
/* FIXME Max LVM version supported: 7.15.511. Extend bits when needed. */
#define vsn(major, minor, patchlevel) (major << 13 | minor << 9 | patchlevel)
struct device;
struct cmd_context;
typedef enum {
@@ -239,11 +239,13 @@ config_source_t config_get_source_type(struct dm_config_tree *cft);
typedef uint32_t (*checksum_fn_t) (uint32_t initial, const uint8_t *buf, uint32_t size);
struct dm_config_tree *config_open(config_source_t source, const char *filename, int keep_open);
int config_file_read_fd(struct dm_config_tree *cft, struct device *dev,
int config_file_read_fd(struct dm_pool *mem, struct dm_config_tree *cft, struct device *dev, dev_io_reason_t reason,
off_t offset, size_t size, off_t offset2, size_t size2,
checksum_fn_t checksum_fn, uint32_t checksum,
int skip_parse, int no_dup_node_check);
int config_file_read(struct dm_config_tree *cft);
int skip_parse, int no_dup_node_check, unsigned ioflags,
lvm_callback_fn_t config_file_read_fd_callback, void *config_file_read_fd_context);
int config_file_read(struct dm_pool *mem, struct dm_config_tree *cft);
struct dm_config_tree *config_file_open_and_read(const char *config_file, config_source_t source,
struct cmd_context *cmd);
int config_write(struct dm_config_tree *cft, struct config_def_tree_spec *tree_spec,

View File

@@ -226,6 +226,16 @@ cfg(devices_dir_CFG, "dir", devices_CFG_SECTION, CFG_ADVANCED, CFG_TYPE_STRING,
cfg_array(devices_scan_CFG, "scan", devices_CFG_SECTION, CFG_ADVANCED, CFG_TYPE_STRING, "#S/dev", vsn(1, 0, 0), NULL, 0, NULL,
"Directories containing device nodes to use with LVM.\n")
cfg(devices_use_aio_CFG, "use_aio", devices_CFG_SECTION, CFG_DEFAULT_COMMENTED, CFG_TYPE_BOOL, DEFAULT_USE_AIO, vsn(2, 2, 178), NULL, 0, NULL,
"Use linux asynchronous I/O for parallel device access where possible.\n")
cfg(devices_aio_max_CFG, "aio_max", devices_CFG_SECTION, CFG_DEFAULT_COMMENTED, CFG_TYPE_INT, DEFAULT_AIO_MAX, vsn(2, 2, 178), NULL, 0, NULL,
"Maximum number of asynchronous I/Os to issue concurrently.\n")
cfg(devices_aio_memory_CFG, "aio_memory", devices_CFG_SECTION, CFG_DEFAULT_COMMENTED, CFG_TYPE_INT, DEFAULT_AIO_MEMORY, vsn(2, 2, 178), NULL, 0, NULL,
"Approximate maximum total amount of memory (in MB) used\n"
"for asynchronous I/O buffers.\n")
cfg_array(devices_loopfiles_CFG, "loopfiles", devices_CFG_SECTION, CFG_DEFAULT_UNDEFINED | CFG_UNSUPPORTED, CFG_TYPE_STRING, NULL, vsn(1, 2, 0), NULL, 0, NULL, NULL)
cfg(devices_obtain_device_list_from_udev_CFG, "obtain_device_list_from_udev", devices_CFG_SECTION, 0, CFG_TYPE_BOOL, DEFAULT_OBTAIN_DEVICE_LIST_FROM_UDEV, vsn(2, 2, 85), NULL, 0, NULL,
@@ -705,11 +715,11 @@ cfg(log_activation_CFG, "activation", log_CFG_SECTION, 0, CFG_TYPE_BOOL, 0, vsn(
cfg(log_activate_file_CFG, "activate_file", log_CFG_SECTION, CFG_DEFAULT_UNDEFINED | CFG_UNSUPPORTED, CFG_TYPE_STRING, NULL, vsn(1, 0, 0), NULL, 0, NULL, NULL)
cfg_array(log_debug_classes_CFG, "debug_classes", log_CFG_SECTION, CFG_ALLOW_EMPTY, CFG_TYPE_STRING, "#Smemory#Sdevices#Sactivation#Sallocation#Slvmetad#Smetadata#Scache#Slocking#Slvmpolld#Sdbus", vsn(2, 2, 99), NULL, 0, NULL,
cfg_array(log_debug_classes_CFG, "debug_classes", log_CFG_SECTION, CFG_ALLOW_EMPTY, CFG_TYPE_STRING, "#Smemory#Sdevices#Sio#Sactivation#Sallocation#Slvmetad#Smetadata#Scache#Slocking#Slvmpolld#Sdbus", vsn(2, 2, 99), NULL, 0, NULL,
"Select log messages by class.\n"
"Some debugging messages are assigned to a class and only appear in\n"
"debug output if the class is listed here. Classes currently\n"
"available: memory, devices, activation, allocation, lvmetad,\n"
"available: memory, devices, io, activation, allocation, lvmetad,\n"
"metadata, cache, locking, lvmpolld. Use \"all\" to see everything.\n")
cfg(backup_backup_CFG, "backup", backup_CFG_SECTION, 0, CFG_TYPE_BOOL, DEFAULT_BACKUP_ENABLED, vsn(1, 0, 0), NULL, 0, NULL,

View File

@@ -32,6 +32,9 @@
#define DEFAULT_SYSTEM_ID_SOURCE "none"
#define DEFAULT_OBTAIN_DEVICE_LIST_FROM_UDEV 1
#define DEFAULT_EXTERNAL_DEVICE_INFO_SOURCE "none"
#define DEFAULT_USE_AIO 1
#define DEFAULT_AIO_MAX 128
#define DEFAULT_AIO_MEMORY 10
#define DEFAULT_SYSFS_SCAN 1
#define DEFAULT_MD_COMPONENT_DETECTION 1
#define DEFAULT_FW_RAID_COMPONENT_DETECTION 0

View File

@@ -708,7 +708,7 @@ static int _insert_dev(const char *path, dev_t d)
if (dm_hash_lookup(_cache.names, path) == dev) {
/* Hash already has matching entry present */
log_debug("Path already cached %s.", path);
log_debug("%s: Path already cached.", path);
return 1;
}
@@ -939,12 +939,20 @@ static int _insert_udev_dir(struct udev *udev, const char *dir)
struct udev_device *device;
int r = 1;
if (!(udev_enum = udev_enumerate_new(udev)))
goto bad;
if (!(udev_enum = udev_enumerate_new(udev))) {
log_error("Failed to udev_enumerate_new.");
return 0;
}
if (udev_enumerate_add_match_subsystem(udev_enum, "block") ||
udev_enumerate_scan_devices(udev_enum))
goto bad;
if (udev_enumerate_add_match_subsystem(udev_enum, "block")) {
log_error("Failed to udev_enumerate_add_match_subsystem.");
goto out;
}
if (udev_enumerate_scan_devices(udev_enum)) {
log_error("Failed to udev_enumerate_scan_devices.");
goto out;
}
/*
* Report any missing information as "log_very_verbose" only, do not
@@ -981,13 +989,10 @@ static int _insert_udev_dir(struct udev *udev, const char *dir)
udev_device_unref(device);
}
out:
udev_enumerate_unref(udev_enum);
return r;
bad:
log_error("Failed to enumerate udev device list.");
udev_enumerate_unref(udev_enum);
return 0;
return r;
}
static void _insert_dirs(struct dm_list *dirs)
@@ -1242,12 +1247,24 @@ int dev_cache_check_for_open_devices(void)
int dev_cache_exit(void)
{
struct btree_iter *b;
int num_open = 0;
dev_async_exit();
if (_cache.names)
if ((num_open = _check_for_open_devices(1)) > 0)
log_error(INTERNAL_ERROR "%d device(s) were left open and have been closed.", num_open);
if (_cache.devices) {
/* FIXME Replace with structured devbuf cache */
b = btree_first(_cache.devices);
while (b) {
devbufs_release(btree_get_data(b));
b = btree_next(b);
}
}
if (_cache.mem)
dm_pool_destroy(_cache.mem);
@@ -1370,6 +1387,19 @@ const char *dev_name_confirmed(struct device *dev, int quiet)
return dev_name(dev);
}
/*
 * Provide a custom reason when a device is ignored.
 *
 * Returns a static string: "excluded by a filter" when name is present in
 * the device cache's name table, "not found" otherwise.
 */
const char *dev_cache_filtered_reason(const char *name)
{
	/* FIXME Record which filter caused the exclusion */
	if (dm_hash_lookup(_cache.names, name))
		return "excluded by a filter";

	return "not found";
}
struct device *dev_cache_get(const char *name, struct dev_filter *f)
{
struct stat buf;
@@ -1405,7 +1435,7 @@ struct device *dev_cache_get(const char *name, struct dev_filter *f)
if (!d || (f && !(d->flags & DEV_REGULAR) && !(f->passes_filter(f, d))))
return NULL;
log_debug_devs("Using %s", dev_name(d));
log_debug_devs("%s: Using device (%d:%d)", dev_name(d), (int) MAJOR(d->dev), (int) MINOR(d->dev));
return d;
}
@@ -1515,7 +1545,7 @@ struct device *dev_iter_get(struct dev_iter *iter)
struct device *d = _iter_next(iter);
if (!iter->filter || (d->flags & DEV_REGULAR) ||
iter->filter->passes_filter(iter->filter, d)) {
log_debug_devs("Using %s", dev_name(d));
log_debug_devs("%s: Using device (%d:%d)", dev_name(d), (int) MAJOR(d->dev), (int) MINOR(d->dev));
return d;
}
}

View File

@@ -23,10 +23,10 @@
* predicate for devices.
*/
struct dev_filter {
int (*passes_filter) (struct dev_filter * f, struct device * dev);
void (*destroy) (struct dev_filter * f);
void (*wipe) (struct dev_filter * f);
int (*dump) (struct dev_filter * f, int merge_existing);
int (*passes_filter) (struct dev_filter *f, struct device *dev);
void (*destroy) (struct dev_filter *f);
void (*wipe) (struct dev_filter *f);
int (*dump) (struct dev_filter *f, struct dm_pool *mem, int merge_existing);
void *private;
unsigned use_count;
};
@@ -55,6 +55,7 @@ int dev_cache_add_dir(const char *path);
int dev_cache_add_loopfile(const char *path);
__attribute__((nonnull(1)))
struct device *dev_cache_get(const char *name, struct dev_filter *f);
const char *dev_cache_filtered_reason(const char *name);
// TODO
struct device *dev_cache_get_by_devt(dev_t device, struct dev_filter *f);

View File

@@ -100,8 +100,6 @@ const char *dev_ext_name(struct device *dev)
return _ext_registry[dev->ext.src].name;
}
static const char *_ext_attached_msg = "External handle attached to device";
struct dev_ext *dev_ext_get(struct device *dev)
{
struct dev_ext *ext;
@@ -110,10 +108,10 @@ struct dev_ext *dev_ext_get(struct device *dev)
handle_ptr = dev->ext.handle;
if (!(ext = _ext_registry[dev->ext.src].dev_ext_get(dev)))
log_error("Failed to get external handle for device %s [%s].",
log_error("%s: Failed to get external handle [%s].",
dev_name(dev), dev_ext_name(dev));
else if (handle_ptr != dev->ext.handle)
log_debug_devs("%s %s [%s:%p]", _ext_attached_msg, dev_name(dev),
log_debug_devs("%s: External handle [%s:%p] attached", dev_name(dev),
dev_ext_name(dev), dev->ext.handle);
return ext;
@@ -131,10 +129,10 @@ int dev_ext_release(struct device *dev)
handle_ptr = dev->ext.handle;
if (!(r = _ext_registry[dev->ext.src].dev_ext_release(dev)))
log_error("Failed to release external handle for device %s [%s:%p].",
log_error("%s: Failed to release external handle [%s:%p]",
dev_name(dev), dev_ext_name(dev), dev->ext.handle);
else
log_debug_devs("External handle detached from device %s [%s:%p]",
log_debug_devs("%s: External handle [%s:%p] detached",
dev_name(dev), dev_ext_name(dev), handle_ptr);
return r;
@@ -143,7 +141,7 @@ int dev_ext_release(struct device *dev)
int dev_ext_enable(struct device *dev, dev_ext_t src)
{
if (dev->ext.enabled && (dev->ext.src != src) && !dev_ext_release(dev)) {
log_error("Failed to enable external handle for device %s [%s].",
log_error("%s: Failed to enable external handle [%s].",
dev_name(dev), _ext_registry[src].name);
return 0;
}
@@ -160,7 +158,7 @@ int dev_ext_disable(struct device *dev)
return 1;
if (!dev_ext_release(dev)) {
log_error("Failed to disable external handle for device %s [%s].",
log_error("%s: Failed to disable external handle [%s].",
dev_name(dev), dev_ext_name(dev));
return 0;
}

View File

@@ -53,36 +53,345 @@
# endif
#endif
/*
* Always read at least 8k from disk.
* This seems to be a good compromise for the existing LVM2 metadata layout.
*/
#define MIN_READ_SIZE (8 * 1024)
static DM_LIST_INIT(_open_devices);
static unsigned _dev_size_seqno = 1;
/*
 * Text describing each dev_io_reason_t value, for use in log messages.
 * The table is indexed by the enum value, so entries must be kept in the
 * same order as the enum.
 */
static const char *_reasons[] = {
	"dev signatures",
	"PV labels",
	"VG metadata header",
	"VG metadata content",
	"extra VG metadata header",
	"extra VG metadata content",
	"LVM1 metadata",
	"pool metadata",
	"LV content",
	"logging",
};

/*
 * Return the text describing why an I/O is being performed.
 * A reason outside the table yields "unknown" instead of reading beyond
 * the end of the array.
 */
static const char *_reason_text(dev_io_reason_t reason)
{
	unsigned idx = (unsigned) reason;

	if (idx >= sizeof(_reasons) / sizeof(_reasons[0]))
		return "unknown";

	return _reasons[idx];
}
/*
 * Free the memory allocated for a device buffer, if any, and record that
 * the buffer no longer owns an allocation.
 */
static void _release_devbuf(struct device_buffer *devbuf)
{
	void *allocated = devbuf->malloc_address;

	devbuf->malloc_address = NULL;
	dm_free(allocated);
}
/*
 * Release both buffers held against a device (the normal one and the one
 * used for extra metadata areas).
 * Nothing is done for devices flagged DEV_REGULAR.
 */
void devbufs_release(struct device *dev)
{
	if (!(dev->flags & DEV_REGULAR)) {
		_release_devbuf(&dev->last_devbuf);
		_release_devbuf(&dev->last_extra_devbuf);
	}
}
#ifdef AIO_SUPPORT
# include <libaio.h>
/* Context used for every io_setup/io_submit/io_getevents/io_destroy call; 0 while async I/O is not set up */
static io_context_t _aio_ctx = 0;
/* Array of DEFAULT_AIO_COLLECTION_EVENTS entries that io_getevents() fills in */
static struct io_event *_aio_events = NULL;
/* Limit on the number of in-flight requests (from devices_aio_max_CFG); also passed to io_setup() */
static int _aio_max = 0;
/* Limit on bytes of in-flight I/O (from devices_aio_memory_CFG, multiplied by 1024 * 1024) */
static int64_t _aio_memory_max = 0;
static int _aio_must_queue = 0; /* Have we reached AIO capacity? */
/* Requests held back by _queue_aio() until they can be issued */
static DM_LIST_INIT(_aio_queue);
/* Maximum number of completions collected by a single io_getevents() call */
#define DEFAULT_AIO_COLLECTION_EVENTS 32
/*
 * Read the asynchronous I/O limits from the configuration and create the
 * aio context if one does not exist yet.
 *
 * If either limit is zero, asynchronous I/O is disabled and any existing
 * context is torn down.
 *
 * Returns 1 on success (including when aio is disabled or already set up).
 * Returns 0 if the event array cannot be allocated or io_setup() fails;
 * in the latter case only synchronous I/O will be used.
 */
int dev_async_setup(struct cmd_context *cmd)
{
	int r;

	_aio_max = find_config_tree_int(cmd, devices_aio_max_CFG, NULL);

	/*
	 * Widen to 64 bits before converting MB to bytes: done in int
	 * arithmetic, a setting of 2048 or more would overflow.
	 */
	_aio_memory_max = (int64_t) find_config_tree_int(cmd, devices_aio_memory_CFG, NULL) * 1024 * 1024;

	/* Threshold is zero? */
	if (!_aio_max || !_aio_memory_max) {
		if (_aio_ctx)
			dev_async_exit();
		return 1;
	}

	/* Already set up? */
	if (_aio_ctx)
		return 1;

	/* _aio_memory_max is held in bytes but the message reports MB */
	log_debug_io("Setting up aio context for up to %" PRId64 " MB across %d events.",
		     _aio_memory_max / (1024 * 1024), _aio_max);

	if (!_aio_events && !(_aio_events = dm_zalloc(sizeof(*_aio_events) * DEFAULT_AIO_COLLECTION_EVENTS))) {
		log_error("Failed to allocate io_event array for asynchronous I/O.");
		return 0;
	}

	if ((r = io_setup(_aio_max, &_aio_ctx)) < 0) {
		/*
		 * Possible errors:
		 *   ENOSYS - aio not available in current kernel
		 *   EAGAIN - _aio_max is too big
		 *   EFAULT - invalid pointer
		 *   EINVAL - _aio_ctx != 0 or kernel aio limits exceeded
		 *   ENOMEM
		 */
		log_warn("WARNING: Asynchronous I/O setup for %d events failed: %s", _aio_max, strerror(-r));
		log_warn("WARNING: Using only synchronous I/O.");
		dm_free(_aio_events);
		_aio_events = NULL;
		_aio_ctx = 0;
		return 0;
	}

	return 1;
}
/*
 * For use after fork: forget the current aio context (without destroying
 * it) and free the event array, then run the normal setup again.
 * Returns whatever dev_async_setup() returns.
 */
int dev_async_reset(struct cmd_context *cmd)
{
	log_debug_io("Resetting asynchronous I/O context.");

	dm_free(_aio_events);
	_aio_events = NULL;
	_aio_ctx = 0;

	return dev_async_setup(cmd);
}
/*
 * Adjust the running totals of in-flight asynchronous I/O by 'nr' requests
 * and 'bytes' bytes (callers pass negative values when I/O completes),
 * then recompute _aio_must_queue: it is set while the request count has
 * reached _aio_max or the byte count exceeds _aio_memory_max.
 */
static void _update_aio_counters(int nr, ssize_t bytes)
{
	static int64_t bytes_in_flight = 0;
	static int requests_in_flight = 0;

	requests_in_flight += nr;
	bytes_in_flight += bytes;

	_aio_must_queue = (requests_in_flight >= _aio_max || bytes_in_flight > _aio_memory_max) ? 1 : 0;
}
static int _io(struct device_buffer *devbuf, unsigned ioflags);
int dev_async_getevents(void)
{
struct device_buffer *devbuf, *tmp;
lvm_callback_fn_t dev_read_callback_fn;
void *dev_read_callback_context;
int r, event_nr;
if (!_aio_ctx)
return 1;
do {
/* FIXME Add timeout - currently NULL - waits for ever for at least 1 item */
r = io_getevents(_aio_ctx, 1, DEFAULT_AIO_COLLECTION_EVENTS, _aio_events, NULL);
if (r > 0)
break;
if (!r)
return 1; /* Timeout elapsed */
if (r == -EINTR)
continue;
if (r == -EAGAIN) {
usleep(100);
return 1; /* Give the caller the opportunity to do other work before repeating */
}
/*
* ENOSYS - not supported by kernel
* EFAULT - memory invalid
* EINVAL - _aio_ctx invalid or min_nr/nr/timeout out of range
*/
log_error("Asynchronous event collection failed: %s", strerror(-r));
return 0;
} while (1);
for (event_nr = 0; event_nr < r; event_nr++) {
devbuf = _aio_events[event_nr].obj->data;
dm_free(_aio_events[event_nr].obj);
_update_aio_counters(-1, -devbuf->where.size);
dev_read_callback_fn = devbuf->dev_read_callback_fn;
dev_read_callback_context = devbuf->dev_read_callback_context;
/* Clear the callbacks as a precaution */
devbuf->dev_read_callback_context = NULL;
devbuf->dev_read_callback_fn = NULL;
if (_aio_events[event_nr].res == devbuf->where.size) {
if (dev_read_callback_fn)
dev_read_callback_fn(0, AIO_SUPPORTED_CODE_PATH, dev_read_callback_context, (char *)devbuf->buf + devbuf->data_offset);
} else {
/* FIXME If partial read is possible, resubmit remainder */
log_error_once("%s: Asynchronous I/O failed: read only %" PRIu64 " of %" PRIu64 " bytes at %" PRIu64,
dev_name(devbuf->where.dev),
(uint64_t) _aio_events[event_nr].res, (uint64_t) devbuf->where.size,
(uint64_t) devbuf->where.start);
_release_devbuf(devbuf);
if (dev_read_callback_fn)
dev_read_callback_fn(1, AIO_SUPPORTED_CODE_PATH, dev_read_callback_context, NULL);
else
r = 0;
}
}
/* Submit further queued events if we can */
dm_list_iterate_items_gen_safe(devbuf, tmp, &_aio_queue, aio_queued) {
if (_aio_must_queue)
break;
dm_list_del(&devbuf->aio_queued);
_io(devbuf, 1);
}
return 1;
}
static int _io_async(struct device_buffer *devbuf)
{
struct device_area *where = &devbuf->where;
struct iocb *iocb;
int r;
_update_aio_counters(1, devbuf->where.size);
if (!(iocb = dm_malloc(sizeof(*iocb)))) {
log_error("Failed to allocate I/O control block array for asynchronous I/O.");
return 0;
}
io_prep_pread(iocb, dev_fd(where->dev), devbuf->buf, where->size, where->start);
iocb->data = devbuf;
do {
r = io_submit(_aio_ctx, 1L, &iocb);
if (r ==1)
break; /* Success */
if (r == -EAGAIN) {
/* Try to release some resources then retry */
usleep(100);
if (dev_async_getevents())
return_0;
/* FIXME Add counter/timeout so we can't get stuck here for ever */
continue;
}
/*
* Possible errors:
* EFAULT - invalid data
* ENOSYS - no aio support in kernel
* EBADF - bad file descriptor in iocb
* EINVAL - invalid _aio_ctx / iocb not initialised / invalid operation for this fd
*/
log_error("Asynchronous event submission failed: %s", strerror(-r));
return 0;
} while (1);
return 1;
}
/*
 * Shut down asynchronous I/O.
 * Each request still waiting in _aio_queue is dropped: its buffer is
 * released and its callback, if any, is told that the read failed.
 * The aio context is then destroyed and the event array freed.
 * Does nothing if there is no aio context.
 */
void dev_async_exit(void)
{
	struct device_buffer *devbuf, *next;
	lvm_callback_fn_t callback_fn;
	void *callback_context;
	int r;

	if (!_aio_ctx)
		return;

	/* Discard any queued requests */
	dm_list_iterate_items_gen_safe(devbuf, next, &_aio_queue, aio_queued) {
		callback_fn = devbuf->dev_read_callback_fn;
		callback_context = devbuf->dev_read_callback_context;

		dm_list_del(&devbuf->aio_queued);

		/*
		 * NOTE(review): queued requests do not appear to have been added
		 * to the counters yet (that happens in _io_async()), so this
		 * decrement looks unbalanced - confirm.
		 */
		_update_aio_counters(-1, -devbuf->where.size);

		_release_devbuf(devbuf);

		if (callback_fn)
			callback_fn(1, AIO_SUPPORTED_CODE_PATH, callback_context, NULL);
	}

	log_debug_io("Destroying aio context.");
	r = io_destroy(_aio_ctx);
	if (r < 0)
		/* Returns -ENOSYS if aio not in kernel or -EINVAL if _aio_ctx invalid */
		log_error("Failed to destroy asynchronous I/O context: %s", strerror(-r));

	_aio_ctx = 0;
	dm_free(_aio_events);
	_aio_events = NULL;
}
/* Put a request on the list of asynchronous I/O waiting to be issued. */
static void _queue_aio(struct device_buffer *devbuf)
{
	log_debug_io("Queueing aio.");
	dm_list_add(&_aio_queue, &devbuf->aio_queued);
}
#else
/*
 * Stand-ins used when AIO_SUPPORT is not defined.
 * Both variables are initialised to 0 and none of the stubs below changes
 * them, so code that tests them sees asynchronous I/O as unavailable.
 */
static int _aio_ctx = 0;
static int _aio_must_queue = 0;
/* Nothing to set up: reports success. */
int dev_async_setup(struct cmd_context *cmd)
{
return 1;
}
/* Nothing to reset: reports success. */
int dev_async_reset(struct cmd_context *cmd)
{
return 1;
}
/* No events can be outstanding: reports success. */
int dev_async_getevents(void)
{
return 1;
}
/* Nothing to tear down. */
void dev_async_exit(void)
{
}
/* Asynchronous submission is unavailable: always reports failure. */
static int _io_async(struct device_buffer *devbuf)
{
return 0;
}
/* Queueing is unavailable: does nothing. */
static void _queue_aio(struct device_buffer *devbuf)
{
}
#endif /* AIO_SUPPORT */
/*-----------------------------------------------------------------
* The standard io loop that keeps submitting an io until it's
* all gone.
*---------------------------------------------------------------*/
static int _io(struct device_area *where, char *buffer, int should_write)
static int _io_sync(struct device_buffer *devbuf)
{
struct device_area *where = &devbuf->where;
int fd = dev_fd(where->dev);
char *buffer = devbuf->buf;
ssize_t n = 0;
size_t total = 0;
if (fd < 0) {
log_error("Attempt to read an unopened device (%s).",
dev_name(where->dev));
return 0;
}
/*
* Skip all writes in test mode.
*/
if (should_write && test_mode())
return 1;
if (where->size > SSIZE_MAX) {
log_error("Read size too large: %" PRIu64, where->size);
return 0;
}
if (lseek(fd, (off_t) where->start, SEEK_SET) == (off_t) -1) {
log_error("%s: lseek %" PRIu64 " failed: %s",
dev_name(where->dev), (uint64_t) where->start,
@@ -92,7 +401,7 @@ static int _io(struct device_area *where, char *buffer, int should_write)
while (total < (size_t) where->size) {
do
n = should_write ?
n = devbuf->write ?
write(fd, buffer, (size_t) where->size - total) :
read(fd, buffer, (size_t) where->size - total);
while ((n < 0) && ((errno == EINTR) || (errno == EAGAIN)));
@@ -100,7 +409,7 @@ static int _io(struct device_area *where, char *buffer, int should_write)
if (n < 0)
log_error_once("%s: %s failed after %" PRIu64 " of %" PRIu64
" at %" PRIu64 ": %s", dev_name(where->dev),
should_write ? "write" : "read",
devbuf->write ? "write" : "read",
(uint64_t) total,
(uint64_t) where->size,
(uint64_t) where->start, strerror(errno));
@@ -115,6 +424,42 @@ static int _io(struct device_area *where, char *buffer, int should_write)
return (total == (size_t) where->size);
}
static int _io(struct device_buffer *devbuf, unsigned ioflags)
{
struct device_area *where = &devbuf->where;
int fd = dev_fd(where->dev);
int async = (!devbuf->write && _aio_ctx && aio_supported_code_path(ioflags) && devbuf->dev_read_callback_fn) ? 1 : 0;
if (fd < 0) {
log_error("Attempt to read an unopened device (%s).",
dev_name(where->dev));
return 0;
}
if (!devbuf->buf && !(devbuf->malloc_address = devbuf->buf = dm_malloc_aligned((size_t) devbuf->where.size, 0))) {
log_error("I/O buffer malloc failed");
return 0;
}
log_debug_io("%s %s(fd %d):%8" PRIu64 " bytes (%ssync) at %" PRIu64 "%s (for %s)",
devbuf->write ? "Write" : "Read ", dev_name(where->dev), fd,
where->size, async ? "a" : "", (uint64_t) where->start,
(devbuf->write && test_mode()) ? " (test mode - suppressed)" : "", _reason_text(devbuf->reason));
/*
* Skip all writes in test mode.
*/
if (devbuf->write && test_mode())
return 1;
if (where->size > SSIZE_MAX) {
log_error("Read size too large: %" PRIu64, where->size);
return 0;
}
return async ? _io_async(devbuf) : _io_sync(devbuf);
}
/*-----------------------------------------------------------------
* LVM2 uses O_DIRECT when performing metadata io, which requires
* block size aligned accesses. If any io is not aligned we have
@@ -142,7 +487,7 @@ int dev_get_block_size(struct device *dev, unsigned int *physical_block_size, un
r = 0;
goto out;
}
log_debug_devs("%s: block size is %u bytes", name, dev->block_size);
log_debug_devs("%s: Block size is %u bytes", name, dev->block_size);
}
#ifdef BLKPBSZGET
@@ -153,7 +498,7 @@ int dev_get_block_size(struct device *dev, unsigned int *physical_block_size, un
r = 0;
goto out;
}
log_debug_devs("%s: physical block size is %u bytes", name, dev->phys_block_size);
log_debug_devs("%s: Physical block size is %u bytes", name, dev->phys_block_size);
}
#elif defined (BLKSSZGET)
/* if we can't get physical block size, just use logical block size instead */
@@ -163,15 +508,13 @@ int dev_get_block_size(struct device *dev, unsigned int *physical_block_size, un
r = 0;
goto out;
}
log_debug_devs("%s: physical block size can't be determined, using logical "
"block size of %u bytes", name, dev->phys_block_size);
log_debug_devs("%s: Physical block size can't be determined: Using logical block size of %u bytes", name, dev->phys_block_size);
}
#else
/* if even BLKSSZGET is not available, use default 512b */
if (dev->phys_block_size == -1) {
dev->phys_block_size = 512;
log_debug_devs("%s: physical block size can't be determined, using block "
"size of %u bytes instead", name, dev->phys_block_size);
log_debug_devs("%s: Physical block size can't be determined: Using block size of %u bytes instead", name, dev->phys_block_size);
}
#endif
@@ -206,14 +549,16 @@ static void _widen_region(unsigned int block_size, struct device_area *region,
result->size += block_size - delta;
}
static int _aligned_io(struct device_area *where, char *buffer,
int should_write)
static int _aligned_io(struct device_area *where, char *write_buffer,
int should_write, dev_io_reason_t reason,
unsigned ioflags, lvm_callback_fn_t dev_read_callback_fn, void *dev_read_callback_context)
{
char *bounce, *bounce_buf;
unsigned int physical_block_size = 0;
unsigned int block_size = 0;
unsigned buffer_was_widened = 0;
uintptr_t mask;
struct device_area widened;
struct device_buffer *devbuf;
int r = 0;
if (!(where->dev->flags & DEV_REGULAR) &&
@@ -223,53 +568,93 @@ static int _aligned_io(struct device_area *where, char *buffer,
if (!block_size)
block_size = lvm_getpagesize();
/* Apply minimum read size */
if (!should_write && block_size < MIN_READ_SIZE)
block_size = MIN_READ_SIZE;
mask = block_size - 1;
_widen_region(block_size, where, &widened);
/* Do we need to use a bounce buffer? */
mask = block_size - 1;
if (!memcmp(where, &widened, sizeof(widened)) &&
!((uintptr_t) buffer & mask))
return _io(where, buffer, should_write);
/* Did we widen the buffer? When writing, this means read-modify-write. */
if (where->size != widened.size || where->start != widened.start) {
buffer_was_widened = 1;
log_debug_io("Widening request for %" PRIu64 " bytes at %" PRIu64 " to %" PRIu64 " bytes at %" PRIu64 " on %s (for %s)",
where->size, (uint64_t) where->start, widened.size, (uint64_t) widened.start, dev_name(where->dev), _reason_text(reason));
}
/* Allocate a bounce buffer with an extra block */
if (!(bounce_buf = bounce = dm_malloc((size_t) widened.size + block_size))) {
log_error("Bounce buffer malloc failed");
return 0;
devbuf = DEV_DEVBUF(where->dev, reason);
_release_devbuf(devbuf);
devbuf->where.dev = where->dev;
devbuf->where.start = widened.start;
devbuf->where.size = widened.size;
devbuf->write = should_write;
devbuf->reason = reason;
devbuf->dev_read_callback_fn = dev_read_callback_fn;
devbuf->dev_read_callback_context = dev_read_callback_context;
/* Store location of requested data relative to start of buf */
devbuf->data_offset = where->start - devbuf->where.start;
if (should_write && !buffer_was_widened && !((uintptr_t) write_buffer & mask))
/* Perform the I/O directly. */
devbuf->buf = write_buffer;
else if (!should_write)
/* Postpone buffer allocation until we're about to issue the I/O */
devbuf->buf = NULL;
else {
/* Allocate a bounce buffer with an extra block */
if (!(devbuf->malloc_address = devbuf->buf = dm_malloc((size_t) devbuf->where.size + block_size))) {
log_error("Bounce buffer malloc failed");
return 0;
}
/*
* Realign start of bounce buffer (using the extra sector)
*/
if (((uintptr_t) devbuf->buf) & mask)
devbuf->buf = (char *) ((((uintptr_t) devbuf->buf) + mask) & ~mask);
}
/*
* Realign start of bounce buffer (using the extra sector)
*/
if (((uintptr_t) bounce) & mask)
bounce = (char *) ((((uintptr_t) bounce) + mask) & ~mask);
/* If we've reached our concurrent AIO limit, add this request to the queue */
if (!devbuf->write && _aio_ctx && aio_supported_code_path(ioflags) && dev_read_callback_fn && _aio_must_queue) {
_queue_aio(devbuf);
return 1;
}
/* channel the io through the bounce buffer */
if (!_io(&widened, bounce, 0)) {
devbuf->write = 0;
/* Do we need to read into the bounce buffer? */
if ((!should_write || buffer_was_widened) && !_io(devbuf, ioflags)) {
if (!should_write)
goto_out;
goto_bad;
/* FIXME Handle errors properly! */
/* FIXME pre-extend the file */
memset(bounce, '\n', widened.size);
memset(devbuf->buf, '\n', devbuf->where.size);
}
if (should_write) {
memcpy(bounce + (where->start - widened.start), buffer,
(size_t) where->size);
if (!should_write)
return 1;
/* ... then we write */
if (!(r = _io(&widened, bounce, 1)))
stack;
goto out;
/* writes */
if (devbuf->malloc_address) {
memcpy((char *) devbuf->buf + devbuf->data_offset, write_buffer, (size_t) where->size);
log_debug_io("Overwriting %" PRIu64 " bytes at %" PRIu64 " (for %s)", where->size,
(uint64_t) where->start, _reason_text(devbuf->reason));
}
memcpy(buffer, bounce + (where->start - widened.start),
(size_t) where->size);
/* ... then we write */
devbuf->write = 1;
if (!(r = _io(devbuf, 0)))
goto_bad;
r = 1;
_release_devbuf(devbuf);
return 1;
out:
dm_free(bounce_buf);
return r;
bad:
_release_devbuf(devbuf);
return 0;
}
static int _dev_get_size_file(struct device *dev, uint64_t *size)
@@ -463,11 +848,12 @@ int dev_open_flags(struct device *dev, int flags, int direct, int quiet)
return 1;
}
if (dev->open_count && !need_excl) {
log_debug_devs("%s already opened read-only. Upgrading "
if (dev->open_count && !need_excl)
log_debug_devs("%s: Already opened read-only. Upgrading "
"to read-write.", dev_name(dev));
dev->open_count++;
}
/* dev_close_immediate will decrement this */
dev->open_count++;
dev_close_immediate(dev);
// FIXME: dev with DEV_ALLOCED is released
@@ -621,6 +1007,7 @@ static void _close(struct device *dev)
dev->phys_block_size = -1;
dev->block_size = -1;
dm_list_del(&dev->open_list);
devbufs_release(dev);
log_debug_devs("Closed %s", dev_name(dev));
@@ -693,72 +1080,138 @@ static void _dev_inc_error_count(struct device *dev)
dev->max_error_count, dev_name(dev));
}
int dev_read(struct device *dev, uint64_t offset, size_t len, void *buffer)
/*
* Data is returned (read-only) at DEV_DEVBUF_DATA(dev, reason).
* If dev_read_callback_fn is supplied, we always return 1 and take
* responsibility for calling it exactly once. This might happen before the
* function returns (if there's an error or the I/O is synchronous) or after.
* Any error is passed to that function, which must track it if required.
*/
static int _dev_read_callback(struct device *dev, uint64_t offset, size_t len, dev_io_reason_t reason,
unsigned ioflags, lvm_callback_fn_t dev_read_callback_fn, void *callback_context)
{
struct device_area where;
int ret;
struct device_buffer *devbuf;
uint64_t buf_end;
int cached = 0;
int ret = 0;
if (!dev->open_count)
return_0;
if (!dev->open_count) {
log_error(INTERNAL_ERROR "Attempt to access device %s while closed.", dev_name(dev));
goto out;
}
if (!_dev_is_valid(dev))
return 0;
goto_out;
/*
* Can we satisfy this from data we stored last time we read?
*/
if ((devbuf = DEV_DEVBUF(dev, reason)) && devbuf->malloc_address) {
buf_end = devbuf->where.start + devbuf->where.size - 1;
if (offset >= devbuf->where.start && offset <= buf_end && offset + len - 1 <= buf_end) {
/* Reuse this buffer */
cached = 1;
devbuf->data_offset = offset - devbuf->where.start;
log_debug_io("Cached read for %" PRIu64 " bytes at %" PRIu64 " on %s (for %s)",
(uint64_t) len, (uint64_t) offset, dev_name(dev), _reason_text(reason));
ret = 1;
goto out;
}
}
where.dev = dev;
where.start = offset;
where.size = len;
// fprintf(stderr, "READ: %s, %lld, %d\n", dev_name(dev), offset, len);
ret = _aligned_io(&where, buffer, 0);
if (!ret)
ret = _aligned_io(&where, NULL, 0, reason, ioflags, dev_read_callback_fn, callback_context);
if (!ret) {
log_error("Read from %s failed", dev_name(dev));
_dev_inc_error_count(dev);
}
out:
/* If we had an error or this was sync I/O, pass the result to any callback fn */
if ((!ret || !_aio_ctx || !aio_supported_code_path(ioflags) || cached) && dev_read_callback_fn) {
dev_read_callback_fn(!ret, ioflags, callback_context, DEV_DEVBUF_DATA(dev, reason));
return 1;
}
return ret;
}
/*
* Read from 'dev' into 'buf', possibly in 2 distinct regions, denoted
* by (offset,len) and (offset2,len2). Thus, the total size of
* 'buf' should be len+len2.
*/
int dev_read_circular(struct device *dev, uint64_t offset, size_t len,
uint64_t offset2, size_t len2, char *buf)
void dev_read_callback(struct device *dev, uint64_t offset, size_t len, dev_io_reason_t reason,
unsigned ioflags, lvm_callback_fn_t dev_read_callback_fn, void *callback_context)
{
if (!dev_read(dev, offset, len, buf)) {
/* Always returns 1 if callback fn is supplied */
if (!_dev_read_callback(dev, offset, len, reason, ioflags, dev_read_callback_fn, callback_context))
log_error(INTERNAL_ERROR "_dev_read_callback failed");
}
/*
 * Read 'len' bytes at 'offset' without using a callback.
 * On success returns a pointer to the requested data inside the device's
 * buffer for this reason: the data is read-only and the caller must not
 * free it.  Returns NULL on failure.
 */
const char *dev_read(struct device *dev, uint64_t offset, size_t len, dev_io_reason_t reason)
{
	if (_dev_read_callback(dev, offset, len, reason, 0, NULL, NULL))
		return DEV_DEVBUF_DATA(dev, reason);

	return_NULL;
}
/* Read into supplied retbuf owned by the caller. */
int dev_read_buf(struct device *dev, uint64_t offset, size_t len, dev_io_reason_t reason, void *retbuf)
{
if (!_dev_read_callback(dev, offset, len, reason, 0, NULL, NULL)) {
log_error("Read from %s failed", dev_name(dev));
return 0;
}
/*
* The second region is optional, and allows for
* a circular buffer on the device.
*/
if (!len2)
return 1;
if (!dev_read(dev, offset2, len2, buf + len)) {
log_error("Circular read from %s failed",
dev_name(dev));
return 0;
}
memcpy(retbuf, DEV_DEVBUF_DATA(dev, reason), len);
return 1;
}
/*
 * Read from 'dev' in 2 distinct regions, denoted by (offset,len) and (offset2,len2),
 * into a single newly-allocated buffer of len+len2 bytes.
 * The second region is optional (it allows for a circular buffer on the
 * device) and is skipped when len2 is zero.
 * Returns NULL on failure.
 * Caller is responsible for dm_free().
 */
const char *dev_read_circular(struct device *dev, uint64_t offset, size_t len,
			      uint64_t offset2, size_t len2, dev_io_reason_t reason)
{
	char *buf = NULL;

	/* If the sum wrapped round, the buffer would be too small for the copies below */
	if (len + len2 < len) {
		log_error("Buffer allocation failed for split metadata.");
		return NULL;
	}

	if (!(buf = dm_malloc(len + len2))) {
		log_error("Buffer allocation failed for split metadata.");
		return NULL;
	}

	if (!dev_read_buf(dev, offset, len, reason, buf)) {
		log_error("Read from %s failed", dev_name(dev));
		dm_free(buf);
		return NULL;
	}

	/* Don't issue a zero-length read when there is no second region */
	if (len2 && !dev_read_buf(dev, offset2, len2, reason, buf + len)) {
		log_error("Circular read from %s failed", dev_name(dev));
		dm_free(buf);
		return NULL;
	}

	return buf;
}
/* FIXME If O_DIRECT can't extend file, dev_extend first; dev_truncate after.
* But fails if concurrent processes writing
*/
/* FIXME pre-extend the file */
int dev_append(struct device *dev, size_t len, char *buffer)
int dev_append(struct device *dev, size_t len, dev_io_reason_t reason, char *buffer)
{
int r;
if (!dev->open_count)
return_0;
r = dev_write(dev, dev->end, len, buffer);
r = dev_write(dev, dev->end, len, reason, buffer);
dev->end += (uint64_t) len;
#ifndef O_DIRECT_SUPPORT
@@ -767,7 +1220,7 @@ int dev_append(struct device *dev, size_t len, char *buffer)
return r;
}
int dev_write(struct device *dev, uint64_t offset, size_t len, void *buffer)
int dev_write(struct device *dev, uint64_t offset, size_t len, dev_io_reason_t reason, void *buffer)
{
struct device_area where;
int ret;
@@ -778,23 +1231,28 @@ int dev_write(struct device *dev, uint64_t offset, size_t len, void *buffer)
if (!_dev_is_valid(dev))
return 0;
if (!len) {
log_error(INTERNAL_ERROR "Attempted to write 0 bytes to %s at " FMTu64, dev_name(dev), offset);
return 0;
}
where.dev = dev;
where.start = offset;
where.size = len;
dev->flags |= DEV_ACCESSED_W;
ret = _aligned_io(&where, buffer, 1);
ret = _aligned_io(&where, buffer, 1, reason, 0, NULL, NULL);
if (!ret)
_dev_inc_error_count(dev);
return ret;
}
int dev_set(struct device *dev, uint64_t offset, size_t len, int value)
int dev_set(struct device *dev, uint64_t offset, size_t len, dev_io_reason_t reason, int value)
{
size_t s;
char buffer[4096] __attribute__((aligned(8)));
char buffer[4096] __attribute__((aligned(4096)));
if (!dev_open(dev))
return_0;
@@ -810,7 +1268,7 @@ int dev_set(struct device *dev, uint64_t offset, size_t len, int value)
memset(buffer, value, sizeof(buffer));
while (1) {
s = len > sizeof(buffer) ? sizeof(buffer) : len;
if (!dev_write(dev, offset, s, buffer))
if (!dev_write(dev, offset, s, reason, buffer))
break;
len -= s;

View File

@@ -31,7 +31,7 @@ int dev_is_luks(struct device *dev, uint64_t *offset_found)
if (offset_found)
*offset_found = 0;
if (!dev_read(dev, 0, LUKS_SIGNATURE_SIZE, buf))
if (!dev_read_buf(dev, 0, LUKS_SIGNATURE_SIZE, DEV_IO_SIGNATURES, buf))
goto_out;
ret = memcmp(buf, LUKS_SIGNATURE, LUKS_SIGNATURE_SIZE) ? 0 : 1;

View File

@@ -37,7 +37,7 @@ static int _dev_has_md_magic(struct device *dev, uint64_t sb_offset)
uint32_t md_magic;
/* Version 1 is little endian; version 0.90.0 is machine endian */
if (dev_read(dev, sb_offset, sizeof(uint32_t), &md_magic) &&
if (dev_read_buf(dev, sb_offset, sizeof(uint32_t), DEV_IO_SIGNATURES, &md_magic) &&
((md_magic == MD_SB_MAGIC) ||
((MD_SB_MAGIC != xlate32(MD_SB_MAGIC)) && (md_magic == xlate32(MD_SB_MAGIC)))))
return 1;
@@ -261,8 +261,7 @@ out:
/*
* Retrieve chunk size from md device using sysfs.
*/
static unsigned long dev_md_chunk_size(struct dev_types *dt,
struct device *dev)
static unsigned long _dev_md_chunk_size(struct dev_types *dt, struct device *dev)
{
const char *attribute = "chunk_size";
unsigned long chunk_size_bytes = 0UL;
@@ -280,7 +279,7 @@ static unsigned long dev_md_chunk_size(struct dev_types *dt,
/*
* Retrieve level from md device using sysfs.
*/
static int dev_md_level(struct dev_types *dt, struct device *dev)
static int _dev_md_level(struct dev_types *dt, struct device *dev)
{
char level_string[MD_MAX_SYSFS_SIZE];
const char *attribute = "level";
@@ -303,7 +302,7 @@ static int dev_md_level(struct dev_types *dt, struct device *dev)
/*
* Retrieve raid_disks from md device using sysfs.
*/
static int dev_md_raid_disks(struct dev_types *dt, struct device *dev)
static int _dev_md_raid_disks(struct dev_types *dt, struct device *dev)
{
const char *attribute = "raid_disks";
int raid_disks = 0;
@@ -327,15 +326,15 @@ unsigned long dev_md_stripe_width(struct dev_types *dt, struct device *dev)
unsigned long stripe_width_sectors = 0UL;
int level, raid_disks, data_disks;
chunk_size_sectors = dev_md_chunk_size(dt, dev);
chunk_size_sectors = _dev_md_chunk_size(dt, dev);
if (!chunk_size_sectors)
return 0;
level = dev_md_level(dt, dev);
level = _dev_md_level(dt, dev);
if (level < 0)
return 0;
raid_disks = dev_md_raid_disks(dt, dev);
raid_disks = _dev_md_raid_disks(dt, dev);
if (!raid_disks)
return 0;

View File

@@ -20,8 +20,7 @@
#define MAX_PAGESIZE (64 * 1024)
#define SIGNATURE_SIZE 10
static int
_swap_detect_signature(const char *buf)
static int _swap_detect_signature(const char *buf)
{
if (memcmp(buf, "SWAP-SPACE", 10) == 0 ||
memcmp(buf, "SWAPSPACE2", 10) == 0)
@@ -61,8 +60,7 @@ int dev_is_swap(struct device *dev, uint64_t *offset_found)
continue;
if (size < (page >> SECTOR_SHIFT))
break;
if (!dev_read(dev, page - SIGNATURE_SIZE,
SIGNATURE_SIZE, buf)) {
if (!dev_read_buf(dev, page - SIGNATURE_SIZE, SIGNATURE_SIZE, DEV_IO_SIGNATURES, buf)) {
ret = -1;
break;
}

View File

@@ -363,7 +363,7 @@ static int _has_partition_table(struct device *dev)
uint16_t magic;
} __attribute__((packed)) buf; /* sizeof() == SECTOR_SIZE */
if (!dev_read(dev, UINT64_C(0), sizeof(buf), &buf))
if (!dev_read_buf(dev, UINT64_C(0), sizeof(buf), DEV_IO_SIGNATURES, &buf))
return_0;
/* FIXME Check for other types of partition table too */
@@ -675,7 +675,7 @@ static int _blkid_wipe(blkid_probe probe, struct device *dev, const char *name,
} else
log_verbose(_msg_wiping, type, name);
if (!dev_set(dev, offset_value, len, 0)) {
if (!dev_set(dev, offset_value, len, DEV_IO_SIGNATURES, 0)) {
log_error("Failed to wipe %s signature on %s.", type, name);
return 0;
}
@@ -772,7 +772,7 @@ static int _wipe_signature(struct device *dev, const char *type, const char *nam
}
log_print_unless_silent("Wiping %s on %s.", type, name);
if (!dev_set(dev, offset_found, wipe_len, 0)) {
if (!dev_set(dev, offset_found, wipe_len, DEV_IO_SIGNATURES, 0)) {
log_error("Failed to wipe %s on %s.", type, name);
return 0;
}

View File

@@ -32,6 +32,18 @@
#define DEV_ASSUMED_FOR_LV 0x00000200 /* Is device assumed for an LV */
#define DEV_NOT_O_NOATIME 0x00000400 /* Don't use O_NOATIME */
/* ioflags */
#define AIO_SUPPORTED_CODE_PATH 0x00000001 /* Set if the code path supports AIO */
#define aio_supported_code_path(ioflags) (((ioflags) & AIO_SUPPORTED_CODE_PATH) ? 1 : 0)
/*
* Standard format for callback functions.
* When provided, callback functions are called exactly once.
* If failed is set, data cannot be accessed.
*/
typedef void (*lvm_callback_fn_t)(int failed, unsigned ioflags, void *context, const void *data);
/*
* Support for external device info.
* Any new external device info source needs to be
@@ -49,6 +61,48 @@ struct dev_ext {
void *handle;
};
/*
 * All I/O is annotated with the reason it is performed.
 *
 * The numeric values are used to index the _reasons[] table of message
 * text, so the two must be kept in the same order.
 * The two DEV_IO_MDA_EXTRA_* reasons select a device's separate "extra"
 * buffer (see EXTRA_IO and DEV_DEVBUF).
 */
typedef enum dev_io_reason {
DEV_IO_SIGNATURES = 0, /* Scanning device signatures */
DEV_IO_LABEL, /* LVM PV disk label */
DEV_IO_MDA_HEADER, /* Text format metadata area header */
DEV_IO_MDA_CONTENT, /* Text format metadata area content */
DEV_IO_MDA_EXTRA_HEADER, /* Header of any extra metadata areas on device */
DEV_IO_MDA_EXTRA_CONTENT, /* Content of any extra metadata areas on device */
DEV_IO_FMT1, /* Original LVM1 metadata format */
DEV_IO_POOL, /* Pool metadata format */
DEV_IO_LV, /* Content written to an LV */
DEV_IO_LOG /* Logging messages */
} dev_io_reason_t;
/*
 * Is this I/O for a device's extra metadata area?
 */
#define EXTRA_IO(reason) ((reason) == DEV_IO_MDA_EXTRA_HEADER || (reason) == DEV_IO_MDA_EXTRA_CONTENT)
/*
 * Select which of a device's two buffers is used for this reason:
 * last_extra_devbuf for extra metadata area I/O, otherwise last_devbuf.
 */
#define DEV_DEVBUF(dev, reason) (EXTRA_IO((reason)) ? &(dev)->last_extra_devbuf : &(dev)->last_devbuf)
/*
 * Address of the requested data within that buffer (buf + data_offset).
 * Note that both arguments are evaluated more than once.
 */
#define DEV_DEVBUF_DATA(dev, reason) ((char *) DEV_DEVBUF((dev), (reason))->buf + DEV_DEVBUF((dev), (reason))->data_offset)
/* A byte range (start, size) on a particular device. */
struct device_area {
struct device *dev;
uint64_t start; /* Bytes */
uint64_t size; /* Bytes */
};
/*
 * A data buffer associated with one I/O on a device.
 * struct device embeds two of these: last_devbuf and last_extra_devbuf
 * (see DEV_DEVBUF() for how one is selected from the I/O reason).
 */
struct device_buffer {
uint64_t data_offset; /* Offset to start of requested data within buf */
void *malloc_address; /* Start of allocated memory */
void *buf; /* Aligned buffer that contains data within it */
struct device_area where; /* Location of buf */
dev_io_reason_t reason; /* Why this I/O is performed */
unsigned write:1; /* 1 if write; 0 if read */
/* NOTE(review): presumably the callback and context supplied to dev_read_callback() - confirm in dev-io.c */
lvm_callback_fn_t dev_read_callback_fn;
void *dev_read_callback_context;
struct dm_list aio_queued; /* Queue of async I/O waiting to be issued */
};
/*
* All devices in LVM will be represented by one of these.
* pointer comparisons are valid.
@@ -71,6 +125,8 @@ struct device {
uint64_t end;
struct dm_list open_list;
struct dev_ext ext;
struct device_buffer last_devbuf; /* Last data buffer read from the device */
struct device_buffer last_extra_devbuf; /* Last data buffer read from the device for extra metadata area */
const char *vgid; /* if device is an LV */
const char *lvid; /* if device is an LV */
@@ -84,12 +140,6 @@ struct device_list {
struct device *dev;
};
struct device_area {
struct device *dev;
uint64_t start; /* Bytes */
uint64_t size; /* Bytes */
};
/*
* Support for external device info.
*/
@@ -129,19 +179,37 @@ int dev_test_excl(struct device *dev);
int dev_fd(struct device *dev);
const char *dev_name(const struct device *dev);
int dev_read(struct device *dev, uint64_t offset, size_t len, void *buffer);
int dev_read_circular(struct device *dev, uint64_t offset, size_t len,
uint64_t offset2, size_t len2, char *buf);
int dev_write(struct device *dev, uint64_t offset, size_t len, void *buffer);
int dev_append(struct device *dev, size_t len, char *buffer);
int dev_set(struct device *dev, uint64_t offset, size_t len, int value);
/* Returns a read-only buffer */
const char *dev_read(struct device *dev, uint64_t offset, size_t len, dev_io_reason_t reason);
const char *dev_read_circular(struct device *dev, uint64_t offset, size_t len,
uint64_t offset2, size_t len2, dev_io_reason_t reason);
/* Passes the data (or error) to dev_read_callback_fn */
void dev_read_callback(struct device *dev, uint64_t offset, size_t len, dev_io_reason_t reason,
unsigned ioflags, lvm_callback_fn_t dev_read_callback_fn, void *callback_context);
/* Read data and copy it into a supplied private buffer. */
/* Only use for tiny reads or on unimportant code paths. */
int dev_read_buf(struct device *dev, uint64_t offset, size_t len, dev_io_reason_t reason, void *retbuf);
int dev_write(struct device *dev, uint64_t offset, size_t len, dev_io_reason_t reason, void *buffer);
int dev_append(struct device *dev, size_t len, dev_io_reason_t reason, char *buffer);
int dev_set(struct device *dev, uint64_t offset, size_t len, dev_io_reason_t reason, int value);
void dev_flush(struct device *dev);
struct device *dev_create_file(const char *filename, struct device *dev,
struct dm_str_list *alias, int use_malloc);
void dev_destroy_file(struct device *dev);
void devbufs_release(struct device *dev);
/* Return a valid device name from the alias list; NULL otherwise */
const char *dev_name_confirmed(struct device *dev, int quiet);
struct cmd_context;
int dev_async_getevents(void);
int dev_async_setup(struct cmd_context *cmd);
void dev_async_exit(void);
int dev_async_reset(struct cmd_context *cmd);
#endif

View File

@@ -52,13 +52,13 @@ static void _composite_destroy(struct dev_filter *f)
dm_free(f);
}
static int _dump(struct dev_filter *f, int merge_existing)
static int _dump(struct dev_filter *f, struct dm_pool *mem, int merge_existing)
{
struct dev_filter **filters;
for (filters = (struct dev_filter **) f->private; *filters; ++filters)
if ((*filters)->dump &&
!(*filters)->dump(*filters, merge_existing))
!(*filters)->dump(*filters, mem, merge_existing))
return_0;
return 1;

View File

@@ -74,7 +74,7 @@ struct dev_filter *internal_filter_create(void)
f->destroy = _destroy;
f->use_count = 0;
log_debug_devs("internal filter initialised.");
log_debug_devs("Internal filter initialised.");
return f;
}

View File

@@ -87,7 +87,7 @@ static int _read_array(struct pfilter *pf, struct dm_config_tree *cft,
return 1;
}
int persistent_filter_load(struct dev_filter *f, struct dm_config_tree **cft_out)
int persistent_filter_load(struct dm_pool *mem, struct dev_filter *f, struct dm_config_tree **cft_out)
{
struct pfilter *pf = (struct pfilter *) f->private;
struct dm_config_tree *cft;
@@ -116,9 +116,10 @@ int persistent_filter_load(struct dev_filter *f, struct dm_config_tree **cft_out
if (!(cft = config_open(CONFIG_FILE_SPECIAL, pf->file, 1)))
return_0;
if (!config_file_read(cft))
if (!config_file_read(mem, cft))
goto_out;
log_debug_devs("Loading persistent filter cache from %s", pf->file);
_read_array(pf, cft, "persistent_filter_cache/valid_devices",
PF_GOOD_DEVICE);
/* We don't gain anything by holding invalid devices */
@@ -174,7 +175,7 @@ static void _write_array(struct pfilter *pf, FILE *fp, const char *path,
fprintf(fp, "\n\t]\n");
}
static int _persistent_filter_dump(struct dev_filter *f, int merge_existing)
static int _persistent_filter_dump(struct dev_filter *f, struct dm_pool *mem, int merge_existing)
{
struct pfilter *pf;
char *tmp_file;
@@ -233,7 +234,7 @@ static int _persistent_filter_dump(struct dev_filter *f, int merge_existing)
lvm_stat_ctim(&ts, &info);
if (merge_existing && timespeccmp(&ts, &pf->ctime, !=))
/* Keep cft open to avoid losing lock */
persistent_filter_load(f, &cft);
persistent_filter_load(mem, f, &cft);
tmp_file = alloca(strlen(pf->file) + 5);
sprintf(tmp_file, "%s.tmp", pf->file);

View File

@@ -53,6 +53,6 @@ typedef enum {
} filter_mode_t;
struct dev_filter *usable_filter_create(struct dev_types *dt, filter_mode_t mode);
int persistent_filter_load(struct dev_filter *f, struct dm_config_tree **cft_out);
int persistent_filter_load(struct dm_pool *mem, struct dev_filter *f, struct dm_config_tree **cft_out);
#endif /* _LVM_FILTER_H */

View File

@@ -205,7 +205,7 @@ int munge_pvd(struct device *dev, struct pv_disk *pvd)
static int _read_pvd(struct device *dev, struct pv_disk *pvd)
{
if (!dev_read(dev, UINT64_C(0), sizeof(*pvd), pvd)) {
if (!dev_read_buf(dev, UINT64_C(0), sizeof(*pvd), DEV_IO_FMT1, pvd)) {
log_very_verbose("Failed to read PV data from %s",
dev_name(dev));
return 0;
@@ -216,7 +216,7 @@ static int _read_pvd(struct device *dev, struct pv_disk *pvd)
static int _read_lvd(struct device *dev, uint64_t pos, struct lv_disk *disk)
{
if (!dev_read(dev, pos, sizeof(*disk), disk))
if (!dev_read_buf(dev, pos, sizeof(*disk), DEV_IO_FMT1, disk))
return_0;
_xlate_lvd(disk);
@@ -228,7 +228,7 @@ int read_vgd(struct device *dev, struct vg_disk *vgd, struct pv_disk *pvd)
{
uint64_t pos = pvd->vg_on_disk.base;
if (!dev_read(dev, pos, sizeof(*vgd), vgd))
if (!dev_read_buf(dev, pos, sizeof(*vgd), DEV_IO_FMT1, vgd))
return_0;
_xlate_vgd(vgd);
@@ -252,7 +252,7 @@ static int _read_uuids(struct disk_list *data)
uint64_t end = pos + data->pvd.pv_uuidlist_on_disk.size;
while (pos < end && num_read < data->vgd.pv_cur) {
if (!dev_read(data->dev, pos, sizeof(buffer), buffer))
if (!dev_read_buf(data->dev, pos, sizeof(buffer), DEV_IO_FMT1, buffer))
return_0;
if (!(ul = dm_pool_alloc(data->mem, sizeof(*ul))))
@@ -311,7 +311,7 @@ static int _read_extents(struct disk_list *data)
if (!extents)
return_0;
if (!dev_read(data->dev, pos, len, extents))
if (!dev_read_buf(data->dev, pos, len, DEV_IO_FMT1, extents))
return_0;
_xlate_extents(extents, data->pvd.pe_total);
@@ -539,7 +539,7 @@ static int _write_vgd(struct disk_list *data)
data->pvd.vg_name, dev_name(data->dev), pos, sizeof(*vgd));
_xlate_vgd(vgd);
if (!dev_write(data->dev, pos, sizeof(*vgd), vgd))
if (!dev_write(data->dev, pos, sizeof(*vgd), DEV_IO_FMT1, vgd))
return_0;
_xlate_vgd(vgd);
@@ -564,7 +564,7 @@ static int _write_uuids(struct disk_list *data)
data->pvd.vg_name, dev_name(data->dev),
pos, NAME_LEN);
if (!dev_write(data->dev, pos, NAME_LEN, ul->uuid))
if (!dev_write(data->dev, pos, NAME_LEN, DEV_IO_FMT1, ul->uuid))
return_0;
pos += NAME_LEN;
@@ -580,7 +580,7 @@ static int _write_lvd(struct device *dev, uint64_t pos, struct lv_disk *disk)
pos, sizeof(*disk));
_xlate_lvd(disk);
if (!dev_write(dev, pos, sizeof(*disk), disk))
if (!dev_write(dev, pos, sizeof(*disk), DEV_IO_FMT1, disk))
return_0;
_xlate_lvd(disk);
@@ -595,7 +595,7 @@ static int _write_lvs(struct disk_list *data)
pos = data->pvd.lv_on_disk.base;
if (!dev_set(data->dev, pos, data->pvd.lv_on_disk.size, 0)) {
if (!dev_set(data->dev, pos, data->pvd.lv_on_disk.size, DEV_IO_FMT1, 0)) {
log_error("Couldn't zero lv area on device '%s'",
dev_name(data->dev));
return 0;
@@ -626,7 +626,7 @@ static int _write_extents(struct disk_list *data)
pos, len);
_xlate_extents(extents, data->pvd.pe_total);
if (!dev_write(data->dev, pos, len, extents))
if (!dev_write(data->dev, pos, len, DEV_IO_FMT1, extents))
return_0;
_xlate_extents(extents, data->pvd.pe_total);
@@ -661,7 +661,7 @@ static int _write_pvd(struct disk_list *data)
pos, size);
_xlate_pvd((struct pv_disk *) buf);
if (!dev_write(data->dev, pos, size, buf)) {
if (!dev_write(data->dev, pos, size, DEV_IO_FMT1, buf)) {
dm_free(buf);
return_0;
}

View File

@@ -182,7 +182,7 @@ static struct volume_group *_format1_vg_read(struct format_instance *fid,
struct metadata_area *mda __attribute__((unused)),
struct cached_vg_fmtdata **vg_fmtdata __attribute__((unused)),
unsigned *use_previous_vg __attribute__((unused)),
int single_device __attribute__((unused)))
int single_device __attribute__((unused)), unsigned ioflags)
{
struct volume_group *vg;
struct disk_list *dl;

View File

@@ -54,15 +54,17 @@ static int _lvm1_write(struct label *label __attribute__((unused)), void *buf __
return 0;
}
static int _lvm1_read(struct labeller *l, struct device *dev, void *buf,
struct label **label)
static int _lvm1_read(struct labeller *l, struct device *dev, void *buf, unsigned ioflags,
lvm_callback_fn_t read_label_callback_fn, void *read_label_callback_context)
{
struct pv_disk *pvd = (struct pv_disk *) buf;
struct vg_disk vgd;
struct lvmcache_info *info;
struct label *label = NULL;
const char *vgid = FMT_LVM1_ORPHAN_VG_NAME;
const char *vgname = FMT_LVM1_ORPHAN_VG_NAME;
unsigned exported = 0;
int r = 0;
munge_pvd(dev, pvd);
@@ -76,8 +78,9 @@ static int _lvm1_read(struct labeller *l, struct device *dev, void *buf,
if (!(info = lvmcache_add(l, (char *)pvd->pv_uuid, dev, vgname, vgid,
exported)))
return_0;
*label = lvmcache_get_label(info);
goto_out;
label = lvmcache_get_label(info);
lvmcache_set_device_size(info, ((uint64_t)xlate32(pvd->pv_size)) << SECTOR_SHIFT);
lvmcache_set_ext_version(info, 0);
@@ -86,7 +89,13 @@ static int _lvm1_read(struct labeller *l, struct device *dev, void *buf,
lvmcache_del_bas(info);
lvmcache_make_valid(info);
return 1;
r = 1;
out:
if (read_label_callback_fn)
read_label_callback_fn(!r, 0, read_label_callback_context, label);
return r;
}
static int _lvm1_initialise_label(struct labeller *l __attribute__((unused)), struct label *label)
@@ -109,7 +118,6 @@ struct label_ops _lvm1_ops = {
.can_handle = _lvm1_can_handle,
.write = _lvm1_write,
.read = _lvm1_read,
.verify = _lvm1_can_handle,
.initialise_label = _lvm1_initialise_label,
.destroy_label = _lvm1_destroy_label,
.destroy = _lvm1_destroy,

View File

@@ -40,7 +40,7 @@ static int __read_pool_disk(const struct format_type *fmt, struct device *dev,
char buf[512] __attribute__((aligned(8)));
/* FIXME: Need to check the cache here first */
if (!dev_read(dev, UINT64_C(0), 512, buf)) {
if (!dev_read_buf(dev, UINT64_C(0), 512, DEV_IO_POOL, buf)) {
log_very_verbose("Failed to read PV data from %s",
dev_name(dev));
return 0;

View File

@@ -103,7 +103,7 @@ static struct volume_group *_pool_vg_read(struct format_instance *fid,
struct metadata_area *mda __attribute__((unused)),
struct cached_vg_fmtdata **vg_fmtdata __attribute__((unused)),
unsigned *use_previous_vg __attribute__((unused)),
int single_device __attribute__((unused)))
int single_device __attribute__((unused)), unsigned ioflags)
{
struct volume_group *vg;
struct user_subpool *usp;

View File

@@ -55,12 +55,19 @@ static int _pool_write(struct label *label __attribute__((unused)), void *buf __
return 0;
}
static int _pool_read(struct labeller *l, struct device *dev, void *buf,
struct label **label)
static int _pool_read(struct labeller *l, struct device *dev, void *buf, unsigned ioflags,
lvm_callback_fn_t read_label_callback_fn, void *read_label_callback_context)
{
struct pool_list pl;
struct label *label;
int r;
return read_pool_label(&pl, l, dev, buf, label);
r = read_pool_label(&pl, l, dev, buf, &label);
if (read_label_callback_fn)
read_label_callback_fn(!r, 0, read_label_callback_context, label);
return r;
}
static int _pool_initialise_label(struct labeller *l __attribute__((unused)), struct label *label)
@@ -83,7 +90,6 @@ struct label_ops _pool_ops = {
.can_handle = _pool_can_handle,
.write = _pool_write,
.read = _pool_read,
.verify = _pool_can_handle,
.initialise_label = _pool_initialise_label,
.destroy_label = _pool_destroy_label,
.destroy = _label_pool_destroy,

View File

@@ -320,7 +320,7 @@ struct volume_group *backup_read_vg(struct cmd_context *cmd,
}
dm_list_iterate_items(mda, &tf->metadata_areas_in_use) {
if (!(vg = mda->ops->vg_read(tf, vg_name, mda, NULL, NULL, 0)))
if (!(vg = mda->ops->vg_read(tf, vg_name, mda, NULL, NULL, 0, 0)))
stack;
break;
}
@@ -413,7 +413,7 @@ int backup_restore_vg(struct cmd_context *cmd, struct volume_group *vg,
new_pvl->pv = pv;
dm_list_add(&new_pvs, &new_pvl->list);
log_verbose("Set up physical volume for \"%s\" with %" PRIu64
log_verbose("Set up physical volume for \"%s\" with " FMTu64
" available sectors.", pv_dev_name(pv), pv_size(pv));
}
@@ -493,7 +493,7 @@ int backup_restore_vg(struct cmd_context *cmd, struct volume_group *vg,
return 0;
}
if (!dev_set(dev, UINT64_C(0), (size_t) 2048, 0)) {
if (!dev_set(dev, UINT64_C(0), (size_t) 2048, DEV_IO_LABEL, 0)) {
log_error("%s not wiped: aborting", pv_name);
if (!dev_close(dev))
stack;

View File

@@ -23,6 +23,7 @@
#include "lvm-version.h"
#include "toolcontext.h"
#include "config-util.h"
#include "layout.h"
#include <stdarg.h>
#include <time.h>
@@ -121,13 +122,14 @@ static int _extend_buffer(struct formatter *f)
{
char *newbuf;
log_debug_metadata("Doubling metadata output buffer to %" PRIu32,
log_debug_metadata("Doubling metadata output buffer to " FMTu32,
f->data.buf.size * 2);
if (!(newbuf = dm_realloc(f->data.buf.start,
f->data.buf.size * 2))) {
log_error("Buffer reallocation failed.");
return 0;
}
if (!(newbuf = dm_malloc_aligned(f->data.buf.size * 2, 0)))
return_0;
memcpy(newbuf, f->data.buf.start, f->data.buf.size);
free(f->data.buf.start);
f->data.buf.start = newbuf;
f->data.buf.size *= 2;
@@ -561,15 +563,15 @@ static int _print_pvs(struct formatter *f, struct volume_group *vg)
if (!_out_list(f, &pv->tags, "tags"))
return_0;
outsize(f, pv->size, "dev_size = %" PRIu64, pv->size);
outsize(f, pv->size, "dev_size = " FMTu64, pv->size);
outf(f, "pe_start = %" PRIu64, pv->pe_start);
outf(f, "pe_start = " FMTu64, pv->pe_start);
outsize(f, vg->extent_size * (uint64_t) pv->pe_count,
"pe_count = %u", pv->pe_count);
if (pv->ba_start && pv->ba_size) {
outf(f, "ba_start = %" PRIu64, pv->ba_start);
outsize(f, pv->ba_size, "ba_size = %" PRIu64, pv->ba_size);
outf(f, "ba_start = " FMTu64, pv->ba_start);
outsize(f, pv->ba_size, "ba_size = " FMTu64, pv->ba_size);
}
_dec_indent(f);
@@ -631,7 +633,7 @@ int out_areas(struct formatter *f, const struct lv_segment *seg,
switch (seg_type(seg, s)) {
case AREA_PV:
if (!(pv = seg_pv(seg, s))) {
log_error(INTERNAL_ERROR "Missing PV for area %" PRIu32 " of %s segment of LV %s.",
log_error(INTERNAL_ERROR "Missing PV for area " FMTu32 " of %s segment of LV %s.",
s, type, display_lvname(seg->lv));
return 0;
}
@@ -670,7 +672,7 @@ int out_areas(struct formatter *f, const struct lv_segment *seg,
break;
case AREA_UNASSIGNED:
log_error(INTERNAL_ERROR "Invalid type for area %" PRIu32 " of %s segment of LV %s.",
log_error(INTERNAL_ERROR "Invalid type for area " FMTu32 " of %s segment of LV %s.",
s, type, display_lvname(seg->lv));
return 0;
}
@@ -694,7 +696,7 @@ static int _print_timestamp(struct formatter *f,
"%Y-%m-%d %T %z", local_tm))
buf[0] = 0;
outfc(f, buf, "%s = %" PRIu64, name, (uint64_t) ts);
outfc(f, buf, "%s = " FMTu64, name, (uint64_t) ts);
}
return 1;
@@ -1064,7 +1066,7 @@ size_t text_vg_export_raw(struct volume_group *vg, const char *desc, char **buf)
return_0;
f->data.buf.size = 65536; /* Initial metadata limit */
if (!(f->data.buf.start = dm_malloc(f->data.buf.size))) {
if (!(f->data.buf.start = dm_malloc_aligned(f->data.buf.size, 0))) {
log_error("text_export buffer allocation failed");
goto out;
}
@@ -1079,7 +1081,12 @@ size_t text_vg_export_raw(struct volume_group *vg, const char *desc, char **buf)
goto_out;
}
r = f->data.buf.used + 1;
f->data.buf.used += 1; /* Terminating NUL */
/* Zero fill up to next alignment boundary */
memset(f->data.buf.start + f->data.buf.used, 0, MDA_ALIGNMENT - f->data.buf.used % MDA_ALIGNMENT);
r = f->data.buf.used;
*buf = f->data.buf.start;
out:

View File

@@ -48,6 +48,7 @@ static const struct flag _pv_flags[] = {
{EXPORTED_VG, "EXPORTED", STATUS_FLAG},
{MISSING_PV, "MISSING", COMPATIBLE_FLAG},
{MISSING_PV, "MISSING", STATUS_FLAG},
{PV_MOVED_VG, NULL, 0},
{UNLABELLED_PV, NULL, 0},
{0, NULL, 0}
};
@@ -70,7 +71,6 @@ static const struct flag _lv_flags[] = {
{LV_WRITEMOSTLY, "WRITEMOSTLY", STATUS_FLAG},
{LV_ACTIVATION_SKIP, "ACTIVATION_SKIP", COMPATIBLE_FLAG},
{LV_ERROR_WHEN_FULL, "ERROR_WHEN_FULL", COMPATIBLE_FLAG},
{LV_MAINTENANCE, "MAINTENANCE", SEGTYPE_FLAG},
{LV_METADATA_FORMAT, "METADATA_FORMAT", SEGTYPE_FLAG},
{LV_NOSCAN, NULL, 0},
{LV_TEMPORARY, NULL, 0},
@@ -90,8 +90,6 @@ static const struct flag _lv_flags[] = {
{PARTIAL_LV, NULL, 0},
{POSTORDER_FLAG, NULL, 0},
{VIRTUAL_ORIGIN, NULL, 0},
{REPLICATOR, NULL, 0},
{REPLICATOR_LOG, NULL, 0},
{THIN_VOLUME, NULL, 0},
{THIN_POOL, NULL, 0},
{THIN_POOL_DATA, NULL, 0},

File diff suppressed because it is too large Load Diff

View File

@@ -76,19 +76,21 @@ struct volume_group *text_vg_import_fd(struct format_instance *fid,
struct cached_vg_fmtdata **vg_fmtdata,
unsigned *use_previous_vg,
int single_device,
struct device *dev,
struct device *dev, int primary_mda,
off_t offset, uint32_t size,
off_t offset2, uint32_t size2,
checksum_fn_t checksum_fn,
uint32_t checksum,
uint32_t checksum, unsigned ioflags,
time_t *when, char **desc);
int text_vgsummary_import(const struct format_type *fmt,
struct device *dev,
struct device *dev, dev_io_reason_t reason,
off_t offset, uint32_t size,
off_t offset2, uint32_t size2,
checksum_fn_t checksum_fn,
int checksum_only,
struct lvmcache_vgsummary *vgsummary);
int checksum_only, unsigned ioflags,
struct lvmcache_vgsummary *vgsummary,
lvm_callback_fn_t process_vgsummary_fn,
void *process_vgsummary_context);
#endif

View File

@@ -1,6 +1,6 @@
/*
* Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved.
* Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved.
* Copyright (C) 2004-2018 Red Hat, Inc. All rights reserved.
*
* This file is part of LVM2.
*
@@ -16,6 +16,7 @@
#include "lib.h"
#include "metadata.h"
#include "import-export.h"
#include "toolcontext.h"
/* FIXME Use tidier inclusion method */
static struct text_vg_version_ops *(_text_vsn_list[2]);
@@ -32,58 +33,102 @@ static void _init_text_import(void)
_text_import_initialised = 1;
}
/*
* Find out vgname on a given device.
*/
int text_vgsummary_import(const struct format_type *fmt,
struct device *dev,
off_t offset, uint32_t size,
off_t offset2, uint32_t size2,
checksum_fn_t checksum_fn,
int checksum_only,
struct lvmcache_vgsummary *vgsummary)
{
/*
 * Context handed to _import_vgsummary().
 * Filled in by text_vgsummary_import().
 */
struct import_vgsummary_params {
const struct format_type *fmt; /* Passed to read_vgsummary() */
struct dm_config_tree *cft; /* Opened by text_vgsummary_import(); destroyed by _import_vgsummary() */
int checksum_only; /* If set, _import_vgsummary() does not parse the config tree */
struct lvmcache_vgsummary *vgsummary; /* Filled in by read_vgsummary() */
lvm_callback_fn_t process_vgsummary_fn; /* Optional; called by _import_vgsummary() with failed = !ret */
void *process_vgsummary_context; /* Context passed to process_vgsummary_fn */
int ret; /* 1 on success, 0 on failure; returned by text_vgsummary_import() */
};
static void _import_vgsummary(int failed, unsigned ioflags, void *context, const void *data)
{
struct import_vgsummary_params *ivsp = context;
struct text_vg_version_ops **vsn;
int r = 0;
_init_text_import();
if (!(cft = config_open(CONFIG_FILE_SPECIAL, NULL, 0)))
return_0;
if ((!dev && !config_file_read(cft)) ||
(dev && !config_file_read_fd(cft, dev, offset, size,
offset2, size2, checksum_fn,
vgsummary->mda_checksum,
checksum_only, 1))) {
log_error("Couldn't read volume group metadata.");
goto out;
if (failed) {
ivsp->ret = 0;
goto_out;
}
if (checksum_only) {
if (ivsp->checksum_only)
/* Checksum matches already-cached content - no need to reparse. */
r = 1;
goto out;
}
/*
* Find a set of version functions that can read this file
*/
for (vsn = &_text_vsn_list[0]; *vsn; vsn++) {
if (!(*vsn)->check_version(cft))
if (!(*vsn)->check_version(ivsp->cft))
continue;
if (!(*vsn)->read_vgsummary(fmt, cft, vgsummary))
if (!(*vsn)->read_vgsummary(ivsp->fmt, ivsp->cft, ivsp->vgsummary)) {
ivsp->ret = 0;
goto_out;
}
r = 1;
break;
goto out;
}
out:
config_destroy(cft);
return r;
/* Nothing found */
ivsp->ret = 0;
out:
config_destroy(ivsp->cft);
if (ivsp->process_vgsummary_fn)
ivsp->process_vgsummary_fn(!ivsp->ret, ioflags, ivsp->process_vgsummary_context, NULL);
}
/*
 * Find out vgname on a given device.
 *
 * Builds an import_vgsummary_params context, then reads the metadata:
 *  - with no dev, via config_file_read(), after which _import_vgsummary()
 *    is invoked directly;
 *  - with a dev, via config_file_read_fd(), which is handed
 *    _import_vgsummary() as its callback.
 *
 * Returns 0 on failure, otherwise the ret value held in the context.
 */
int text_vgsummary_import(const struct format_type *fmt,
			  struct device *dev, dev_io_reason_t reason,
			  off_t offset, uint32_t size,
			  off_t offset2, uint32_t size2,
			  checksum_fn_t checksum_fn,
			  int checksum_only, unsigned ioflags,
			  struct lvmcache_vgsummary *vgsummary,
			  lvm_callback_fn_t process_vgsummary_fn,
			  void *process_vgsummary_context)
{
	struct import_vgsummary_params *params;

	_init_text_import();

	params = dm_pool_zalloc(fmt->cmd->mem, sizeof(*params));
	if (!params) {
		log_error("Failed to allocate import_vgsummary_params struct.");
		return 0;
	}

	params->cft = config_open(CONFIG_FILE_SPECIAL, NULL, 0);
	if (!params->cft)
		return_0;

	/* Everything the callback needs travels in the context. */
	params->fmt = fmt;
	params->checksum_only = checksum_only;
	params->vgsummary = vgsummary;
	params->process_vgsummary_fn = process_vgsummary_fn;
	params->process_vgsummary_context = process_vgsummary_context;
	params->ret = 1;

	if (dev) {
		/* Device case: the read routine is given the callback. */
		if (!config_file_read_fd(fmt->cmd->mem, params->cft, dev, reason, offset, size,
					 offset2, size2, checksum_fn,
					 vgsummary->mda_checksum,
					 checksum_only, 1, ioflags, &_import_vgsummary, params)) {
			log_error("Couldn't read volume group metadata.");
			return 0;
		}

		return params->ret;
	}

	/* File case: read here, then run the callback ourselves. */
	if (!config_file_read(fmt->cmd->mem, params->cft)) {
		log_error("Couldn't read volume group metadata.");
		params->ret = 0;
	}

	_import_vgsummary(!params->ret, ioflags, params, NULL);

	return params->ret;
}
struct cached_vg_fmtdata {
@@ -91,51 +136,30 @@ struct cached_vg_fmtdata {
size_t cached_mda_size;
};
struct volume_group *text_vg_import_fd(struct format_instance *fid,
const char *file,
struct cached_vg_fmtdata **vg_fmtdata,
unsigned *use_previous_vg,
int single_device,
struct device *dev,
off_t offset, uint32_t size,
off_t offset2, uint32_t size2,
checksum_fn_t checksum_fn,
uint32_t checksum,
time_t *when, char **desc)
{
struct volume_group *vg = NULL;
/*
 * Context handed to _import_vg().
 * Filled in by text_vg_import_fd().
 */
struct import_vg_params {
struct format_instance *fid; /* Passed to read_vg() */
struct dm_config_tree *cft; /* Opened by text_vg_import_fd(); destroyed by _import_vg() */
struct text_vg_version_ops **vsn; /* NOTE(review): not referenced by the code visible here - confirm whether still needed */
int single_device; /* Passed to read_vg() */
int skip_parse; /* If set, _import_vg() does not parse and sets *use_previous_vg to 1 */
unsigned *use_previous_vg; /* Optional output flag */
struct volume_group *vg; /* Result: set by _import_vg(), returned by text_vg_import_fd() */
uint32_t checksum; /* Stored in *vg_fmtdata after a successful parse */
uint32_t total_size; /* size + size2; stored in *vg_fmtdata after a successful parse */
time_t *when; /* Output passed to read_desc() */
struct cached_vg_fmtdata **vg_fmtdata; /* Optional cache of the last parsed checksum and size */
char **desc; /* Output passed to read_desc() */
};
if (vg_fmtdata && !*vg_fmtdata &&
!(*vg_fmtdata = dm_pool_zalloc(fid->mem, sizeof(**vg_fmtdata)))) {
log_error("Failed to allocate VG fmtdata for text format.");
return NULL;
}
static void _import_vg(int failed, unsigned ioflags, void *context, const void *data)
{
struct import_vg_params *ivp = context;
struct text_vg_version_ops **vsn;
_init_text_import();
ivp->vg = NULL;
*desc = NULL;
*when = 0;
if (!(cft = config_open(CONFIG_FILE_SPECIAL, file, 0)))
return_NULL;
/* Does the metadata match the already-cached VG? */
skip_parse = vg_fmtdata &&
((*vg_fmtdata)->cached_mda_checksum == checksum) &&
((*vg_fmtdata)->cached_mda_size == (size + size2));
if ((!dev && !config_file_read(cft)) ||
(dev && !config_file_read_fd(cft, dev, offset, size,
offset2, size2, checksum_fn, checksum,
skip_parse, 1)))
goto_out;
if (skip_parse) {
if (use_previous_vg)
*use_previous_vg = 1;
if (ivp->skip_parse) {
if (ivp->use_previous_vg)
*ivp->use_previous_vg = 1;
goto out;
}
@@ -143,35 +167,98 @@ struct volume_group *text_vg_import_fd(struct format_instance *fid,
* Find a set of version functions that can read this file
*/
for (vsn = &_text_vsn_list[0]; *vsn; vsn++) {
if (!(*vsn)->check_version(cft))
if (!(*vsn)->check_version(ivp->cft))
continue;
if (!(vg = (*vsn)->read_vg(fid, cft, single_device, 0)))
if (!(ivp->vg = (*vsn)->read_vg(ivp->fid, ivp->cft, ivp->single_device, 0)))
goto_out;
(*vsn)->read_desc(vg->vgmem, cft, when, desc);
(*vsn)->read_desc(ivp->vg->vgmem, ivp->cft, ivp->when, ivp->desc);
break;
}
if (vg && vg_fmtdata && *vg_fmtdata) {
(*vg_fmtdata)->cached_mda_size = (size + size2);
(*vg_fmtdata)->cached_mda_checksum = checksum;
if (ivp->vg && ivp->vg_fmtdata && *ivp->vg_fmtdata) {
(*ivp->vg_fmtdata)->cached_mda_size = ivp->total_size;
(*ivp->vg_fmtdata)->cached_mda_checksum = ivp->checksum;
}
if (use_previous_vg)
*use_previous_vg = 0;
if (ivp->use_previous_vg)
*ivp->use_previous_vg = 0;
out:
config_destroy(cft);
return vg;
out:
config_destroy(ivp->cft);
}
/*
 * Read VG metadata from a file, or from the region(s) of a device described
 * by offset/size and offset2/size2, and import it.
 *
 * If dev is NULL the metadata is read from 'file' using config_file_read()
 * and _import_vg() is then called directly.  Otherwise it is read using
 * config_file_read_fd(), which is handed _import_vg() as its callback.
 *
 * *when and *desc are reset before anything is read.  If vg_fmtdata is
 * supplied and its cached checksum and size match the ones requested,
 * parsing is skipped and _import_vg() sets *use_previous_vg instead.
 *
 * Returns the imported VG, or NULL if nothing was parsed or on error.
 */
struct volume_group *text_vg_import_fd(struct format_instance *fid,
				       const char *file,
				       struct cached_vg_fmtdata **vg_fmtdata,
				       unsigned *use_previous_vg,
				       int single_device,
				       struct device *dev, int primary_mda,
				       off_t offset, uint32_t size,
				       off_t offset2, uint32_t size2,
				       checksum_fn_t checksum_fn,
				       uint32_t checksum, unsigned ioflags,
				       time_t *when, char **desc)
{
	struct import_vg_params *ivp;

	if (vg_fmtdata && !*vg_fmtdata &&
	    !(*vg_fmtdata = dm_pool_zalloc(fid->mem, sizeof(**vg_fmtdata)))) {
		log_error("Failed to allocate VG fmtdata for text format.");
		return NULL;
	}

	if (!(ivp = dm_pool_zalloc(fid->fmt->cmd->mem, sizeof(*ivp)))) {
		/* Name the struct actually being allocated here. */
		log_error("Failed to allocate import_vg_params struct.");
		return NULL;
	}

	_init_text_import();

	ivp->fid = fid;
	ivp->when = when;
	*ivp->when = 0;
	ivp->desc = desc;
	*ivp->desc = NULL;
	ivp->single_device = single_device;
	ivp->use_previous_vg = use_previous_vg;
	ivp->checksum = checksum;
	ivp->total_size = size + size2;
	ivp->vg_fmtdata = vg_fmtdata;

	if (!(ivp->cft = config_open(CONFIG_FILE_SPECIAL, file, 0)))
		return_NULL;

	/* Does the metadata match the already-cached VG? */
	ivp->skip_parse = vg_fmtdata &&
			  ((*vg_fmtdata)->cached_mda_checksum == checksum) &&
			  ((*vg_fmtdata)->cached_mda_size == ivp->total_size);

	if (!dev && !config_file_read(fid->mem, ivp->cft)) {
		config_destroy(ivp->cft);
		return_NULL;
	}

	if (dev) {
		/*
		 * NOTE(review): _import_vg() also destroys ivp->cft.  Confirm that
		 * config_file_read_fd() never returns 0 after it has already
		 * invoked the callback, or cft would be destroyed twice.
		 */
		if (!config_file_read_fd(fid->mem, ivp->cft, dev, MDA_CONTENT_REASON(primary_mda), offset, size,
					 offset2, size2, checksum_fn, checksum,
					 ivp->skip_parse, 1, ioflags, &_import_vg, ivp)) {
			config_destroy(ivp->cft);
			return_NULL;
		}
	} else
		_import_vg(0, 0, ivp, NULL);

	return ivp->vg;
}
struct volume_group *text_vg_import_file(struct format_instance *fid,
const char *file,
time_t *when, char **desc)
{
return text_vg_import_fd(fid, file, NULL, NULL, 0, NULL, (off_t)0, 0, (off_t)0, 0, NULL, 0,
when, desc);
return text_vg_import_fd(fid, file, NULL, NULL, 0, NULL, 0, (off_t)0, 0, (off_t)0, 0, NULL, 0,
0, when, desc);
}
static struct volume_group *_import_vg_from_config_tree(const struct dm_config_tree *cft,

View File

@@ -17,6 +17,7 @@
#define _LVM_TEXT_LAYOUT_H
#include "config.h"
#include "format-text.h"
#include "metadata.h"
#include "lvmcache.h"
#include "uuid.h"
@@ -80,8 +81,9 @@ struct mda_header {
struct raw_locn raw_locns[0]; /* NULL-terminated list */
} __attribute__ ((packed));
struct mda_header *raw_read_mda_header(const struct format_type *fmt,
struct device_area *dev_area);
struct mda_header *raw_read_mda_header(struct dm_pool *mem, struct device_area *dev_area, int primary_mda);
int raw_read_mda_header_callback(struct dm_pool *mem, struct device_area *dev_area, int primary_mda,
unsigned ioflags, lvm_callback_fn_t mdah_callback_fn, void *mdah_callback_context);
struct mda_lists {
struct dm_list dirs;
@@ -102,9 +104,12 @@ struct mda_context {
#define MDA_HEADER_SIZE 512
#define LVM2_LABEL "LVM2 001"
#define MDA_SIZE_MIN (8 * (unsigned) lvm_getpagesize())
#define MDA_ORIGINAL_ALIGNMENT 512 /* Original alignment used for start of VG metadata content */
#define MDA_ALIGNMENT 4096 /* Default alignment in bytes since 2.02.177 for start of VG metadata content. */
int vgname_from_mda(const struct format_type *fmt, struct mda_header *mdah,
int vgname_from_mda(const struct format_type *fmt, const struct mda_header *mdah, int primary_mda,
struct device_area *dev_area, struct lvmcache_vgsummary *vgsummary,
uint64_t *mda_free_sectors);
uint64_t *mda_free_sectors, unsigned ioflags,
lvm_callback_fn_t update_vgsummary_callback_fn, void *update_vgsummary_callback_context);
#endif

View File

@@ -1,6 +1,6 @@
/*
* Copyright (C) 2002-2004 Sistina Software, Inc. All rights reserved.
* Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
* Copyright (C) 2004-2018 Red Hat, Inc. All rights reserved.
*
* This file is part of LVM2.
*
@@ -19,6 +19,7 @@
#include "label.h"
#include "xlate.h"
#include "lvmcache.h"
#include "toolcontext.h"
#include <sys/stat.h>
#include <fcntl.h>
@@ -35,14 +36,14 @@ static int _text_can_handle(struct labeller *l __attribute__((unused)),
return 0;
}
struct _dl_setup_baton {
struct dl_setup_baton {
struct disk_locn *pvh_dlocn_xl;
struct device *dev;
};
static int _da_setup(struct disk_locn *da, void *baton)
{
struct _dl_setup_baton *p = baton;
struct dl_setup_baton *p = baton;
p->pvh_dlocn_xl->offset = xlate64(da->offset);
p->pvh_dlocn_xl->size = xlate64(da->size);
p->pvh_dlocn_xl++;
@@ -56,7 +57,7 @@ static int _ba_setup(struct disk_locn *ba, void *baton)
static int _mda_setup(struct metadata_area *mda, void *baton)
{
struct _dl_setup_baton *p = baton;
struct dl_setup_baton *p = baton;
struct mda_context *mdac = (struct mda_context *) mda->metadata_locn;
if (mdac->area.dev != p->dev)
@@ -71,7 +72,7 @@ static int _mda_setup(struct metadata_area *mda, void *baton)
static int _dl_null_termination(void *baton)
{
struct _dl_setup_baton *p = baton;
struct dl_setup_baton *p = baton;
p->pvh_dlocn_xl->offset = xlate64(UINT64_C(0));
p->pvh_dlocn_xl->size = xlate64(UINT64_C(0));
@@ -86,7 +87,7 @@ static int _text_write(struct label *label, void *buf)
struct pv_header *pvhdr;
struct pv_header_extension *pvhdr_ext;
struct lvmcache_info *info;
struct _dl_setup_baton baton;
struct dl_setup_baton baton;
char buffer[64] __attribute__((aligned(8)));
int ba1, da1, mda1, mda2;
@@ -148,7 +149,7 @@ static int _text_write(struct label *label, void *buf)
!xlate64(pvhdr->disk_areas_xl[mda2].size))
mda2 = 0;
log_debug_metadata("%s: Preparing PV label header %s size %" PRIu64 " with"
log_debug_metadata("%s: Preparing PV label header %s size " FMTu64 " with"
"%s%.*" PRIu64 "%s%.*" PRIu64 "%s"
"%s%.*" PRIu64 "%s%.*" PRIu64 "%s"
"%s%.*" PRIu64 "%s%.*" PRIu64 "%s"
@@ -245,9 +246,9 @@ int add_mda(const struct format_type *fmt, struct dm_pool *mem, struct dm_list *
struct device *dev, uint64_t start, uint64_t size, unsigned ignored)
{
/* FIXME List size restricted by pv_header SECTOR_SIZE */
struct metadata_area *mdal;
struct metadata_area *mdal, *mda;
struct mda_lists *mda_lists = (struct mda_lists *) fmt->private;
struct mda_context *mdac;
struct mda_context *mdac, *mdac2;
if (!mem) {
if (!(mdal = dm_malloc(sizeof(struct metadata_area)))) {
@@ -274,13 +275,23 @@ int add_mda(const struct format_type *fmt, struct dm_pool *mem, struct dm_list *
mdal->ops = mda_lists->raw_ops;
mdal->metadata_locn = mdac;
mdal->status = 0;
mdac->area.dev = dev;
mdac->area.start = start;
mdac->area.size = size;
mdac->free_sectors = UINT64_C(0);
memset(&mdac->rlocn, 0, sizeof(mdac->rlocn));
/* Set MDA_PRIMARY only if this is the first metadata area on this device. */
mdal->status = MDA_PRIMARY;
dm_list_iterate_items(mda, mdas) {
mdac2 = mda->metadata_locn;
if (mdac2->area.dev == dev) {
mdal->status = 0;
break;
}
}
mda_set_ignored(mdal, ignored);
dm_list_add(mdas, &mdal->list);
@@ -308,18 +319,106 @@ static int _text_initialise_label(struct labeller *l __attribute__((unused)),
return 1;
}
struct _update_mda_baton {
struct update_mda_baton {
struct lvmcache_info *info;
struct label *label;
int nr_outstanding_mdas;
unsigned ioflags;
lvm_callback_fn_t read_label_callback_fn;
void *read_label_callback_context;
int ret;
};
struct process_mda_header_params {
struct update_mda_baton *umb;
struct metadata_area *mda;
struct device *dev;
struct lvmcache_vgsummary vgsummary;
int ret;
};
static void _process_vgsummary(int failed, unsigned ioflags, void *context, const void *data)
{
struct process_mda_header_params *pmp = context;
const struct lvmcache_vgsummary *vgsummary = data;
--pmp->umb->nr_outstanding_mdas;
/* FIXME Need to distinguish genuine errors here */
if (failed)
goto_out;
if (!lvmcache_update_vgname_and_id(pmp->umb->info, vgsummary)) {
pmp->umb->ret = 0;
pmp->ret = 0;
}
out:
if (!pmp->umb->nr_outstanding_mdas && pmp->umb->ret)
lvmcache_make_valid(pmp->umb->info);
if (!dev_close(pmp->dev))
stack;
if (!pmp->umb->nr_outstanding_mdas && pmp->umb->read_label_callback_fn)
pmp->umb->read_label_callback_fn(!pmp->umb->ret, ioflags, pmp->umb->read_label_callback_context, pmp->umb->label);
}
static void _process_mda_header(int failed, unsigned ioflags, void *context, const void *data)
{
struct process_mda_header_params *pmp = context;
const struct mda_header *mdah = data;
struct update_mda_baton *umb = pmp->umb;
const struct format_type *fmt = umb->label->labeller->fmt;
struct metadata_area *mda = pmp->mda;
struct mda_context *mdac = (struct mda_context *) mda->metadata_locn;
if (failed)
goto_bad;
mda_set_ignored(mda, rlocn_is_ignored(mdah->raw_locns));
if (mda_is_ignored(mda)) {
log_debug_metadata("Ignoring mda on device %s at offset " FMTu64,
dev_name(mdac->area.dev),
mdac->area.start);
goto bad;
}
if (!vgname_from_mda(fmt, mdah, mda_is_primary(mda), &mdac->area, &pmp->vgsummary, &mdac->free_sectors, ioflags, _process_vgsummary, pmp)) {
/* FIXME Separate fatal and non-fatal error cases? */
goto_bad;
}
return;
bad:
_process_vgsummary(1, ioflags, pmp, NULL);
return;
}
static int _count_mda(struct metadata_area *mda, void *baton)
{
struct update_mda_baton *umb = baton;
umb->nr_outstanding_mdas++;
return 1;
}
static int _update_mda(struct metadata_area *mda, void *baton)
{
struct _update_mda_baton *p = baton;
const struct format_type *fmt = p->label->labeller->fmt;
struct process_mda_header_params *pmp;
struct update_mda_baton *umb = baton;
const struct format_type *fmt = umb->label->labeller->fmt;
struct dm_pool *mem = umb->label->labeller->fmt->cmd->mem;
struct mda_context *mdac = (struct mda_context *) mda->metadata_locn;
struct mda_header *mdah;
struct lvmcache_vgsummary vgsummary = { 0 };
unsigned ioflags = umb->ioflags;
if (!(pmp = dm_pool_zalloc(mem, sizeof(*pmp)))) {
log_error("struct process_mda_header_params allocation failed");
return 0;
}
/*
* Using the labeller struct to preserve info about
@@ -328,45 +427,34 @@ static int _update_mda(struct metadata_area *mda, void *baton)
* TODO: make lvmcache smarter and move this cache logic there
*/
pmp->dev = mdac->area.dev;
pmp->umb = umb;
pmp->mda = mda;
if (!dev_open_readonly(mdac->area.dev)) {
mda_set_ignored(mda, 1);
stack;
if (!--umb->nr_outstanding_mdas && umb->read_label_callback_fn)
umb->read_label_callback_fn(!umb->ret, ioflags, umb->read_label_callback_context, umb->label);
return 1;
}
if (!(mdah = raw_read_mda_header(fmt, &mdac->area))) {
pmp->ret = 1;
if (!raw_read_mda_header_callback(fmt->cmd->mem, &mdac->area, mda_is_primary(mda), ioflags, _process_mda_header, pmp)) {
_process_vgsummary(1, ioflags, pmp, NULL);
stack;
goto close_dev;
}
mda_set_ignored(mda, rlocn_is_ignored(mdah->raw_locns));
if (mda_is_ignored(mda)) {
log_debug_metadata("Ignoring mda on device %s at offset %"PRIu64,
dev_name(mdac->area.dev),
mdac->area.start);
if (!dev_close(mdac->area.dev))
stack;
return 1;
}
if (vgname_from_mda(fmt, mdah, &mdac->area, &vgsummary,
&mdac->free_sectors) &&
!lvmcache_update_vgname_and_id(p->info, &vgsummary)) {
if (!dev_close(mdac->area.dev))
stack;
return_0;
}
close_dev:
if (!dev_close(mdac->area.dev))
stack;
return 1;
if (umb->read_label_callback_fn)
return 1;
else
return pmp->ret;
}
static int _text_read(struct labeller *l, struct device *dev, void *buf,
struct label **label)
static int _text_read(struct labeller *l, struct device *dev, void *buf, unsigned ioflags,
lvm_callback_fn_t read_label_callback_fn, void *read_label_callback_context)
{
struct label_header *lh = (struct label_header *) buf;
struct pv_header *pvhdr;
@@ -375,7 +463,9 @@ static int _text_read(struct labeller *l, struct device *dev, void *buf,
struct disk_locn *dlocn_xl;
uint64_t offset;
uint32_t ext_version;
struct _update_mda_baton baton;
struct dm_pool *mem = l->fmt->cmd->mem;
struct update_mda_baton *umb;
struct label *label;
/*
* PV header base
@@ -385,9 +475,9 @@ static int _text_read(struct labeller *l, struct device *dev, void *buf,
if (!(info = lvmcache_add(l, (char *)pvhdr->pv_uuid, dev,
FMT_TEXT_ORPHAN_VG_NAME,
FMT_TEXT_ORPHAN_VG_NAME, 0)))
return_0;
goto_bad;
*label = lvmcache_get_label(info);
label = lvmcache_get_label(info);
lvmcache_set_device_size(info, xlate64(pvhdr->device_size_xl));
@@ -418,7 +508,7 @@ static int _text_read(struct labeller *l, struct device *dev, void *buf,
if (!(ext_version = xlate32(pvhdr_ext->version)))
goto out;
log_debug_metadata("%s: PV header extension version %" PRIu32 " found",
log_debug_metadata("%s: PV header extension version " FMTu32 " found",
dev_name(dev), ext_version);
/* Extension version */
@@ -433,16 +523,41 @@ static int _text_read(struct labeller *l, struct device *dev, void *buf,
lvmcache_add_ba(info, offset, xlate64(dlocn_xl->size));
dlocn_xl++;
}
out:
baton.info = info;
baton.label = *label;
if (!(umb = dm_pool_zalloc(mem, sizeof(*umb)))) {
log_error("baton allocation failed");
goto_bad;
}
if (!lvmcache_foreach_mda(info, _update_mda, &baton))
return_0;
umb->info = info;
umb->label = label;
umb->ioflags = ioflags;
umb->read_label_callback_fn = read_label_callback_fn;
umb->read_label_callback_context = read_label_callback_context;
lvmcache_make_valid(info);
umb->ret = 1;
if (!lvmcache_foreach_mda(info, _count_mda, umb))
goto_bad;
if (!umb->nr_outstanding_mdas) {
lvmcache_make_valid(info);
if (read_label_callback_fn)
read_label_callback_fn(0, ioflags, read_label_callback_context, label);
return 1;
}
if (!lvmcache_foreach_mda(info, _update_mda, umb))
goto_bad;
return 1;
bad:
if (read_label_callback_fn)
read_label_callback_fn(1, ioflags, read_label_callback_context, NULL);
return 0;
}
static void _text_destroy_label(struct labeller *l __attribute__((unused)),
@@ -464,7 +579,6 @@ struct label_ops _text_ops = {
.can_handle = _text_can_handle,
.write = _text_write,
.read = _text_read,
.verify = _text_can_handle,
.initialise_label = _text_initialise_label,
.destroy_label = _text_destroy_label,
.destroy = _fmt_text_destroy,

View File

@@ -1,6 +1,6 @@
/*
* Copyright (C) 2002-2004 Sistina Software, Inc. All rights reserved.
* Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved.
* Copyright (C) 2004-2018 Red Hat, Inc. All rights reserved.
*
* This file is part of LVM2.
*
@@ -26,6 +26,8 @@
/* FIXME Allow for larger labels? Restricted to single sector currently */
static struct dm_pool *_labeller_mem;
/*
* Internal labeller struct.
*/
@@ -58,7 +60,13 @@ static struct labeller_i *_alloc_li(const char *name, struct labeller *l)
int label_init(void)
{
if (!(_labeller_mem = dm_pool_create("label scan", 128))) {
log_error("Labeller pool creation failed.");
return 0;
}
dm_list_init(&_labellers);
return 1;
}
@@ -73,6 +81,8 @@ void label_exit(void)
}
dm_list_init(&_labellers);
dm_pool_destroy(_labeller_mem);
}
int label_register_handler(struct labeller *handler)
@@ -109,32 +119,74 @@ static void _update_lvmcache_orphan(struct lvmcache_info *info)
stack;
}
static struct labeller *_find_labeller(struct device *dev, char *buf,
uint64_t *label_sector,
uint64_t scan_sector)
struct find_labeller_params {
struct device *dev;
uint64_t scan_sector; /* Sector to be scanned */
uint64_t label_sector; /* Sector where label found */
lvm_callback_fn_t process_label_data_fn;
void *process_label_data_context;
struct label **result;
int ret;
};
static void _set_label_read_result(int failed, unsigned ioflags, void *context, const void *data)
{
struct find_labeller_params *flp = context;
struct label **result = flp->result;
struct label *label = (struct label *) data;
if (failed) {
flp->ret = 0;
goto_out;
}
/* Fix up device and label sector which the low-level code doesn't set */
if (label) {
label->dev = flp->dev;
label->sector = flp->label_sector;
}
if (result)
*result = (struct label *) label;
out:
if (!dev_close(flp->dev))
stack;
if (flp->process_label_data_fn) {
log_debug_io("Completed label reading for %s", dev_name(flp->dev));
flp->process_label_data_fn(!flp->ret, ioflags, flp->process_label_data_context, NULL);
}
}
static void _find_labeller(int failed, unsigned ioflags, void *context, const void *data)
{
struct find_labeller_params *flp = context;
const char *readbuf = data;
struct device *dev = flp->dev;
uint64_t scan_sector = flp->scan_sector;
char labelbuf[LABEL_SIZE] __attribute__((aligned(8)));
struct labeller_i *li;
struct labeller *r = NULL;
struct label_header *lh;
struct labeller *l = NULL; /* Set when a labeller claims the label */
const struct label_header *lh;
struct lvmcache_info *info;
uint64_t sector;
int found = 0;
char readbuf[LABEL_SCAN_SIZE] __attribute__((aligned(8)));
if (!dev_read(dev, scan_sector << SECTOR_SHIFT,
LABEL_SCAN_SIZE, readbuf)) {
if (failed) {
log_debug_devs("%s: Failed to read label area", dev_name(dev));
goto out;
_set_label_read_result(1, ioflags, flp, NULL);
return;
}
/* Scan a few sectors for a valid label */
for (sector = 0; sector < LABEL_SCAN_SECTORS;
sector += LABEL_SIZE >> SECTOR_SHIFT) {
lh = (struct label_header *) (readbuf +
(sector << SECTOR_SHIFT));
lh = (struct label_header *) (readbuf + (sector << SECTOR_SHIFT));
if (!strncmp((char *)lh->id, LABEL_ID, sizeof(lh->id))) {
if (found) {
if (l) {
log_error("Ignoring additional label on %s at "
"sector %" PRIu64, dev_name(dev),
sector + scan_sector);
@@ -154,7 +206,7 @@ static struct labeller *_find_labeller(struct device *dev, char *buf,
"ignoring", dev_name(dev));
continue;
}
if (found)
if (l)
continue;
}
@@ -165,46 +217,44 @@ static struct labeller *_find_labeller(struct device *dev, char *buf,
"sector %" PRIu64,
dev_name(dev), li->name,
sector + scan_sector);
if (found) {
if (l) {
log_error("Ignoring additional label "
"on %s at sector %" PRIu64,
dev_name(dev),
sector + scan_sector);
continue;
}
r = li->l;
memcpy(buf, lh, LABEL_SIZE);
if (label_sector)
*label_sector = sector + scan_sector;
found = 1;
memcpy(labelbuf, lh, LABEL_SIZE);
flp->label_sector = sector + scan_sector;
l = li->l;
break;
}
}
}
out:
if (!found) {
if (!l) {
if ((info = lvmcache_info_from_pvid(dev->pvid, dev, 0)))
_update_lvmcache_orphan(info);
log_very_verbose("%s: No label detected", dev_name(dev));
}
return r;
flp->ret = 0;
_set_label_read_result(1, ioflags, flp, NULL);
} else
(void) (l->ops->read)(l, dev, labelbuf, ioflags, &_set_label_read_result, flp);
}
/* FIXME Also wipe associated metadata area headers? */
int label_remove(struct device *dev)
{
char buf[LABEL_SIZE] __attribute__((aligned(8)));
char readbuf[LABEL_SCAN_SIZE] __attribute__((aligned(8)));
char labelbuf[LABEL_SIZE] __attribute__((aligned(8)));
int r = 1;
uint64_t sector;
int wipe;
struct labeller_i *li;
struct label_header *lh;
struct lvmcache_info *info;
const char *readbuf = NULL;
memset(buf, 0, LABEL_SIZE);
memset(labelbuf, 0, LABEL_SIZE);
log_very_verbose("Scanning for labels to wipe from %s", dev_name(dev));
@@ -217,7 +267,7 @@ int label_remove(struct device *dev)
*/
dev_flush(dev);
if (!dev_read(dev, UINT64_C(0), LABEL_SCAN_SIZE, readbuf)) {
if (!(readbuf = dev_read(dev, UINT64_C(0), LABEL_SCAN_SIZE, DEV_IO_LABEL))) {
log_debug_devs("%s: Failed to read label area", dev_name(dev));
goto out;
}
@@ -225,8 +275,7 @@ int label_remove(struct device *dev)
/* Scan first few sectors for anything looking like a label */
for (sector = 0; sector < LABEL_SCAN_SECTORS;
sector += LABEL_SIZE >> SECTOR_SHIFT) {
lh = (struct label_header *) (readbuf +
(sector << SECTOR_SHIFT));
lh = (struct label_header *) (readbuf + (sector << SECTOR_SHIFT));
wipe = 0;
@@ -246,8 +295,7 @@ int label_remove(struct device *dev)
if (wipe) {
log_very_verbose("%s: Wiping label at sector %" PRIu64,
dev_name(dev), sector);
if (dev_write(dev, sector << SECTOR_SHIFT, LABEL_SIZE,
buf)) {
if (dev_write(dev, sector << SECTOR_SHIFT, LABEL_SIZE, DEV_IO_LABEL, labelbuf)) {
/* Also remove the PV record from cache. */
info = lvmcache_info_from_pvid(dev->pvid, dev, 0);
if (info)
@@ -268,21 +316,39 @@ int label_remove(struct device *dev)
return r;
}
int label_read(struct device *dev, struct label **result,
uint64_t scan_sector)
static int _label_read(struct device *dev, uint64_t scan_sector, struct label **result,
unsigned ioflags, lvm_callback_fn_t process_label_data_fn, void *process_label_data_context)
{
char buf[LABEL_SIZE] __attribute__((aligned(8)));
struct labeller *l;
uint64_t sector;
struct lvmcache_info *info;
int r = 0;
struct find_labeller_params *flp;
if ((info = lvmcache_info_from_pvid(dev->pvid, dev, 1))) {
log_debug_devs("Reading label from lvmcache for %s", dev_name(dev));
*result = lvmcache_get_label(info);
if (result)
*result = lvmcache_get_label(info);
if (process_label_data_fn) {
log_debug_io("Completed label reading for %s", dev_name(dev));
process_label_data_fn(0, ioflags, process_label_data_context, NULL);
}
return 1;
}
if (!(flp = dm_pool_zalloc(_labeller_mem, sizeof *flp))) {
log_error("find_labeller_params allocation failed.");
return 0;
}
flp->dev = dev;
flp->scan_sector = scan_sector;
flp->result = result;
flp->process_label_data_fn = process_label_data_fn;
flp->process_label_data_context = process_label_data_context;
flp->ret = 1;
/* Ensure result is always wiped as a precaution */
if (result)
*result = NULL;
log_debug_devs("Reading label from device %s", dev_name(dev));
if (!dev_open_readonly(dev)) {
@@ -291,19 +357,26 @@ int label_read(struct device *dev, struct label **result,
if ((info = lvmcache_info_from_pvid(dev->pvid, dev, 0)))
_update_lvmcache_orphan(info);
return r;
return 0;
}
if ((l = _find_labeller(dev, buf, &sector, scan_sector)))
if ((r = (l->ops->read)(l, dev, buf, result)) && result && *result) {
(*result)->dev = dev;
(*result)->sector = sector;
}
dev_read_callback(dev, scan_sector << SECTOR_SHIFT, LABEL_SCAN_SIZE, DEV_IO_LABEL, ioflags, _find_labeller, flp);
if (process_label_data_fn)
return 1;
else
return flp->ret;
}
if (!dev_close(dev))
stack;
/* result may be NULL if caller doesn't need it */
int label_read(struct device *dev, struct label **result, uint64_t scan_sector)
{
return _label_read(dev, scan_sector, result, 0, NULL, NULL);
}
return r;
int label_read_callback(struct device *dev, uint64_t scan_sector, unsigned ioflags,
lvm_callback_fn_t process_label_data_fn, void *process_label_data_context)
{
return _label_read(dev, scan_sector, NULL, ioflags, process_label_data_fn, process_label_data_context);
}
/* Caller may need to use label_get_handler to create label struct! */
@@ -342,7 +415,7 @@ int label_write(struct device *dev, struct label *label)
log_very_verbose("%s: Writing label to sector %" PRIu64 " with stored offset %"
PRIu32 ".", dev_name(dev), label->sector,
xlate32(lh->offset_xl));
if (!dev_write(dev, label->sector << SECTOR_SHIFT, LABEL_SIZE, buf)) {
if (!dev_write(dev, label->sector << SECTOR_SHIFT, LABEL_SIZE, DEV_IO_LABEL, buf)) {
log_debug_devs("Failed to write label to %s", dev_name(dev));
r = 0;
}
@@ -353,33 +426,6 @@ int label_write(struct device *dev, struct label *label)
return r;
}
/* Unused */
int label_verify(struct device *dev)
{
struct labeller *l;
char buf[LABEL_SIZE] __attribute__((aligned(8)));
uint64_t sector;
struct lvmcache_info *info;
int r = 0;
if (!dev_open_readonly(dev)) {
if ((info = lvmcache_info_from_pvid(dev->pvid, dev, 0)))
_update_lvmcache_orphan(info);
return_0;
}
if (!(l = _find_labeller(dev, buf, &sector, UINT64_C(0))))
goto out;
r = l->ops->verify ? l->ops->verify(l, buf, sector) : 1;
out:
if (!dev_close(dev))
stack;
return r;
}
void label_destroy(struct label *label)
{
label->labeller->ops->destroy_label(label->labeller, label);

View File

@@ -62,13 +62,8 @@ struct label_ops {
/*
* Read a label from a volume.
*/
int (*read) (struct labeller * l, struct device * dev,
void *buf, struct label ** label);
/*
* Additional consistency checks for the paranoid.
*/
int (*verify) (struct labeller * l, void *buf, uint64_t sector);
int (*read) (struct labeller *l, struct device *dev, void *buf,
unsigned ioflags, lvm_callback_fn_t label_read_callback_fn, void *label_read_callback_context);
/*
* Populate label_type etc.
@@ -101,8 +96,9 @@ struct labeller *label_get_handler(const char *name);
int label_remove(struct device *dev);
int label_read(struct device *dev, struct label **result,
uint64_t scan_sector);
int label_read_callback(struct device *dev, uint64_t scan_sector,
unsigned ioflags, lvm_callback_fn_t process_label_data_fn, void *process_label_data_context);
int label_write(struct device *dev, struct label *label);
int label_verify(struct device *dev);
struct label *label_create(struct labeller *labeller);
void label_destroy(struct label *label);

View File

@@ -515,7 +515,7 @@ static int _lock_resource(struct cmd_context *cmd, const char *resource,
return _lock_for_cluster(cmd, clvmd_cmd, flags, lockname);
}
static int decode_lock_type(const char *response)
static int _decode_lock_type(const char *response)
{
if (!response)
return LCK_NULL;
@@ -566,8 +566,8 @@ int query_resource(const char *resource, const char *node, int *mode)
* If two nodes report different locks,
* something is broken - just return more important mode.
*/
if (decode_lock_type(response[i].response) > *mode)
*mode = decode_lock_type(response[i].response);
if (_decode_lock_type(response[i].response) > *mode)
*mode = _decode_lock_type(response[i].response);
log_debug_locking("Lock held for %s, node %s : %s", resource,
response[i].node, response[i].response);

View File

@@ -51,8 +51,8 @@ static int _file_lock_resource(struct cmd_context *cmd, const char *resource,
switch (flags & LCK_SCOPE_MASK) {
case LCK_ACTIVATION:
if (dm_snprintf(lockfile, sizeof(lockfile),
"%s/A_%s", _lock_dir, resource + 1) < 0) {
log_error("Too long locking filename %s/A_%s.", _lock_dir, resource + 1);
"%s/A_%s", _lock_dir, resource) < 0) {
log_error("Too long locking filename %s/A_%s.", _lock_dir, resource);
return 0;
}

View File

@@ -371,73 +371,12 @@ int lock_vol(struct cmd_context *cmd, const char *vol, uint32_t flags, const str
return 1;
}
/* Unlock list of LVs */
int resume_lvs(struct cmd_context *cmd, struct dm_list *lvs)
{
struct lv_list *lvl;
int r = 1;
dm_list_iterate_items(lvl, lvs)
if (!resume_lv(cmd, lvl->lv)) {
r = 0;
stack;
}
return r;
}
/* Unlock and revert list of LVs */
int revert_lvs(struct cmd_context *cmd, struct dm_list *lvs)
{
struct lv_list *lvl;
int r = 1;
dm_list_iterate_items(lvl, lvs)
if (!revert_lv(cmd, lvl->lv)) {
r = 0;
stack;
}
return r;
}
/*
* Lock a list of LVs.
* On failure to lock any LV, calls vg_revert() if vg_to_revert is set and
* then unlocks any LVs on the list already successfully locked.
*/
int suspend_lvs(struct cmd_context *cmd, struct dm_list *lvs,
struct volume_group *vg_to_revert)
{
struct lv_list *lvl;
dm_list_iterate_items(lvl, lvs) {
if (!suspend_lv(cmd, lvl->lv)) {
log_error("Failed to suspend %s", lvl->lv->name);
if (vg_to_revert)
vg_revert(vg_to_revert);
/*
* FIXME Should be
* dm_list_uniterate(lvh, lvs, &lvl->list) {
* lvl = dm_list_item(lvh, struct lv_list);
* but revert would need fixing to use identical tree deps first.
*/
dm_list_iterate_items(lvl, lvs)
if (!revert_lv(cmd, lvl->lv))
stack;
return 0;
}
}
return 1;
}
/*
* First try to activate exclusively locally.
* Then if the VG is clustered and the LV is not yet active (e.g. due to
* an activation filter) try activating on remote nodes.
*/
int activate_lv_excl(struct cmd_context *cmd, struct logical_volume *lv)
int activate_lv_excl(struct cmd_context *cmd, const struct logical_volume *lv)
{
/* Non-clustered VGs are only activated locally. */
if (!vg_is_clustered(lv->vg))
@@ -468,14 +407,14 @@ int activate_lvs(struct cmd_context *cmd, struct dm_list *lvs, unsigned exclusiv
dm_list_iterate_items(lvl, lvs) {
if (!exclusive && !lv_is_active_exclusive(lvl->lv)) {
if (!activate_lv(cmd, lvl->lv)) {
log_error("Failed to activate %s", lvl->lv->name);
log_error("Failed to activate %s", display_lvname(lvl->lv));
return 0;
}
} else if (!activate_lv_excl(cmd, lvl->lv)) {
log_error("Failed to activate %s", lvl->lv->name);
log_error("Failed to activate %s", display_lvname(lvl->lv));
dm_list_uniterate(lvh, lvs, &lvl->list) {
lvl = dm_list_item(lvh, struct lv_list);
if (!activate_lv(cmd, lvl->lv))
if (!deactivate_lv(cmd, lvl->lv))
stack;
}
return 0;

View File

@@ -166,22 +166,25 @@ int check_lvm1_vg_inactive(struct cmd_context *cmd, const char *vgname);
#define LCK_LV_CLUSTERED(lv) \
(vg_is_clustered((lv)->vg) ? LCK_CLUSTER_VG : 0)
#define lock_lv_vol(cmd, lv, flags) \
(find_replicator_vgs((lv)) ? \
lock_vol(cmd, (lv)->lvid.s, flags | LCK_LV_CLUSTERED(lv), lv) : \
0)
#define lock_lv_vol(cmd, lv, flags) lock_vol(cmd, (lv)->lvid.s, flags | LCK_LV_CLUSTERED(lv), lv)
/*
* Activation locks are wrapped around activation commands that have to
* be processed atomically one-at-a-time.
* If a VG WRITE lock is held, an activation lock is redundant.
* If a VG WRITE lock is held or clustered activation activates simple LV
* an activation lock is redundant.
*
* FIXME Test and support this for thin and cache types.
* FIXME Add cluster support.
* Some LV types do require taking a lock common for whole group of LVs.
* TODO: For simplicity reasons ATM take a VG activation global lock and
* later more fine-grained component detection algorithm can be added
*/
#define lv_supports_activation_locking(lv) (!vg_is_clustered((lv)->vg) && !lv_is_thin_type(lv) && !lv_is_cache_type(lv))
#define lock_activation(cmd, lv) (vg_write_lock_held() && lv_supports_activation_locking(lv) ? 1 : lock_vol(cmd, (lv)->lvid.s, LCK_ACTIVATE_LOCK, lv))
#define unlock_activation(cmd, lv) (vg_write_lock_held() && lv_supports_activation_locking(lv) ? 1 : lock_vol(cmd, (lv)->lvid.s, LCK_ACTIVATE_UNLOCK, lv))
#define lv_type_requires_activation_lock(lv) ((lv_is_thin_type(lv) || lv_is_cache_type(lv) || lv_is_mirror_type(lv) || lv_is_raid_type(lv) || lv_is_origin(lv) || lv_is_snapshot(lv)) ? 1 : 0)
#define lv_activation_lock_name(lv) (lv_type_requires_activation_lock(lv) ? (lv)->vg->name : (lv)->lvid.s)
#define lv_requires_activation_lock_now(lv) ((!vg_write_lock_held() && (!vg_is_clustered((lv)->vg) || !lv_type_requires_activation_lock(lv))) ? 1 : 0)
#define lock_activation(cmd, lv) (lv_requires_activation_lock_now(lv) ? lock_vol(cmd, lv_activation_lock_name(lv), LCK_ACTIVATE_LOCK, lv) : 1)
#define unlock_activation(cmd, lv) (lv_requires_activation_lock_now(lv) ? lock_vol(cmd, lv_activation_lock_name(lv), LCK_ACTIVATE_UNLOCK, lv) : 1)
/*
* Place temporary exclusive 'activation' lock around an LV locking operation
@@ -239,7 +242,7 @@ int check_lvm1_vg_inactive(struct cmd_context *cmd, const char *vgname);
lock_lv_vol(cmd, lv, LCK_LV_EXCLUSIVE | LCK_HOLD | LCK_REMOTE)
struct logical_volume;
int activate_lv_excl(struct cmd_context *cmd, struct logical_volume *lv);
int activate_lv_excl(struct cmd_context *cmd, const struct logical_volume *lv);
#define activate_lv_local(cmd, lv) \
lock_lv_vol_serially(cmd, lv, LCK_LV_ACTIVATE | LCK_HOLD | LCK_LOCAL)
@@ -259,10 +262,6 @@ int sync_dev_names(struct cmd_context* cmd);
/* Process list of LVs */
struct volume_group;
int suspend_lvs(struct cmd_context *cmd, struct dm_list *lvs,
struct volume_group *vg_to_revert);
int resume_lvs(struct cmd_context *cmd, struct dm_list *lvs);
int revert_lvs(struct cmd_context *cmd, struct dm_list *lvs);
int activate_lvs(struct cmd_context *cmd, struct dm_list *lvs, unsigned exclusive);
#endif

View File

@@ -412,7 +412,7 @@ static int _extend_sanlock_lv(struct cmd_context *cmd, struct volume_group *vg,
return 0;
}
if (!dev_set(dev, old_size_bytes, new_size_bytes - old_size_bytes, 0)) {
if (!dev_set(dev, old_size_bytes, new_size_bytes - old_size_bytes, DEV_IO_LV, 0)) {
log_error("Extend sanlock LV %s cannot zero device.", display_lvname(lv));
dev_close_immediate(dev);
return 0;
@@ -667,6 +667,10 @@ static int _init_vg_sanlock(struct cmd_context *cmd, struct volume_group *vg, in
case -EARGS:
log_error("VG %s init failed: invalid parameters for sanlock", vg->name);
break;
case -EDEVOPEN:
log_error("VG %s init failed: sanlock cannot open device /dev/mapper/%s-%s", vg->name, vg->name, LOCKD_SANLOCK_LV_NAME);
log_error("Check that sanlock has permission to access disks.");
break;
case -EMANAGER:
log_error("VG %s init failed: lock manager sanlock is not running", vg->name);
break;
@@ -1646,15 +1650,15 @@ int lockd_gl(struct cmd_context *cmd, const char *def_mode, uint32_t flags)
*/
log_error("Global lock failed: held by other host.");
return 0;
} else {
/*
* We don't intend to reach this. We should check
* any known/possible error specifically and print
* a more helpful message. This is for completeness.
*/
log_error("Global lock failed: error %d.", result);
return 0;
}
/*
* We don't intend to reach this. We should check
* any known/possible error specifically and print
* a more helpful message. This is for completeness.
*/
log_error("Global lock failed: error %d.", result);
return 0;
}
allow:
@@ -1908,6 +1912,23 @@ int lockd_vg(struct cmd_context *cmd, const char *vg_name, const char *def_mode,
}
}
/*
* The lock is held by another host, and retries have been unsuccessful.
*/
if (result == -EAGAIN) {
if (!strcmp(mode, "un")) {
ret = 1;
goto out;
} else if (!strcmp(mode, "sh")) {
log_warn("VG %s lock skipped: held by other host.", vg_name);
ret = 1;
goto out;
} else {
log_error("VG %s lock failed: held by other host.", vg_name);
ret = 0;
goto out;
}
}
/*
* No lockspace for the VG was found. It may be a local
* VG that lvmlockd doesn't keep track of, or it may be
@@ -2042,6 +2063,15 @@ int lockd_lv_name(struct cmd_context *cmd, struct volume_group *vg,
return 0;
}
/*
* This is a hack for mirror LVs which need to know at a very low level
* which lock mode the LV is being activated with so that it can pick
* a mirror log type during activation. Do not use this for anything
* else.
*/
if (mode && !strcmp(mode, "sh"))
cmd->lockd_lv_sh = 1;
if (!mode)
mode = "ex";
@@ -2118,7 +2148,7 @@ int lockd_lv_name(struct cmd_context *cmd, struct volume_group *vg,
static int _lockd_lv_thin(struct cmd_context *cmd, struct logical_volume *lv,
const char *def_mode, uint32_t flags)
{
struct logical_volume *pool_lv;
struct logical_volume *pool_lv = NULL;
if (lv_is_thin_volume(lv)) {
struct lv_segment *pool_seg = first_seg(lv);
@@ -2131,6 +2161,11 @@ static int _lockd_lv_thin(struct cmd_context *cmd, struct logical_volume *lv,
/* FIXME: there should be a function to get pool lv from data lv. */
pool_lv = lv_parent(lv);
} else if (lv_is_thin_pool_metadata(lv)) {
struct lv_segment *pool_seg = get_only_segment_using_this_lv(lv);
if (pool_seg)
pool_lv = pool_seg->lv;
} else {
/* This should not happen AFAIK. */
log_error("Lock on incorrect thin lv type %s/%s",
@@ -2159,6 +2194,31 @@ static int _lockd_lv_thin(struct cmd_context *cmd, struct logical_volume *lv,
pool_lv->lock_args, def_mode, flags);
}
/*
* Only the combination of dlm + corosync + cmirrord allows
* mirror LVs to be activated in shared mode on multiple nodes.
*/
static int _lockd_lv_mirror(struct cmd_context *cmd, struct logical_volume *lv,
const char *def_mode, uint32_t flags)
{
if (!strcmp(lv->vg->lock_type, "sanlock"))
flags |= LDLV_MODE_NO_SH;
else if (!strcmp(lv->vg->lock_type, "dlm") && def_mode && !strcmp(def_mode, "sh")) {
#ifdef CMIRRORD_PIDFILE
if (!cmirrord_is_running()) {
log_error("cmirrord must be running to activate an LV in shared mode.");
return 0;
}
#else
flags |= LDLV_MODE_NO_SH;
#endif
}
return lockd_lv_name(cmd, lv->vg, lv->name, &lv->lvid.id[1],
lv->lock_args, def_mode, flags);
}
/*
* If the VG has no lock_type, then this function can return immediately.
* The LV itself may have no lock (NULL lv->lock_args), but the lock request
@@ -2212,12 +2272,14 @@ int lockd_lv(struct cmd_context *cmd, struct logical_volume *lv,
*/
if (lv_is_external_origin(lv) ||
lv_is_thin_type(lv) ||
lv_is_mirror_type(lv) ||
lv_is_raid_type(lv) ||
lv_is_cache_type(lv)) {
flags |= LDLV_MODE_NO_SH;
}
if (lv_is_mirror_type(lv))
return _lockd_lv_mirror(cmd, lv, def_mode, flags);
return lockd_lv_name(cmd, lv->vg, lv->name, &lv->lvid.id[1],
lv->lock_args, def_mode, flags);
}
@@ -2380,16 +2442,19 @@ int lockd_init_lv(struct cmd_context *cmd, struct volume_group *vg, struct logic
if (!_lvmlockd_connected)
return 0;
if (!lp->needs_lockd_init)
if (!lp->needs_lockd_init) {
/* needs_lock_init is set for LVs that need a lockd lock. */
return 1;
if (seg_is_cache(lp) || seg_is_cache_pool(lp)) {
} else if (seg_is_cache(lp) || seg_is_cache_pool(lp)) {
/*
* This should not happen because the command defs are
* checked and excluded for shared VGs early in lvcreate.
*/
log_error("Use lvconvert for cache with lock type %s", vg->lock_type);
return 0;
}
if (!seg_is_thin_volume(lp) && lp->snapshot) {
} else if (!seg_is_thin_volume(lp) && lp->snapshot) {
struct logical_volume *origin_lv;
/*
@@ -2414,9 +2479,8 @@ int lockd_init_lv(struct cmd_context *cmd, struct volume_group *vg, struct logic
}
lv->lock_args = NULL;
return 1;
}
if (seg_is_thin(lp)) {
} else if (seg_is_thin(lp)) {
if ((seg_is_thin_volume(lp) && !lp->create_pool) ||
(!seg_is_thin_volume(lp) && lp->snapshot)) {
struct lv_list *lvl;
@@ -2437,31 +2501,33 @@ int lockd_init_lv(struct cmd_context *cmd, struct volume_group *vg, struct logic
}
lv->lock_args = NULL;
return 1;
}
if (seg_is_thin_volume(lp) && lp->create_pool) {
} else if (seg_is_thin_volume(lp) && lp->create_pool) {
/*
* Creating a thin pool and a thin lv in it. We could
* probably make this work.
*
* This should not happen because the command defs are
* checked and excluded for shared VGs early in lvcreate.
*/
log_error("Create thin pool and thin LV separately with lock type %s",
vg->lock_type);
return 0;
}
if (!seg_is_thin_volume(lp) && lp->create_pool) {
} else if (!seg_is_thin_volume(lp) && lp->create_pool) {
/* Creating a thin pool only. */
/* lv_name_lock = lp->pool_name; */
} else {
log_error("Unknown thin options for lock init.");
return 0;
}
log_error("Unknown thin options for lock init.");
return 0;
} else {
/* Creating a normal lv. */
/* lv_name_lock = lv_name; */
}
/* Creating a normal lv. */
/* lv_name_lock = lv_name; */
/*
* The LV gets its own lock, so set lock_args to non-NULL.
*

View File

@@ -594,7 +594,7 @@ static void _vprint_log(int level, const char *file, int line, int dm_errno_or_c
log_it:
if (!logged_via_report && ((verbose_level() >= level) && !_log_suppress)) {
if (verbose_level() > _LOG_DEBUG) {
(void) dm_snprintf(buf, sizeof(buf), "#%s:%d ",
(void) dm_snprintf(buf, sizeof(buf), "#%s:%-5d ",
file, line);
} else
buf[0] = '\0';
@@ -639,18 +639,23 @@ static void _vprint_log(int level, const char *file, int line, int dm_errno_or_c
}
if (_log_to_file && (_log_while_suspended || !critical_section())) {
fprintf(_log_file, "%s:%d %s%s", file, line, log_command_name(),
fprintf(_log_file, "%s:%-5d %s%s", file, line, log_command_name(),
_msg_prefix);
va_copy(ap, orig_ap);
vfprintf(_log_file, trformat, ap);
va_end(ap);
if (_log_file_max_lines && ++_log_file_lines >= _log_file_max_lines) {
fprintf(_log_file, "\n%s:%-5d %sAborting. Command has reached limit "
"for logged lines (LVM_LOG_FILE_MAX_LINES=" FMTu64 ").",
file, line, _msg_prefix,
_log_file_max_lines);
fatal_internal_error = 1;
}
fputc('\n', _log_file);
fflush(_log_file);
if (_log_file_max_lines && ++_log_file_lines >= _log_file_max_lines)
fatal_internal_error = 1;
}
if (_syslog && (_log_while_suspended || !critical_section())) {
@@ -668,7 +673,7 @@ static void _vprint_log(int level, const char *file, int line, int dm_errno_or_c
memset(&buf, ' ', sizeof(buf));
bufused = 0;
if ((n = dm_snprintf(buf, sizeof(buf),
"%s:%d %s%s", file, line, log_command_name(),
"%s:%-5d %s%s", file, line, log_command_name(),
_msg_prefix)) == -1)
goto done;
@@ -689,7 +694,7 @@ static void _vprint_log(int level, const char *file, int line, int dm_errno_or_c
buf[bufused] = '\n';
buf[sizeof(buf) - 1] = '\n';
/* FIXME real size bufused */
dev_append(&_log_dev, sizeof(buf), buf);
dev_append(&_log_dev, sizeof(buf), DEV_IO_LOG, buf);
_already_logging = 0;
}
}

View File

@@ -72,6 +72,7 @@
#define LOG_CLASS_LOCKING 0x0080 /* "locking" */
#define LOG_CLASS_LVMPOLLD 0x0100 /* "lvmpolld" */
#define LOG_CLASS_DBUS 0x0200 /* "dbus" */
#define LOG_CLASS_IO 0x0400 /* "io" */
#define log_debug(x...) LOG_LINE(_LOG_DEBUG, x)
#define log_debug_mem(x...) LOG_LINE_WITH_CLASS(_LOG_DEBUG, LOG_CLASS_MEM, x)
@@ -84,6 +85,7 @@
#define log_debug_locking(x...) LOG_LINE_WITH_CLASS(_LOG_DEBUG, LOG_CLASS_LOCKING, x)
#define log_debug_lvmpolld(x...) LOG_LINE_WITH_CLASS(_LOG_DEBUG, LOG_CLASS_LVMPOLLD, x)
#define log_debug_dbus(x...) LOG_LINE_WITH_CLASS(_LOG_DEBUG, LOG_CLASS_DBUS, x)
#define log_debug_io(x...) LOG_LINE_WITH_CLASS(_LOG_DEBUG, LOG_CLASS_IO, x)
#define log_info(x...) LOG_LINE(_LOG_INFO, x)
#define log_notice(x...) LOG_LINE(_LOG_NOTICE, x)

View File

@@ -129,7 +129,7 @@ void cache_check_for_warns(const struct lv_segment *seg)
if (lv_is_raid(origin_lv) &&
first_seg(seg->pool_lv)->cache_mode == CACHE_MODE_WRITEBACK)
log_warn("WARNING: Data redundancy is lost with writeback "
log_warn("WARNING: Data redundancy could be lost with writeback "
"caching of raid logical volume!");
if (lv_is_thin_pool_data(seg->lv))
@@ -960,7 +960,7 @@ int wipe_cache_pool(struct logical_volume *cache_pool_lv)
}
cache_pool_lv->status |= LV_TEMPORARY;
if (!activate_lv_local(cache_pool_lv->vg->cmd, cache_pool_lv)) {
if (!activate_lv_excl_local(cache_pool_lv->vg->cmd, cache_pool_lv)) {
log_error("Aborting. Failed to activate cache pool %s.",
display_lvname(cache_pool_lv));
return 0;

View File

@@ -1462,7 +1462,6 @@ int lv_active_change(struct cmd_context *cmd, struct logical_volume *lv,
enum activation_change activate, int needs_exclusive)
{
const char *ay_with_mode = NULL;
struct lv_segment *seg = first_seg(lv);
if (activate == CHANGE_ASY)
ay_with_mode = "sh";
@@ -1499,9 +1498,6 @@ deactivate:
break;
case CHANGE_ALY:
case CHANGE_AAY:
if (!raid4_is_supported(cmd, seg->segtype))
goto no_raid4;
if (needs_exclusive || _lv_is_exclusive(lv)) {
log_verbose("Activating logical volume %s exclusively locally.",
display_lvname(lv));
@@ -1516,9 +1512,6 @@ deactivate:
break;
case CHANGE_AEY:
exclusive:
if (!raid4_is_supported(cmd, seg->segtype))
goto no_raid4;
log_verbose("Activating logical volume %s exclusively.",
display_lvname(lv));
if (!activate_lv_excl(cmd, lv))
@@ -1527,9 +1520,6 @@ exclusive:
case CHANGE_ASY:
case CHANGE_AY:
default:
if (!raid4_is_supported(cmd, seg->segtype))
goto no_raid4;
if (needs_exclusive || _lv_is_exclusive(lv))
goto exclusive;
log_verbose("Activating logical volume %s.", display_lvname(lv));
@@ -1542,10 +1532,6 @@ exclusive:
log_error("Failed to unlock logical volume %s.", display_lvname(lv));
return 1;
no_raid4:
log_error("Failed to activate %s LV %s", lvseg_name(seg), display_lvname(lv));
return 0;
}
char *lv_active_dup(struct dm_pool *mem, const struct logical_volume *lv)
@@ -1618,6 +1604,9 @@ const struct logical_volume *lv_lock_holder(const struct logical_volume *lv)
if ((lv_is_raid_image(lv) || lv_is_raid_metadata(lv)) && lv_is_visible(lv))
return lv;
if (lv_is_pvmove(lv))
return lv;
/* For other types, by default look for the first user */
dm_list_iterate_items(sl, &lv->segs_using_this_lv) {
/* FIXME: complete this exception list */
@@ -1627,6 +1616,9 @@ const struct logical_volume *lv_lock_holder(const struct logical_volume *lv)
continue; /* Skip thin snaphost */
if (lv_is_pending_delete(sl->seg->lv))
continue; /* Skip deleted LVs */
if (lv_is_cache_pool(sl->seg->lv) &&
!lv_is_used_cache_pool(sl->seg->lv))
continue; /* Skip unused cache-pool */
return lv_lock_holder(sl->seg->lv);
}

View File

@@ -19,7 +19,6 @@
union lvid;
struct lv_segment;
struct replicator_device;
enum activation_change;
struct logical_volume {
@@ -43,9 +42,6 @@ struct logical_volume {
struct dm_list snapshot_segs;
struct lv_segment *snapshot;
struct replicator_device *rdevice;/* For replicator-devs, rimages, slogs - reference to rdevice */
struct dm_list rsites; /* For replicators - all sites */
struct dm_list segments;
struct dm_list tags;
struct dm_list segs_using_this_lv;

View File

@@ -941,8 +941,7 @@ struct lv_segment *alloc_lv_segment(const struct segment_type *segtype,
return_NULL;
}
if (segtype_is_raid(segtype) &&
!segtype_is_raid0(segtype) &&
if (segtype_is_raid_with_meta(segtype) &&
!(seg->meta_areas = dm_pool_zalloc(mem, areas_sz))) {
dm_pool_free(mem, seg); /* frees everything alloced since seg */
return_NULL;
@@ -1315,13 +1314,19 @@ static int _lv_segment_reduce(struct lv_segment *seg, uint32_t reduction)
*/
static int _lv_reduce(struct logical_volume *lv, uint32_t extents, int delete)
{
struct lv_segment *seg = first_seg(lv);;
struct lv_segment *seg = NULL;
uint32_t count = extents;
uint32_t reduction;
struct logical_volume *pool_lv;
struct logical_volume *external_lv = NULL;
int is_raid10 = seg_is_any_raid10(seg) && seg->reshape_len;
uint32_t data_copies = seg->data_copies;
int is_raid10 = 0;
uint32_t data_copies = 0;
if (!dm_list_empty(&lv->segments)) {
seg = first_seg(lv);
is_raid10 = seg_is_any_raid10(seg) && seg->reshape_len;
data_copies = seg->data_copies;
}
if (lv_is_merging_origin(lv)) {
log_debug_metadata("Dropping snapshot merge of %s to removed origin %s.",
@@ -2732,7 +2737,8 @@ static int _limit_to_one_area_per_tag(struct alloc_handle *ah, struct alloc_stat
s++;
}
alloc_state->areas[u].pva = NULL;
if (u < alloc_state->areas_size)
alloc_state->areas[u].pva = NULL;
return 1;
}
@@ -3917,7 +3923,7 @@ static int _lv_insert_empty_sublvs(struct logical_volume *lv,
return_0;
/* Metadata LVs for raid */
if (segtype_is_raid(segtype) && !segtype_is_raid0(segtype)) {
if (segtype_is_raid_with_meta(segtype)) {
if (dm_snprintf(img_name, sizeof(img_name), "%s_rmeta_%u",
lv->name, i) < 0)
goto_bad;
@@ -4022,6 +4028,15 @@ static int _lv_extend_layered_lv(struct alloc_handle *ah,
fa += stripes;
}
seg->len += extents;
if (seg_is_raid(seg))
seg->area_len = seg->len;
else
seg->area_len += extents / area_multiple;
if (!_setup_lv_size(lv, lv->le_count + extents))
return_0;
if (clear_metadata) {
/*
* We must clear the metadata areas upon creation.
@@ -4086,15 +4101,6 @@ static int _lv_extend_layered_lv(struct alloc_handle *ah,
lv_set_hidden(seg_metalv(seg, s));
}
seg->len += extents;
if (seg_is_raid(seg))
seg->area_len = seg->len;
else
seg->area_len += extents / area_multiple;
if (!_setup_lv_size(lv, lv->le_count + extents))
return_0;
return 1;
}
@@ -4138,7 +4144,7 @@ int lv_extend(struct logical_volume *lv,
else if (segtype_is_raid0_meta(segtype))
/* Extend raid0 metadata LVs too */
log_count = stripes;
else if (segtype_is_raid(segtype) && !segtype_is_raid0(segtype))
else if (segtype_is_raid_with_meta(segtype))
log_count = mirrors * stripes;
}
/* FIXME log_count should be 1 for mirrors */
@@ -4165,7 +4171,7 @@ int lv_extend(struct logical_volume *lv,
return_0;
new_extents = ah->new_extents;
if (segtype_is_raid(segtype) && !segtype_is_raid0(segtype))
if (segtype_is_raid_with_meta(segtype))
new_extents -= ah->log_len * ah->area_multiple;
if (segtype_is_pool(segtype)) {
@@ -4228,7 +4234,8 @@ int lv_extend(struct logical_volume *lv,
log_error("Failed to get sync percent for %s.",
display_lvname(lv));
goto out;
} else if (sync_percent == DM_PERCENT_100) {
} else if (lv_is_not_synced(lv) ||
sync_percent == DM_PERCENT_100) {
log_verbose("Skipping initial resync for "
"extended portion of %s",
display_lvname(lv));
@@ -4719,7 +4726,7 @@ static int _lvresize_adjust_policy(const struct logical_volume *lv,
return 1;
}
static uint32_t lvseg_get_stripes(struct lv_segment *seg, uint32_t *stripesize)
static uint32_t _lvseg_get_stripes(struct lv_segment *seg, uint32_t *stripesize)
{
uint32_t s;
struct lv_segment *seg_mirr;
@@ -4797,7 +4804,8 @@ static int _lvresize_check(struct logical_volume *lv,
return 0;
}
if (lv_is_cache_type(lv)) {
if (lv_is_cache_type(lv) ||
(lv_is_thin_pool(lv) && lv_is_cache_type(seg_lv(first_seg(lv), 0)))) {
log_error("Unable to resize logical volumes of cache type.");
return 0;
}
@@ -5165,7 +5173,7 @@ static int _lvresize_adjust_extents(struct logical_volume *lv,
seg_physical_extents = seg->area_len * seg->area_count; /* FIXME Also metadata, cow etc. */
/* Check for underlying stripe sizes */
seg_stripes = lvseg_get_stripes(seg, &seg_stripesize);
seg_stripes = _lvseg_get_stripes(seg, &seg_stripesize);
if (seg_is_mirrored(seg))
seg_mirrors = lv_mirror_count(seg->lv);
@@ -5748,7 +5756,6 @@ struct logical_volume *alloc_lv(struct dm_pool *mem)
dm_list_init(&lv->tags);
dm_list_init(&lv->segs_using_this_lv);
dm_list_init(&lv->indirect_glvs);
dm_list_init(&lv->rsites);
return lv;
}
@@ -6352,6 +6359,15 @@ int lv_remove_with_dependencies(struct cmd_context *cmd, struct logical_volume *
!_lv_remove_segs_using_this_lv(cmd, lv, force, level, "pool"))
return_0;
if (lv_is_cache_pool(lv) && !lv_is_used_cache_pool(lv)) {
if (!deactivate_lv(cmd, first_seg(lv)->metadata_lv) ||
!deactivate_lv(cmd, seg_lv(first_seg(lv),0))) {
log_error("Unable to fully deactivate unused cache-pool %s.",
display_lvname(lv));
return 0;
}
}
if (lv_is_pool_metadata_spare(lv) &&
(force == PROMPT)) {
dm_list_iterate_items(lvl, &lv->vg->lvs)
@@ -6408,7 +6424,7 @@ static int _lv_update_and_reload(struct logical_volume *lv, int origin_only)
r = 0;
}
if (do_backup)
if (do_backup && !critical_section())
backup(vg);
return r;
@@ -6619,6 +6635,14 @@ int remove_layers_for_segments_all(struct cmd_context *cmd,
if (!lv_empty(layer_lv))
return_0;
/* Assumes only used by PVMOVE ATM when unlocking LVs */
dm_list_iterate_items(lvl, lvs_changed) {
/* FIXME Assumes only one pvmove at a time! */
lvl->lv->status &= ~LOCKED;
if (!lv_merge_segments(lvl->lv))
return_0;
}
return 1;
}
@@ -7043,14 +7067,25 @@ int insert_layer_for_segments_on_pv(struct cmd_context *cmd,
struct lv_list *lvl;
int lv_used = 0;
uint32_t s;
struct logical_volume *holder = (struct logical_volume *) lv_lock_holder(lv_where);
log_very_verbose("Inserting layer %s for segments of %s on %s",
layer_lv->name, lv_where->name,
pvl ? pv_dev_name(pvl->pv) : "any");
/* Temporarily hide layer_lv from vg->lvs list
* so the lv_split_segment() passes vg_validate()
* since here layer_lv has empty segment list */
if (!(lvl = find_lv_in_vg(lv_where->vg, layer_lv->name)))
return_0;
dm_list_del(&lvl->list);
if (!_align_segment_boundary_to_pe_range(lv_where, pvl))
return_0;
/* Put back layer_lv in vg->lv */
dm_list_add(&lv_where->vg->lvs, &lvl->list);
/* Work through all segments on the supplied PV */
dm_list_iterate_items(seg, &lv_where->segments) {
for (s = 0; s < seg->area_count; s++) {
@@ -7059,13 +7094,23 @@ int insert_layer_for_segments_on_pv(struct cmd_context *cmd,
/* First time, add LV to list of LVs affected */
if (!lv_used && lvs_changed) {
if (!(lvl = dm_pool_alloc(cmd->mem, sizeof(*lvl)))) {
log_error("lv_list alloc failed");
return 0;
/* First check if LV is listed already */
dm_list_iterate_items(lvl, lvs_changed)
if (lvl->lv == holder) {
lv_used = 1;
break;
}
if (!lv_used) {
if (!(lvl = dm_pool_alloc(cmd->mem, sizeof(*lvl)))) {
log_error("lv_list alloc failed.");
return 0;
}
lvl->lv = holder;
dm_list_add(lvs_changed, &lvl->list);
lv_used = 1;
}
lvl->lv = lv_where;
dm_list_add(lvs_changed, &lvl->list);
lv_used = 1;
}
if (!_extend_layer_lv_for_segment(layer_lv, seg, s,
@@ -7148,7 +7193,7 @@ int wipe_lv(struct logical_volume *lv, struct wipe_params wp)
display_size(lv->vg->cmd, zero_sectors),
lv->vg->name, lv->name, wp.zero_value);
if (!dev_set(dev, UINT64_C(0), (size_t) zero_sectors << SECTOR_SHIFT, wp.zero_value))
if (!dev_set(dev, UINT64_C(0), (size_t) zero_sectors << SECTOR_SHIFT, DEV_IO_LV, wp.zero_value))
stack;
}
@@ -7239,19 +7284,6 @@ int lv_activation_skip(struct logical_volume *lv, activation_change_t activate,
return 1;
}
int lv_maintenance_skip(struct logical_volume *lv, activation_change_t activate,
int override_maintenance_flag)
{
if (!(lv->status & LV_MAINTENANCE) ||
!is_change_activating(activate) || /* Do not skip deactivation */
override_maintenance_flag)
return 0;
log_verbose("MAINTENANCE flag set for LV %s/%s, skipping activation.",
lv->vg->name, lv->name);
return 1;
}
static int _should_wipe_lv(struct lvcreate_params *lp,
struct logical_volume *lv, int warn)
{
@@ -7549,11 +7581,18 @@ static struct logical_volume *_lv_create_an_lv(struct volume_group *vg,
status |= LV_NOTSYNCED;
}
lp->region_size = adjusted_mirror_region_size(vg->cmd,
vg->extent_size,
lp->extents,
lp->region_size, 0,
vg_is_clustered(vg));
if (seg_is_raid(lp)) {
/* Value raid target constraint */
if (lp->region_size > (uint64_t)vg->extent_size * lp->extents) {
log_error("Cannot create RAID LV with region size larger than LV size.");
return NULL;
}
} else
lp->region_size = adjusted_mirror_region_size(vg->cmd,
vg->extent_size,
lp->extents,
lp->region_size, 0,
vg_is_clustered(vg));
} else if (pool_lv && seg_is_thin_volume(lp)) {
if (!lv_is_thin_pool(pool_lv)) {
log_error("Logical volume %s is not a thin pool.",
@@ -7598,47 +7637,9 @@ static struct logical_volume *_lv_create_an_lv(struct volume_group *vg,
"Use --virtualsize.");
return NULL;
}
if (lv_is_cow(origin_lv)) {
log_error("Snapshots of snapshots are not supported.");
return NULL;
}
if (lv_is_locked(origin_lv)) {
log_error("Snapshots of locked devices are not supported.");
return NULL;
}
if (lv_is_merging_origin(origin_lv)) {
log_error("Snapshots of an origin that has a "
"merging snapshot is not supported");
return NULL;
}
if (lv_is_cache_type(origin_lv) && !lv_is_cache(origin_lv)) {
log_error("Snapshots of cache type volume %s "
"is not supported.", display_lvname(origin_lv));
return NULL;
}
if (lv_is_thin_type(origin_lv) && !lv_is_thin_volume(origin_lv)) {
log_error("Snapshots of thin pool %sdevices "
"are not supported.",
lv_is_thin_pool_data(origin_lv) ? "data " :
lv_is_thin_pool_metadata(origin_lv) ?
"metadata " : "");
return NULL;
}
if (lv_is_mirror_type(origin_lv)) {
log_warn("WARNING: Snapshots of mirrors can deadlock under rare device failures.");
log_warn("WARNING: Consider using the raid1 mirror type to avoid this.");
log_warn("WARNING: See global/mirror_segtype_default in lvm.conf.");
}
if (vg_is_clustered(vg) && lv_is_active(origin_lv) &&
!lv_is_active_exclusive_locally(origin_lv)) {
log_error("%s must be active exclusively to"
" create snapshot", origin_lv->name);
return NULL;
}
if (!validate_snapshot_origin(origin_lv))
return_0;
}
if (!cow_has_min_chunks(vg, lp->extents, lp->chunk_size))
@@ -7704,8 +7705,17 @@ static struct logical_volume *_lv_create_an_lv(struct volume_group *vg,
lp->mirrors,
segtype_is_pool(create_segtype) ? lp->pool_metadata_extents : lp->region_size,
segtype_is_thin_volume(create_segtype) ? lp->virtual_extents : lp->extents,
lp->pvh, lp->alloc, lp->approx_alloc))
lp->pvh, lp->alloc, lp->approx_alloc)) {
unlink_lv_from_vg(lv); /* Keep VG consistent and remove LV without any segment */
return_NULL;
}
/* rhbz1269533: allow for 100%FREE allocation to work with "mirror" and a disk log */
if (segtype_is_mirror(create_segtype) &&
lp->log_count &&
!vg->free_count &&
lv->le_count > 1)
lv_reduce(lv, 1);
/* Unlock memory if possible */
memlock_unlock(vg->cmd);

View File

@@ -98,6 +98,14 @@ int lv_merge_segments(struct logical_volume *lv)
return; \
}
/*
 * Report whether the LV owning @seg has any reshape status flag set
 * (LV_RESHAPE, LV_RESHAPE_DELTA_DISKS_MINUS or LV_RESHAPE_DELTA_DISKS_PLUS).
 *
 * Returns 1 if at least one of the flags is set, 0 otherwise.
 */
static int _check_raid_seg_reshape_flags(struct lv_segment *seg)
{
	/* Test all three flags with a single combined mask. */
	return (seg->lv->status & (LV_RESHAPE |
				   LV_RESHAPE_DELTA_DISKS_MINUS |
				   LV_RESHAPE_DELTA_DISKS_PLUS)) ? 1 : 0;
}
/* Check raid0 segment properties in @seg */
static void _check_raid0_seg(struct lv_segment *seg, int *error_count)
{
@@ -119,8 +127,10 @@ static void _check_raid0_seg(struct lv_segment *seg, int *error_count)
raid_seg_error_val("non-zero min recovery rate", seg->min_recovery_rate);
if (seg->max_recovery_rate)
raid_seg_error_val("non-zero max recovery rate", seg->max_recovery_rate);
if ((seg->lv->status & LV_RESHAPE_DATA_OFFSET) || seg->data_offset)
if ((seg->lv->status & LV_RESHAPE_DATA_OFFSET) || seg->data_offset > 1)
raid_seg_error_val("data_offset", seg->data_offset);
if (_check_raid_seg_reshape_flags(seg))
raid_seg_error("reshape");
}
/* Check RAID @seg for non-zero, power of 2 region size and min recovery rate <= max */
@@ -143,8 +153,10 @@ static void _check_raid1_seg(struct lv_segment *seg, int *error_count)
raid_seg_error("no meta areas");
if (seg->stripe_size)
raid_seg_error_val("non-zero stripe size", seg->stripe_size);
if ((seg->lv->status & LV_RESHAPE_DATA_OFFSET) || seg->data_offset)
if ((seg->lv->status & LV_RESHAPE_DATA_OFFSET) || seg->data_offset > 1)
raid_seg_error_val("data_offset", seg->data_offset);
if (_check_raid_seg_reshape_flags(seg))
raid_seg_error("reshape");
_check_raid_region_recovery(seg, error_count);
}
@@ -169,11 +181,13 @@ static void _check_raid45610_seg(struct lv_segment *seg, int *error_count)
_check_raid_region_recovery(seg, error_count);
/* END: checks applying to any raid4/5/6/10 */
if (seg->lv->status & LV_RESHAPE_DATA_OFFSET) {
if (seg->data_offset & (seg->lv->vg->extent_size - 1))
if (seg->data_offset > 1) {
if (seg->lv->status & LV_RESHAPE_DATA_OFFSET) {
if (seg->data_offset & (seg->lv->vg->extent_size - 1))
raid_seg_error_val("data_offset", seg->data_offset);
} else
raid_seg_error_val("data_offset", seg->data_offset);
} else if (seg->data_offset)
raid_seg_error_val("data_offset", seg->data_offset);
}
/* Specific checks per raid level */
if (seg_is_raid4(seg) ||
@@ -217,17 +231,6 @@ static void _check_non_raid_seg_members(struct lv_segment *seg, int *error_count
raid_seg_error("non-zero cow LV");
if (!dm_list_empty(&seg->origin_list)) /* snap */
raid_seg_error("non-zero origin_list");
/* replicator members (deprecated) */
if (seg->replicator)
raid_seg_error("non-zero replicator");
if (seg->rlog_lv)
raid_seg_error("non-zero rlog LV");
if (seg->rlog_type)
raid_seg_error("non-zero rlog type");
if (seg->rdevice_index_highest)
raid_seg_error("non-zero rdevice_index_highests");
if (seg->rsite_index_highest)
raid_seg_error("non-zero rsite_index_highests");
/* .... more members? */
}
@@ -397,6 +400,9 @@ static void _check_lv_segment(struct logical_volume *lv, struct lv_segment *seg,
if (seg_is_raid(seg))
_check_raid_seg(seg, error_count);
else if (!lv_is_raid_type(lv) &&
_check_raid_seg_reshape_flags(seg))
seg_error("reshape");
if (seg_is_pool(seg)) {
if ((seg->area_count != 1) || (seg_type(seg, 0) != AREA_LV)) {
@@ -511,8 +517,6 @@ int check_lv_segments(struct logical_volume *lv, int complete_vg)
struct seg_list *sl;
struct glv_list *glvl;
int error_count = 0;
struct replicator_site *rsite;
struct replicator_device *rdev;
dm_list_iterate_items(seg, &lv->segments) {
seg_count++;
@@ -549,9 +553,6 @@ int check_lv_segments(struct logical_volume *lv, int complete_vg)
}
}
if (seg_is_replicator(seg) && !check_replicator_segment(seg))
inc_error_count;
if (complete_vg)
_check_lv_segment(lv, seg, seg_count, &error_count);
@@ -642,6 +643,11 @@ int check_lv_segments(struct logical_volume *lv, int complete_vg)
inc_error_count;
}
if (!le) {
log_error("LV %s: has no segment.", lv->name);
inc_error_count;
}
dm_list_iterate_items(sl, &lv->segs_using_this_lv) {
seg = sl->seg;
seg_found = 0;
@@ -653,18 +659,6 @@ int check_lv_segments(struct logical_volume *lv, int complete_vg)
if (seg->meta_areas && seg_is_raid_with_meta(seg) && (lv == seg_metalv(seg, s)))
seg_found++;
}
if (seg_is_replicator_dev(seg)) {
dm_list_iterate_items(rsite, &seg->replicator->rsites) {
dm_list_iterate_items(rdev, &rsite->rdevices) {
if (lv == rdev->lv || lv == rdev->slog)
seg_found++;
}
}
if (lv == seg->replicator)
seg_found++;
}
if (seg_is_replicator(seg) && lv == seg->rlog_lv)
seg_found++;
if (seg->log_lv == lv)
seg_found++;
if (seg->metadata_lv == lv || seg->pool_lv == lv)

View File

@@ -84,6 +84,7 @@
#define CONVERTING UINT64_C(0x0000000000400000) /* LV */
#define MISSING_PV UINT64_C(0x0000000000800000) /* PV */
#define PV_MOVED_VG UINT64_C(0x4000000000000000) /* PV - Moved to a new VG */
#define PARTIAL_LV UINT64_C(0x0000000001000000) /* LV - derived flag, not
written out in metadata*/
@@ -93,8 +94,6 @@
#define MERGING UINT64_C(0x0000000010000000) /* LV SEG */
#define REPLICATOR UINT64_C(0x0000000020000000) /* LV -internal use only for replicator */
#define REPLICATOR_LOG UINT64_C(0x0000000040000000) /* LV -internal use only for replicator-dev */
#define UNLABELLED_PV UINT64_C(0x0000000080000000) /* PV -this PV had no label written yet */
#define RAID UINT64_C(0x0000000100000000) /* LV - Internal use only */
@@ -146,7 +145,6 @@
#define LV_RESHAPE UINT64_C(0x1000000000000000) /* Ongoing reshape (number of stripes, stripesize or raid algorithm change):
used as SEGTYPE_FLAG to prevent activation on old runtime */
#define LV_RESHAPE_DATA_OFFSET UINT64_C(0x2000000000000000) /* LV reshape flag data offset (out of place reshaping) */
#define LV_MAINTENANCE UINT64_C(0x4000000000000000) /* LV maintenance mode */
/* Next unused flag: UINT64_C(0x8000000000000000) */
/* Format features flags */
@@ -253,8 +251,6 @@
#define lv_is_pool_metadata_spare(lv) (((lv)->status & POOL_METADATA_SPARE) ? 1 : 0)
#define lv_is_lockd_sanlock_lv(lv) (((lv)->status & LOCKD_SANLOCK_LV) ? 1 : 0)
#define lv_is_rlog(lv) (((lv)->status & REPLICATOR_LOG) ? 1 : 0)
#define lv_is_removed(lv) (((lv)->status & LV_REMOVED) ? 1 : 0)
int lv_layout_and_role(struct dm_pool *mem, const struct logical_volume *lv,
@@ -416,53 +412,6 @@ struct lv_thin_message {
struct segment_type;
/* List with vg_name, vgid and flags */
struct cmd_vg {
struct dm_list list;
const char *vg_name;
const char *vgid;
uint32_t flags;
struct volume_group *vg;
};
/* ++ Replicator datatypes */
typedef enum {
REPLICATOR_STATE_PASSIVE,
REPLICATOR_STATE_ACTIVE,
NUM_REPLICATOR_STATE
} replicator_state_t;
struct replicator_site {
struct dm_list list; /* Chained list of sites */
struct dm_list rdevices; /* Device list */
struct logical_volume *replicator; /* Reference to replicator */
const char *name; /* Site name */
const char *vg_name; /* VG name */
struct volume_group *vg; /* resolved vg (activate/deactive) */
unsigned site_index;
replicator_state_t state; /* Active or pasive state of site */
dm_replicator_mode_t op_mode; /* Operation mode sync or async fail|warn|drop|stall */
uint64_t fall_behind_data; /* Bytes */
uint32_t fall_behind_ios; /* IO operations */
uint32_t fall_behind_timeout; /* Seconds */
};
struct replicator_device {
struct dm_list list; /* Chained list of devices from same site */
struct lv_segment *replicator_dev; /* Reference to replicator-dev segment */
struct replicator_site *rsite; /* Reference to site parameters */
uint64_t device_index;
const char *name; /* Device LV name */
struct logical_volume *lv; /* LV from replicator site's VG */
struct logical_volume *slog; /* Synclog lv from VG */
const char *slog_name; /* Debug - specify size of core synclog */
};
/* -- Replicator datatypes */
struct lv_segment {
struct dm_list list;
struct logical_volume *lv;
@@ -514,12 +463,6 @@ struct lv_segment {
const char *policy_name; /* For cache_pool */
struct dm_config_node *policy_settings; /* For cache_pool */
unsigned cleaner_policy; /* For cache */
struct logical_volume *replicator;/* For replicator-devs - link to replicator LV */
struct logical_volume *rlog_lv; /* For replicators */
const char *rlog_type; /* For replicators */
uint64_t rdevice_index_highest; /* For replicators */
unsigned rsite_index_highest; /* For replicators */
};
#define seg_type(seg, s) (seg)->areas[(s)].type
@@ -745,9 +688,9 @@ uint32_t vg_read_error(struct volume_group *vg_handle);
struct physical_volume *pv_create(const struct cmd_context *cmd,
struct device *dev, struct pv_create_args *pva);
int pvremove_single(struct cmd_context *cmd, const char *pv_name,
void *handle __attribute__((unused)), unsigned force_count,
unsigned prompt, struct dm_list *pvslist);
struct physical_volume *pvcreate_vol(struct cmd_context *cmd, const char *pv_name,
struct pvcreate_params *pp, int write_now);
int pvremove_many(struct cmd_context *cmd, struct dm_list *pv_names,
unsigned force_count, unsigned prompt);
@@ -1016,9 +959,6 @@ void lv_set_activation_skip(struct logical_volume *lv, int override_default, int
int lv_activation_skip(struct logical_volume *lv, activation_change_t activate,
int override_lv_skip_flag);
int lv_maintenance_skip(struct logical_volume *lv, activation_change_t activate,
int override_maintenance_flag);
/*
* Functions for layer manipulation
*/
@@ -1134,6 +1074,9 @@ int vg_add_snapshot(struct logical_volume *origin, struct logical_volume *cow,
int vg_remove_snapshot(struct logical_volume *cow);
int validate_snapshot_origin(const struct logical_volume *origin_lv);
int vg_check_status(const struct volume_group *vg, uint64_t status);
int vg_check_pv_dev_block_sizes(const struct volume_group *vg);
@@ -1205,26 +1148,6 @@ int reconfigure_mirror_images(struct lv_segment *mirrored_seg, uint32_t num_mirr
int collapse_mirrored_lv(struct logical_volume *lv);
int shift_mirror_images(struct lv_segment *mirrored_seg, unsigned mimage);
/* ++ metadata/replicator_manip.c */
int replicator_add_replicator_dev(struct logical_volume *replicator_lv,
struct lv_segment *replicator_dev_seg);
struct logical_volume *replicator_remove_replicator_dev(struct lv_segment *rdev_seg);
int replicator_add_rlog(struct lv_segment *replicator_seg, struct logical_volume *rlog_lv);
struct logical_volume *replicator_remove_rlog(struct lv_segment *replicator_seg);
int replicator_dev_add_slog(struct replicator_device *rdev, struct logical_volume *slog);
struct logical_volume *replicator_dev_remove_slog(struct replicator_device *rdev);
int replicator_dev_add_rimage(struct replicator_device *rdev, struct logical_volume *lv);
struct logical_volume *replicator_dev_remove_rimage(struct replicator_device *rdev);
int lv_is_active_replicator_dev(const struct logical_volume *lv);
int lv_is_replicator(const struct logical_volume *lv);
int lv_is_replicator_dev(const struct logical_volume *lv);
int lv_is_rimage(const struct logical_volume *lv);
int lv_is_slog(const struct logical_volume *lv);
struct logical_volume *first_replicator_dev(const struct logical_volume *lv);
/* -- metadata/replicator_manip.c */
/* ++ metadata/raid_manip.c */
int lv_is_raid_with_tracking(const struct logical_volume *lv);
uint32_t lv_raid_image_count(const struct logical_volume *lv);
@@ -1306,19 +1229,6 @@ int lv_cache_remove(struct logical_volume *cache_lv);
int wipe_cache_pool(struct logical_volume *cache_pool_lv);
/* -- metadata/cache_manip.c */
struct cmd_vg *cmd_vg_add(struct dm_pool *mem, struct dm_list *cmd_vgs,
const char *vg_name, const char *vgid,
uint32_t flags);
struct cmd_vg *cmd_vg_lookup(struct dm_list *cmd_vgs,
const char *vg_name, const char *vgid);
int cmd_vg_read(struct cmd_context *cmd, struct dm_list *cmd_vgs);
void free_cmd_vgs(struct dm_list *cmd_vgs);
int find_replicator_vgs(const struct logical_volume *lv);
int lv_read_replicator_vgs(const struct logical_volume *lv);
void lv_release_replicator_vgs(const struct logical_volume *lv);
struct logical_volume *find_pvmove_lv(struct volume_group *vg,
struct device *dev, uint64_t lv_type);
const struct logical_volume *find_pvmove_lv_in_lv(const struct logical_volume *lv);

View File

@@ -0,0 +1,697 @@
/*
* Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved.
* Copyright (C) 2004-2017 Red Hat, Inc. All rights reserved.
*
* This file is part of LVM2.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the GNU Lesser General Public License v.2.1.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program; if not, write to the Free Software Foundation,
* Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
/*
* This file contains functions now used only by liblvm.
* Ideally this file should be empty as liblvm and toollib should be doing identical things.
* FIXME Merge all the code into different parts of the tree.
*/
#include "lib.h"
#include "toolcontext.h"
#include "lvm-string.h"
#include "metadata.h"
#include "label.h"
#include "lvm-signal.h"
#include "lvmcache.h"
#include "lvmetad.h"
/*
 * Remove the physical volume named @pv_name from @vg.
 *
 * The PV is looked up in @vg and passed to vgreduce_single(); when that
 * succeeds its pv_list entry is added to vg->removed_pvs.
 *
 * Returns 1 on success, 0 (after logging an error) when the PV is not
 * found in the VG or cannot be removed from it.
 */
int vg_reduce(struct volume_group *vg, const char *pv_name)
{
	struct pv_list *entry = find_pv_in_vg(vg, pv_name);

	if (!entry) {
		log_error("Physical volume %s not in volume group %s.",
			  pv_name, vg->name);
		return 0;
	}

	if (!vgreduce_single(vg->cmd, vg, entry->pv, 0)) {
		log_error("Unable to remove physical volume '%s' from "
			  "volume group '%s'.", pv_name, vg->name);
		return 0;
	}

	/* Record the entry on the VG's list of removed PVs. */
	dm_list_add(&vg->removed_pvs, &entry->list);

	return 1;
}
/*
 * Write the physical volume described by @pvw to disk.
 *
 * For a new PV (pvw->new_pv set) any existing label is removed first and,
 * when pvw->pp->zero is set, the device is opened and dev_set() is called
 * for offset 0, length 2048 and value 0 before it is closed again.
 * The PV is then written with pv_write().
 *
 * Returns 1 on success, 0 on failure (an error has been logged).
 */
static int _pvcreate_write(struct cmd_context *cmd, struct pv_to_write *pvw)
{
	struct physical_volume *pv = pvw->pv;
	struct device *dev = pv->dev;
	const char *pv_name = dev_name(dev);
	int wiped_ok;

	if (pvw->new_pv) {
		/* Wipe existing label first */
		if (!label_remove(pv_dev(pv))) {
			log_error("Failed to wipe existing label on %s", pv_name);
			return 0;
		}

		if (pvw->pp->zero) {
			log_verbose("Zeroing start of device %s", pv_name);

			if (!dev_open_quiet(dev)) {
				log_error("%s not opened: device not zeroed", pv_name);
				return 0;
			}

			wiped_ok = dev_set(dev, UINT64_C(0), (size_t) 2048, DEV_IO_LABEL, 0);
			if (!wiped_ok)
				log_error("%s not wiped: aborting", pv_name);

			/* The device is closed whether or not the wipe worked. */
			if (!dev_close(dev))
				stack;

			if (!wiped_ok)
				return 0;
		}
	}

	log_verbose("Writing physical volume data to disk \"%s\"",
		    pv_name);

	if (!pv_write(cmd, pv, 1)) {
		log_error("Failed to write physical volume \"%s\"", pv_name);
		return 0;
	}

	if (!pvw->new_pv) {
		log_verbose("Physical volume \"%s\" successfully written", pv_name);
		return 1;
	}

	log_print_unless_silent("Physical volume \"%s\" successfully created", pv_name);

	return 1;
}
/*
 * Sanity-check the PV creation arguments held in pp->pva.
 *
 * Rejects a metadata copy count above 2 and data alignment / data
 * alignment offset values that exceed UINT32_MAX.
 *
 * Returns 1 when all checks pass, 0 (after logging an error) otherwise.
 */
static int _verify_pv_create_params(struct pvcreate_params *pp)
{
	/*
	 * FIXME: Some of these checks duplicate those in pvcreate_params_validate.
	 */
	if (pp->pva.pvmetadatacopies > 2) {
		log_error("Metadatacopies may only be 0, 1 or 2");
	} else if (pp->pva.data_alignment > UINT32_MAX) {
		log_error("Physical volume data alignment is too big.");
	} else if (pp->pva.data_alignment_offset > UINT32_MAX) {
		log_error("Physical volume data alignment offset is too big.");
	} else {
		return 1;
	}

	/* Reached only when one of the checks above failed. */
	return 0;
}
/*
 * See if we may pvcreate on this device.
 * 0 indicates we may not.
 *
 * Parameters:
 * - cmd: command context; its filters are used for device lookup
 * - name: device path to be checked
 * - pp: pvcreate parameters; 'force' and 'yes' are consulted here
 * - wiped: output, reset to 0 on entry and then filled in by
 *          wipe_known_signatures()
 *
 * Before returning (on success and on most failure paths) the filters
 * may be refreshed and a label rescan forced, depending on what was
 * done to the device - see the long comment in the body.  A failure
 * of that refresh or rescan turns the result into 0.
 */
static int _pvcreate_check(struct cmd_context *cmd, const char *name,
struct pvcreate_params *pp, int *wiped)
{
static const char really_init_msg[] = "Really INITIALIZE physical volume";
static const char not_init_msg[] = "physical volume not initialized";
struct physical_volume *pv;
struct device *dev;
int r = 0;
int scan_needed = 0;
int filter_refresh_needed = 0;
/* 'used' is assigned only when a PV was found; it is read only under 'pv' below. */
int used;
/* FIXME Check partition type is LVM unless --force is given */
*wiped = 0;
/* Is there a pv here already? */
/* Orphans and unformatted devices are accepted: no PV here is not an error. */
pv = find_pv_by_name(cmd, name, 1, 1);
/* Allow partial & exported VGs to be destroyed. */
/* We must have -ff to overwrite a non orphan */
if (pv) {
if (!is_orphan(pv) && pp->force != DONT_PROMPT_OVERRIDE) {
log_error("Can't initialize physical volume \"%s\" of "
"volume group \"%s\" without -ff.", name, pv_vg_name(pv));
goto out;
}
/* A negative result from is_used_pv() is treated as an error. */
if ((used = is_used_pv(pv)) < 0)
goto_out;
if (used && pp->force != DONT_PROMPT_OVERRIDE) {
log_error("PV %s is used by a VG but its metadata is missing.", name);
log_error("Can't initialize PV '%s' without -ff.", name);
goto out;
}
}
/* prompt */
/* An orphan PV that is not marked as used is overwritten without asking. */
if (pv && !pp->yes) {
if (is_orphan(pv)) {
if (used) {
if (yes_no_prompt("%s \"%s\" that is marked as belonging to a VG [y/n]? ",
really_init_msg, name) == 'n') {
log_error("%s: %s", name, not_init_msg);
goto out;
}
}
} else {
if (yes_no_prompt("%s \"%s\" of volume group \"%s\" [y/n]? ",
really_init_msg, name, pv_vg_name(pv)) == 'n') {
log_error("%s: %s", name, not_init_msg);
goto out;
}
}
}
if (sigint_caught())
goto_out;
dev = dev_cache_get(name, cmd->full_filter);
/*
* Refresh+rescan at the end is needed if:
* - we don't obtain device list from udev,
* hence persistent cache file is used
* and we need to trash it and reevaluate
* for any changes done outside - adding
* any new foreign signature which may affect
* filtering - before we do pvcreate, we
* need to be sure that we have up-to-date
* view for filters
*
* - we have wiped existing foreign signatures
* from dev as this may affect what's filtered
* as well
*
*
* Only rescan at the end is needed if:
* - we've just checked whether dev is filtered
* by MD filter. We do the refresh in-situ,
* so no need to require the refresh at the
* end of this fn. This is to allow for
* wiping MD signature during pvcreate for
* the dev - the dev would normally be
* filtered because of MD filter.
* This is an exception.
*/
/* Is there an md superblock here? */
if (!dev && md_filtering()) {
if (!refresh_filters(cmd))
goto_out;
/* Look the device up again with MD filtering switched off, then switch it back on. */
init_md_filtering(0);
dev = dev_cache_get(name, cmd->full_filter);
init_md_filtering(1);
scan_needed = 1;
} else if (!obtain_device_list_from_udev())
filter_refresh_needed = scan_needed = 1;
if (!dev) {
log_error("Device %s not found (or ignored by filtering).", name);
goto out;
}
/*
* This test will fail if the device belongs to an MD array.
*/
if (!dev_test_excl(dev)) {
/* FIXME Detect whether device-mapper itself is still using it */
log_error("Can't open %s exclusively. Mounted filesystem?",
name);
goto out;
}
if (!wipe_known_signatures(cmd, dev, name,
TYPE_LVM1_MEMBER | TYPE_LVM2_MEMBER,
0, pp->yes, pp->force, wiped)) {
log_error("Aborting pvcreate on %s.", name);
goto out;
}
if (*wiped)
filter_refresh_needed = scan_needed = 1;
if (sigint_caught())
goto_out;
/* The outer condition already guarantees a non-orphan PV here. */
if (pv && !is_orphan(pv) && pp->force)
log_warn("WARNING: Forcing physical volume creation on "
"%s%s%s%s", name,
!is_orphan(pv) ? " of volume group \"" : "",
pv_vg_name(pv),
!is_orphan(pv) ? "\"" : "");
r = 1;
out:
/* Deferred refresh/rescan; also reached from the failure paths above. */
if (filter_refresh_needed)
if (!refresh_filters(cmd)) {
stack;
r = 0;
}
if (scan_needed) {
lvmcache_force_next_label_scan();
if (!lvmcache_label_scan(cmd)) {
stack;
r = 0;
}
}
/* pv may be NULL here when no PV was found on the device. */
free_pv_fid(pv);
return r;
}
/*
 * pvcreate_vol() - initialize a device with PV label and metadata area
 *
 * Parameters:
 * - cmd: command context used for device lookup, filtering and PV setup
 * - pv_name: device path to initialize
 * - pp: parameters to pass to pv_create; if NULL, use default values
 * - write_now: if non-zero, the new PV is written to the device
 *   immediately; otherwise it is only set up in memory.  In both cases
 *   the returned PV carries the UNLABELLED_PV status flag.
 *
 * Returns:
 * NULL: error
 * struct physical_volume * (non-NULL): handle to physical volume created
 */
struct physical_volume *pvcreate_vol(struct cmd_context *cmd, const char *pv_name,
				     struct pvcreate_params *pp, int write_now)
{
	struct physical_volume *pv = NULL;
	struct device *dev;
	int wiped = 0;
	struct pvcreate_params default_pp;
	char buffer[64] __attribute__((aligned(8)));
	dev_ext_t dev_ext_src;

	pvcreate_params_set_defaults(&default_pp);

	if (!pp)
		pp = &default_pp;

	if (!_verify_pv_create_params(pp))
		goto bad;

	/* A requested PV uuid must not already belong to a different device. */
	if (pp->pva.idp) {
		if ((dev = lvmcache_device_from_pvid(cmd, pp->pva.idp, NULL, NULL)) &&
		    (dev != dev_cache_get(pv_name, cmd->full_filter))) {
			if (!id_write_format((const struct id*)&pp->pva.idp->uuid,
					     buffer, sizeof(buffer)))
				goto_bad;
			log_error("uuid %s already in use on \"%s\"", buffer,
				  dev_name(dev));
			goto bad;
		}
	}

	if (!_pvcreate_check(cmd, pv_name, pp, &wiped))
		goto_bad;

	if (sigint_caught())
		goto_bad;

	/*
	 * wipe_known_signatures called in _pvcreate_check fires
	 * WATCH event to update udev database. But at the moment,
	 * we have no way to synchronize with such event - we may
	 * end up still seeing the old info in udev db and pvcreate
	 * can fail to proceed because of the device still being
	 * filtered (because of the stale info in udev db).
	 * Disable udev dev-ext source temporarily here for
	 * this reason and rescan with DEV_EXT_NONE dev-ext
	 * source (so filters use DEV_EXT_NONE source).
	 */
	dev_ext_src = external_device_info_source();
	if (wiped && (dev_ext_src == DEV_EXT_UDEV))
		init_external_device_info_source(DEV_EXT_NONE);

	dev = dev_cache_get(pv_name, cmd->full_filter);

	/* Always restore the original source, even if it was not changed. */
	init_external_device_info_source(dev_ext_src);

	if (!dev) {
		log_error("%s: Couldn't find device. Check your filters?",
			  pv_name);
		goto bad;
	}

	if (!(pv = pv_create(cmd, dev, &pp->pva))) {
		log_error("Failed to setup physical volume \"%s\"", pv_name);
		goto bad;
	}

	log_verbose("Set up physical volume for \"%s\" with %" PRIu64
		    " available sectors", pv_name, pv_size(pv));

	pv->status |= UNLABELLED_PV;

	if (write_now) {
		struct pv_to_write pvw;

		pvw.pp = pp;
		pvw.pv = pv;
		pvw.new_pv = 1;

		if (!_pvcreate_write(cmd, &pvw))
			goto bad;
	}

	return pv;

bad:
	return NULL;
}
/*
 * Add one PV, identified by device path, to a VG.
 *
 * Parameters:
 * - vg: handle of volume group to extend by 'pv_name'
 * - pv_name: device path of PV to add to VG
 * - pp: parameters to pass to implicit pvcreate; if NULL, do not pvcreate
 * - max_phys_block_size: largest physical block size found amongst PVs in a VG
 *
 * If no PV is found at 'pv_name' and 'pp' is given, a new PV is set up
 * in memory (not yet written).  PVs that need writing are queued on
 * vg->pvs_to_write.
 *
 * Returns 1 on success, 0 on failure.
 */
static int _vg_extend_single_pv(struct volume_group *vg, char *pv_name,
				struct pvcreate_params *pp,
				unsigned int *max_phys_block_size)
{
	struct pv_to_write *queued;
	int created = 0;
	struct physical_volume *pv = find_pv_by_name(vg->cmd, pv_name, 1, 1);

	if (!pv) {
		if (!pp) {
			log_error("%s not identified as an existing "
				  "physical volume", pv_name);
			return 0;
		}

		if (!(pv = pvcreate_vol(vg->cmd, pv_name, pp, 0)))
			return_0;

		created = 1;
	}

	if (!(check_dev_block_size_for_vg(pv->dev, (const struct volume_group *) vg,
					  max_phys_block_size)))
		goto_bad;

	if (!add_pv_to_vg(vg, pv_name, pv, created))
		goto_bad;

	/* Nothing to queue unless the format keeps PV flags or the PV is unlabelled. */
	if (!(pv->fmt->features & FMT_PV_FLAGS) &&
	    !(pv->status & UNLABELLED_PV))
		return 1;

	if (!(queued = dm_pool_zalloc(vg->vgmem, sizeof(*queued)))) {
		log_error("pv_to_write allocation for '%s' failed", pv_name);
		return 0;
	}

	queued->pv = pv;
	/* Creation parameters are recorded only for PVs created above. */
	if (created)
		queued->pp = pp;
	else
		queued->pp = NULL;
	queued->new_pv = created;
	dm_list_add(&vg->pvs_to_write, &queued->list);

	return 1;

bad:
	free_pv_fid(pv);
	return 0;
}
/*
 * Extend a VG by a list of PVs / device paths
 *
 * Parameters:
 * - vg: handle of volume group to extend by 'pv_names'
 * - pv_count: count of device paths of PVs
 * - pv_names: device paths of PVs to add to VG
 * - pp: parameters to pass to implicit pvcreate; if NULL, do not pvcreate
 *
 * Each path is copied and unescaped before use.  Processing stops at
 * the first PV that cannot be added.
 *
 * Returns 1 on success, 0 on failure.
 */
int vg_extend(struct volume_group *vg, int pv_count, const char *const *pv_names,
	      struct pvcreate_params *pp)
{
	unsigned int max_phys_block_size = 0;
	char *unescaped;
	int added;
	int idx;

	if (vg_bad_status_bits(vg, RESIZEABLE_VG))
		return_0;

	/* attach each pv */
	for (idx = 0; idx < pv_count; idx++) {
		unescaped = dm_strdup(pv_names[idx]);
		if (!unescaped) {
			log_error("Failed to duplicate pv name %s.", pv_names[idx]);
			return 0;
		}

		dm_unescape_colons_and_at_signs(unescaped, NULL, NULL);

		added = _vg_extend_single_pv(vg, unescaped, pp, &max_phys_block_size);
		if (!added)
			log_error("Unable to add physical volume '%s' to "
				  "volume group '%s'.", unescaped, vg->name);

		/* The copy is released on both the success and the failure path. */
		dm_free(unescaped);

		if (!added)
			return 0;
	}

	/* The result of the size check is deliberately ignored. */
	(void) check_pv_dev_sizes(vg);

	/* FIXME Decide whether to initialise and add new mdahs to format instance */

	return 1;
}
/*
 * Decide whether it is "safe" to wipe the labels on this device.
 * 0 indicates we may not.
 *
 * Parameters:
 * - cmd: command context; cmd->filter is used for the device lookup
 * - name: device path to check
 * - force_count: number of times force was requested; a PV that is in
 *   a VG, or an orphan still marked as used, needs at least 2
 * - prompt: the confirmation question is asked only when this is zero
 *   NOTE(review): the name suggests the opposite sense - presumably
 *   callers pass a "--yes" style flag here; confirm against callers.
 * - pvslist: list of struct pv_list searched for a PV on this device
 *
 * A device without a PV label is accepted only when force_count is
 * non-zero.
 */
static int _pvremove_check(struct cmd_context *cmd, const char *name,
			   unsigned force_count, unsigned prompt, struct dm_list *pvslist)
{
	static const char really_wipe_msg[] = "Really WIPE LABELS from physical volume";
	struct device *dev;
	struct pv_list *pvl;
	struct physical_volume *pv = NULL;
	int used;
	int r = 0;

	/* FIXME Check partition type is LVM unless --force is given */

	if (!(dev = dev_cache_get(name, cmd->filter))) {
		log_error("Device %s not found.", name);
		return 0;
	}

	/* Is there a pv here already? */
	/* If not, this is an error unless you used -f. */
	if (!label_read(dev, NULL, 0)) {
		if (force_count)
			return 1;
		log_error("No PV label found on %s.", name);
		return 0;
	}

	dm_list_iterate_items(pvl, pvslist)
		if (pvl->pv->dev == dev)
			pv = pvl->pv;

	if (!pv) {
		log_error(INTERNAL_ERROR "Physical Volume %s has a label, "
			  "but is neither in a VG nor orphan.", name);
		goto out; /* better safe than sorry */
	}

	if (is_orphan(pv)) {
		if ((used = is_used_pv(pv)) < 0)
			goto_out;

		if (used) {
			log_warn("WARNING: PV %s is used by a VG but its metadata is missing.", name);

			if (force_count < 2)
				goto_bad;

			if (!prompt &&
			    yes_no_prompt("%s \"%s\" that is marked as belonging to a VG [y/n]? ",
					  really_wipe_msg, name) == 'n')
				goto_bad;
		}
	} else {
		log_warn("WARNING: PV %s is used by VG %s (consider using vgreduce).", name, pv_vg_name(pv));

		if (force_count < 2)
			goto_bad;

		if (!prompt &&
		    yes_no_prompt("%s \"%s\" of volume group \"%s\" [y/n]? ",
				  really_wipe_msg, name, pv_vg_name(pv)) == 'n')
			goto_bad;
	}

	if (force_count) {
		/*
		 * Name the VG only for a PV that really belongs to one, so
		 * that an orphan's VG name is not appended to the device name.
		 */
		if (is_orphan(pv))
			log_warn("WARNING: Wiping physical volume label from "
				 "%s", name);
		else
			log_warn("WARNING: Wiping physical volume label from "
				 "%s of volume group \"%s\"", name,
				 pv_vg_name(pv));
	}

	r = 1;
bad:
	/* Reached on success too; the messages below are for refusals only. */
	if (!r) {
		log_error("%s: physical volume label not removed.", name);

		if (force_count < 2) /* Show hint as log_error() */
			log_error("(If you are certain you need pvremove, "
				  "then confirm by using --force twice.)");
	}
out:
	return r;
}
static int _pvremove_single(struct cmd_context *cmd, const char *pv_name,
void *handle __attribute__((unused)), unsigned force_count,
unsigned prompt, struct dm_list *pvslist)
{
struct device *dev;
struct lvmcache_info *info;
int r = 0;
if (!_pvremove_check(cmd, pv_name, force_count, prompt, pvslist))
goto out;
if (!(dev = dev_cache_get(pv_name, cmd->filter))) {
log_error("%s: Couldn't find device. Check your filters?",
pv_name);
goto out;
}
info = lvmcache_info_from_pvid(dev->pvid, dev, 0);
if (!dev_test_excl(dev)) {
/* FIXME Detect whether device-mapper is still using the device */
log_error("Can't open %s exclusively - not removing. "
"Mounted filesystem?", dev_name(dev));
goto out;
}
/* Wipe existing label(s) */
if (!label_remove(dev)) {
log_error("Failed to wipe existing label(s) on %s", pv_name);
goto out;
}
if (!lvmetad_pv_gone_by_dev(dev))
goto_out;
log_print_unless_silent("Labels on physical volume \"%s\" successfully wiped",
pv_name);
r = 1;
out:
return r;
}
/*
 * Wipe the PV labels from every device named in 'pv_names'.
 *
 * The orphan VG lock (VG_ORPHANS) is held in write mode while the
 * devices are processed.  A failure on one device does not stop the
 * remaining ones from being processed, but a caught interrupt does.
 *
 * 'force_count' and 'prompt' are passed through for each device.
 *
 * Returns 1 if every device was wiped, 0 otherwise.
 */
int pvremove_many(struct cmd_context *cmd, struct dm_list *pv_names,
		  unsigned force_count, unsigned prompt)
{
	const struct dm_str_list *name_item;
	struct dm_list *all_pvs = NULL;
	struct pv_list *pvl;
	int all_ok = 1;

	if (!lock_vol(cmd, VG_ORPHANS, LCK_VG_WRITE, NULL)) {
		log_error("Can't get lock for orphan PVs");
		return 0;
	}

	lvmcache_seed_infos_from_lvmetad(cmd);

	all_pvs = get_pvs(cmd);
	if (!all_pvs) {
		all_ok = 0;
		goto_out;
	}

	dm_list_iterate_items(name_item, pv_names) {
		if (!_pvremove_single(cmd, name_item->str, NULL, force_count, prompt, all_pvs)) {
			stack;
			all_ok = 0;
		}

		if (sigint_caught()) {
			all_ok = 0;
			goto_out;
		}
	}

out:
	unlock_vg(cmd, NULL, VG_ORPHANS);

	/* Release the format instance of every PV obtained from get_pvs(). */
	if (all_pvs) {
		dm_list_iterate_items(pvl, all_pvs) {
			free_pv_fid(pvl->pv);
		}
	}

	return all_ok;
}
/* FIXME: liblvm todo - make into function that returns handle */
struct physical_volume *find_pv_by_name(struct cmd_context *cmd,
const char *pv_name,
int allow_orphan, int allow_unformatted)
{
struct device *dev;
struct pv_list *pvl;
struct dm_list *pvslist;
struct physical_volume *pv = NULL;
lvmcache_seed_infos_from_lvmetad(cmd);
if (!(dev = dev_cache_get(pv_name, cmd->filter))) {
if (!allow_unformatted)
log_error("Physical volume %s not found", pv_name);
return_NULL;
}
if (!(pvslist = get_pvs(cmd)))
return_NULL;
dm_list_iterate_items(pvl, pvslist)
if (pvl->pv->dev == dev)
pv = pvl->pv;
else
free_pv_fid(pvl->pv);
if (!pv && !allow_unformatted)
log_error("Physical volume %s not found", pv_name);
if (pv && !allow_orphan && is_orphan_vg(pv->vg_name)) {
log_error("Physical volume %s not in a volume group", pv_name);
goto bad;
}
return pv;
bad:
free_pv_fid(pv);
return NULL;
}

View File

@@ -19,7 +19,6 @@
#include "toolcontext.h"
#include "lvm-string.h"
#include "lvm-file.h"
#include "lvm-signal.h"
#include "lvmcache.h"
#include "lvmetad.h"
#include "memlock.h"
@@ -44,9 +43,6 @@ static struct physical_volume *_pv_read(struct cmd_context *cmd,
struct format_instance *fid,
uint32_t warn_flags, int scan_label_only);
static uint32_t _vg_bad_status_bits(const struct volume_group *vg,
uint64_t status);
static int _alignment_overrides_default(unsigned long data_alignment,
unsigned long default_pe_align)
{
@@ -177,8 +173,8 @@ void del_pvl_from_vgs(struct volume_group *vg, struct pv_list *pvl)
* 1 - success
* FIXME: remove pv_name - obtain safely from pv
*/
static int add_pv_to_vg(struct volume_group *vg, const char *pv_name,
struct physical_volume *pv, int new_pv)
int add_pv_to_vg(struct volume_group *vg, const char *pv_name,
struct physical_volume *pv, int new_pv)
{
struct pv_list *pvl;
struct format_instance *fid = vg->fid;
@@ -387,8 +383,8 @@ static int _move_pv(struct volume_group *vg_from, struct volume_group *vg_to,
return 0;
}
if (_vg_bad_status_bits(vg_from, RESIZEABLE_VG) ||
_vg_bad_status_bits(vg_to, RESIZEABLE_VG))
if (vg_bad_status_bits(vg_from, RESIZEABLE_VG) ||
vg_bad_status_bits(vg_to, RESIZEABLE_VG))
return 0;
del_pvl_from_vgs(vg_from, pvl);
@@ -427,8 +423,8 @@ int move_pvs_used_by_lv(struct volume_group *vg_from,
return 0;
}
if (_vg_bad_status_bits(vg_from, RESIZEABLE_VG) ||
_vg_bad_status_bits(vg_to, RESIZEABLE_VG))
if (vg_bad_status_bits(vg_from, RESIZEABLE_VG) ||
vg_bad_status_bits(vg_to, RESIZEABLE_VG))
return 0;
dm_list_iterate_items(lvseg, &lvl->lv->segments) {
@@ -515,11 +511,19 @@ int vg_rename(struct cmd_context *cmd, struct volume_group *vg,
}
dm_list_iterate_items(pvl, &vg->pvs) {
/* Skip if VG didn't change e.g. with vgsplit */
if (pvl->pv->vg_name && !strcmp(new_name, pvl->pv->vg_name))
continue;
if (!(pvl->pv->vg_name = dm_pool_strdup(mem, new_name))) {
log_error("pv->vg_name allocation failed for '%s'",
pv_dev_name(pvl->pv));
return 0;
}
/* Mark the PVs that still hold metadata with the old VG name */
log_debug_metadata("Marking PV %s as moved to VG %s", dev_name(pvl->pv->dev), new_name);
pvl->pv->status |= PV_MOVED_VG;
}
return 1;
@@ -673,7 +677,7 @@ int vg_check_pv_dev_block_sizes(const struct volume_group *vg)
return 1;
}
static int _check_pv_dev_sizes(struct volume_group *vg)
int check_pv_dev_sizes(struct volume_group *vg)
{
struct pv_list *pvl;
uint64_t dev_size, size;
@@ -708,125 +712,16 @@ static int _check_pv_dev_sizes(struct volume_group *vg)
return r;
}
/*
* Extend a VG by a single PV / device path
*
* Parameters:
* - vg: handle of volume group to extend by 'pv_name'
* - pv_name: device path of PV to add to VG
* - pp: parameters to pass to implicit pvcreate; if NULL, do not pvcreate
* - max_phys_block_size: largest physical block size found amongst PVs in a VG
*
*/
static int vg_extend_single_pv(struct volume_group *vg, char *pv_name,
struct pvcreate_params *pp,
unsigned int *max_phys_block_size)
{
struct physical_volume *pv;
struct pv_to_write *pvw;
int new_pv = 0;
pv = find_pv_by_name(vg->cmd, pv_name, 1, 1);
if (!pv && !pp) {
log_error("%s not identified as an existing "
"physical volume", pv_name);
return 0;
}
if (!pv && pp) {
if (!(pv = pvcreate_vol(vg->cmd, pv_name, pp, 0)))
return_0;
new_pv = 1;
}
if (!(check_dev_block_size_for_vg(pv->dev, (const struct volume_group *) vg,
max_phys_block_size)))
goto_bad;
if (!add_pv_to_vg(vg, pv_name, pv, new_pv))
goto_bad;
if ((pv->fmt->features & FMT_PV_FLAGS) ||
(pv->status & UNLABELLED_PV)) {
if (!(pvw = dm_pool_zalloc(vg->vgmem, sizeof(*pvw)))) {
log_error("pv_to_write allocation for '%s' failed", pv_name);
return 0;
}
pvw->pv = pv;
pvw->pp = new_pv ? pp : NULL;
pvw->new_pv = new_pv;
dm_list_add(&vg->pvs_to_write, &pvw->list);
}
return 1;
bad:
free_pv_fid(pv);
return 0;
}
/*
* FIXME: commands shifting to common code in toollib have left a large
* amount of code only used by liblvm. Either remove this by shifting
* liblvm to use toollib, or isolate all this code into a liblvm-specific
* source file. All the following and more are only used by liblvm:
*
* . vg_extend()
* . vg_extend_single_pv()
* . pvcreate_vol()
* . _pvcreate_check()
* . _pvcreate_write()
* . pvremove_many()
* . pvremove_single()
* . find_pv_by_name()
* . get_pvs()
* . the vg->pvs_to_write list and pv_to_write struct
* . vg_reduce()
*/
/*
* Extend a VG by a single PV / device path
*
* Parameters:
* - vg: handle of volume group to extend by 'pv_name'
* - pv_count: count of device paths of PVs
* - pv_names: device paths of PVs to add to VG
* - pp: parameters to pass to implicit pvcreate; if NULL, do not pvcreate
*
*/
int vg_extend(struct volume_group *vg, int pv_count, const char *const *pv_names,
struct pvcreate_params *pp)
{
int i;
char *pv_name;
unsigned int max_phys_block_size = 0;
if (_vg_bad_status_bits(vg, RESIZEABLE_VG))
return_0;
/* attach each pv */
for (i = 0; i < pv_count; i++) {
if (!(pv_name = dm_strdup(pv_names[i]))) {
log_error("Failed to duplicate pv name %s.", pv_names[i]);
return 0;
}
dm_unescape_colons_and_at_signs(pv_name, NULL, NULL);
if (!vg_extend_single_pv(vg, pv_name, pp, &max_phys_block_size)) {
log_error("Unable to add physical volume '%s' to "
"volume group '%s'.", pv_name, vg->name);
dm_free(pv_name);
return 0;
}
dm_free(pv_name);
}
(void) _check_pv_dev_sizes(vg);
/* FIXME Decide whether to initialise and add new mdahs to format instance */
return 1;
}
int vg_extend_each_pv(struct volume_group *vg, struct pvcreate_params *pp)
{
struct pv_list *pvl;
@@ -834,7 +729,7 @@ int vg_extend_each_pv(struct volume_group *vg, struct pvcreate_params *pp)
log_debug_metadata("Adding PVs to VG %s.", vg->name);
if (_vg_bad_status_bits(vg, RESIZEABLE_VG))
if (vg_bad_status_bits(vg, RESIZEABLE_VG))
return_0;
dm_list_iterate_items(pvl, &pp->pvs) {
@@ -854,37 +749,13 @@ int vg_extend_each_pv(struct volume_group *vg, struct pvcreate_params *pp)
}
}
(void) _check_pv_dev_sizes(vg);
(void) check_pv_dev_sizes(vg);
dm_list_splice(&vg->pv_write_list, &pp->pvs);
return 1;
}
int vg_reduce(struct volume_group *vg, const char *pv_name)
{
struct physical_volume *pv;
struct pv_list *pvl;
if (!(pvl = find_pv_in_vg(vg, pv_name))) {
log_error("Physical volume %s not in volume group %s.",
pv_name, vg->name);
return 0;
}
pv = pvl->pv;
if (vgreduce_single(vg->cmd, vg, pv, 0)) {
dm_list_add(&vg->removed_pvs, &pvl->list);
return 1;
}
log_error("Unable to remove physical volume '%s' from "
"volume group '%s'.", pv_name, vg->name);
return 0;
}
int lv_change_tag(struct logical_volume *lv, const char *tag, int add_tag)
{
char *tag_new;
@@ -1026,51 +897,41 @@ int vgcreate_params_validate(struct cmd_context *cmd,
return 1;
}
static void _vg_wipe_cached_precommitted(struct volume_group *vg)
{
release_vg(vg->vg_precommitted);
vg->vg_precommitted = NULL;
}
static void _vg_move_cached_precommitted_to_committed(struct volume_group *vg)
{
release_vg(vg->vg_committed);
vg->vg_committed = vg->vg_precommitted;
vg->vg_precommitted = NULL;
}
/*
* Update content of precommitted VG
*
* TODO: Optimize in the future, since lvmetad needs similar
* config tree processing in lvmetad_vg_update().
*/
static int _vg_update_vg_precommitted(struct volume_group *vg)
static int _vg_update_embedded_copy(struct volume_group *vg, struct volume_group **vg_embedded)
{
release_vg(vg->vg_precommitted);
vg->vg_precommitted = NULL;
struct dm_config_tree *cft;
if (vg->cft_precommitted) {
dm_config_destroy(vg->cft_precommitted);
vg->cft_precommitted = NULL;
}
_vg_wipe_cached_precommitted(vg);
if (!(vg->cft_precommitted = export_vg_to_config_tree(vg)))
/* Copy the VG using an export followed by import */
if (!(cft = export_vg_to_config_tree(vg)))
return_0;
if (!(vg->vg_precommitted = import_vg_from_config_tree(vg->cft_precommitted, vg->fid))) {
dm_config_destroy(vg->cft_precommitted);
vg->cft_precommitted = NULL;
if (!(*vg_embedded = import_vg_from_config_tree(cft, vg->fid))) {
dm_config_destroy(cft);
return_0;
}
return 1;
}
static int _vg_update_vg_committed(struct volume_group *vg)
{
if (dm_pool_locked(vg->vgmem))
return 1;
if (vg->vg_committed || is_orphan_vg(vg->name)) /* we already have it */
return 1;
if (!_vg_update_vg_precommitted(vg))
return_0;
vg->vg_committed = vg->vg_precommitted;
vg->vg_precommitted = NULL;
if (vg->cft_precommitted) {
dm_config_destroy(vg->cft_precommitted);
vg->cft_precommitted = NULL;
}
dm_config_destroy(cft);
return 1;
}
@@ -1083,7 +944,6 @@ static struct volume_group *_vg_make_handle(struct cmd_context *cmd,
struct volume_group *vg,
uint32_t failure)
{
/* Never return a cached VG structure for a failure */
if (vg && vg->vginfo && failure != SUCCESS) {
release_vg(vg);
@@ -1095,8 +955,13 @@ static struct volume_group *_vg_make_handle(struct cmd_context *cmd,
vg->read_status = failure;
if (vg->fid && !_vg_update_vg_committed(vg))
vg->read_status |= FAILED_ALLOCATION;
/*
* If we hold a write lock and might be changing the VG contents, embed a pristine
* copy of the VG metadata for the activation code to use later
*/
if (vg->fid && !dm_pool_locked(vg->vgmem) && !vg->vg_committed && !is_orphan_vg(vg->name))
if (vg_write_lock_held() && !_vg_update_embedded_copy(vg, &vg->vg_committed))
vg->read_status |= FAILED_ALLOCATION;
return vg;
}
@@ -1571,168 +1436,6 @@ void pvcreate_params_set_defaults(struct pvcreate_params *pp)
dm_list_init(&pp->pvs);
}
/*
* See if we may pvcreate on this device.
* 0 indicates we may not.
*/
static int _pvcreate_check(struct cmd_context *cmd, const char *name,
struct pvcreate_params *pp, int *wiped)
{
static const char really_init_msg[] = "Really INITIALIZE physical volume";
static const char not_init_msg[] = "physical volume not initialized";
struct physical_volume *pv;
struct device *dev;
int r = 0;
int scan_needed = 0;
int filter_refresh_needed = 0;
int used;
/* FIXME Check partition type is LVM unless --force is given */
*wiped = 0;
/* Is there a pv here already? */
pv = find_pv_by_name(cmd, name, 1, 1);
/* Allow partial & exported VGs to be destroyed. */
/* We must have -ff to overwrite a non orphan */
if (pv) {
if (!is_orphan(pv) && pp->force != DONT_PROMPT_OVERRIDE) {
log_error("Can't initialize physical volume \"%s\" of "
"volume group \"%s\" without -ff.", name, pv_vg_name(pv));
goto out;
}
if ((used = is_used_pv(pv)) < 0)
goto_out;
if (used && pp->force != DONT_PROMPT_OVERRIDE) {
log_error("PV %s is used by a VG but its metadata is missing.", name);
log_error("Can't initialize PV '%s' without -ff.", name);
goto out;
}
}
/* prompt */
if (pv && !pp->yes) {
if (is_orphan(pv)) {
if (used) {
if (yes_no_prompt("%s \"%s\" that is marked as belonging to a VG [y/n]? ",
really_init_msg, name) == 'n') {
log_error("%s: %s", name, not_init_msg);
goto out;
}
}
} else {
if (yes_no_prompt("%s \"%s\" of volume group \"%s\" [y/n]? ",
really_init_msg, name, pv_vg_name(pv)) == 'n') {
log_error("%s: %s", name, not_init_msg);
goto out;
}
}
}
if (sigint_caught())
goto_out;
dev = dev_cache_get(name, cmd->full_filter);
/*
* Refresh+rescan at the end is needed if:
* - we don't obtain device list from udev,
* hence persistent cache file is used
* and we need to trash it and reevaluate
* for any changes done outside - adding
* any new foreign signature which may affect
* filtering - before we do pvcreate, we
* need to be sure that we have up-to-date
* view for filters
*
* - we have wiped existing foreign signatures
* from dev as this may affect what's filtered
* as well
*
*
* Only rescan at the end is needed if:
* - we've just checked whether dev is filtered
* by MD filter. We do the refresh in-situ,
* so no need to require the refresh at the
* end of this fn. This is to allow for
* wiping MD signature during pvcreate for
* the dev - the dev would normally be
* filtered because of MD filter.
* This is an exception.
*/
/* Is there an md superblock here? */
if (!dev && md_filtering()) {
if (!refresh_filters(cmd))
goto_out;
init_md_filtering(0);
dev = dev_cache_get(name, cmd->full_filter);
init_md_filtering(1);
scan_needed = 1;
} else if (!obtain_device_list_from_udev())
filter_refresh_needed = scan_needed = 1;
if (!dev) {
log_error("Device %s not found (or ignored by filtering).", name);
goto out;
}
/*
* This test will fail if the device belongs to an MD array.
*/
if (!dev_test_excl(dev)) {
/* FIXME Detect whether device-mapper itself is still using it */
log_error("Can't open %s exclusively. Mounted filesystem?",
name);
goto out;
}
if (!wipe_known_signatures(cmd, dev, name,
TYPE_LVM1_MEMBER | TYPE_LVM2_MEMBER,
0, pp->yes, pp->force, wiped)) {
log_error("Aborting pvcreate on %s.", name);
goto out;
}
if (*wiped)
filter_refresh_needed = scan_needed = 1;
if (sigint_caught())
goto_out;
if (pv && !is_orphan(pv) && pp->force)
log_warn("WARNING: Forcing physical volume creation on "
"%s%s%s%s", name,
!is_orphan(pv) ? " of volume group \"" : "",
pv_vg_name(pv),
!is_orphan(pv) ? "\"" : "");
r = 1;
out:
if (filter_refresh_needed)
if (!refresh_filters(cmd)) {
stack;
r = 0;
}
if (scan_needed) {
lvmcache_force_next_label_scan();
if (!lvmcache_label_scan(cmd)) {
stack;
r = 0;
}
}
free_pv_fid(pv);
return r;
}
static int _pvcreate_write(struct cmd_context *cmd, struct pv_to_write *pvw)
{
struct physical_volume *pv = pvw->pv;
@@ -1753,7 +1456,7 @@ static int _pvcreate_write(struct cmd_context *cmd, struct pv_to_write *pvw)
return 0;
}
if (!dev_set(dev, UINT64_C(0), (size_t) 2048, 0)) {
if (!dev_set(dev, UINT64_C(0), (size_t) 2048, DEV_IO_LABEL, 0)) {
log_error("%s not wiped: aborting", pv_name);
if (!dev_close(dev))
stack;
@@ -1780,129 +1483,6 @@ static int _pvcreate_write(struct cmd_context *cmd, struct pv_to_write *pvw)
return 1;
}
static int _verify_pv_create_params(struct pvcreate_params *pp)
{
/*
* FIXME: Some of these checks are duplicates in pvcreate_params_validate.
*/
if (pp->pva.pvmetadatacopies > 2) {
log_error("Metadatacopies may only be 0, 1 or 2");
return 0;
}
if (pp->pva.data_alignment > UINT32_MAX) {
log_error("Physical volume data alignment is too big.");
return 0;
}
if (pp->pva.data_alignment_offset > UINT32_MAX) {
log_error("Physical volume data alignment offset is too big.");
return 0;
}
return 1;
}
/*
* pvcreate_vol() - initialize a device with PV label and metadata area
*
* Parameters:
* - pv_name: device path to initialize
* - pp: parameters to pass to pv_create; if NULL, use default values
*
* Returns:
* NULL: error
* struct physical_volume * (non-NULL): handle to physical volume created
*/
struct physical_volume *pvcreate_vol(struct cmd_context *cmd, const char *pv_name,
struct pvcreate_params *pp, int write_now)
{
struct physical_volume *pv = NULL;
struct device *dev;
int wiped = 0;
struct dm_list mdas;
struct pvcreate_params default_pp;
char buffer[64] __attribute__((aligned(8)));
dev_ext_t dev_ext_src;
pvcreate_params_set_defaults(&default_pp);
if (!pp)
pp = &default_pp;
if (!_verify_pv_create_params(pp)) {
goto bad;
}
if (pp->pva.idp) {
if ((dev = lvmcache_device_from_pvid(cmd, pp->pva.idp, NULL, NULL)) &&
(dev != dev_cache_get(pv_name, cmd->full_filter))) {
if (!id_write_format((const struct id*)&pp->pva.idp->uuid,
buffer, sizeof(buffer)))
goto_bad;
log_error("uuid %s already in use on \"%s\"", buffer,
dev_name(dev));
goto bad;
}
}
if (!_pvcreate_check(cmd, pv_name, pp, &wiped))
goto_bad;
if (sigint_caught())
goto_bad;
/*
* wipe_known_signatures called in _pvcreate_check fires
* WATCH event to update udev database. But at the moment,
* we have no way to synchronize with such event - we may
* end up still seeing the old info in udev db and pvcreate
* can fail to proceed because of the device still being
* filtered (because of the stale info in udev db).
* Disable udev dev-ext source temporarily here for
* this reason and rescan with DEV_EXT_NONE dev-ext
* source (so filters use DEV_EXT_NONE source).
*/
dev_ext_src = external_device_info_source();
if (wiped && (dev_ext_src == DEV_EXT_UDEV))
init_external_device_info_source(DEV_EXT_NONE);
dev = dev_cache_get(pv_name, cmd->full_filter);
init_external_device_info_source(dev_ext_src);
if (!dev) {
log_error("%s: Couldn't find device. Check your filters?",
pv_name);
goto bad;
}
dm_list_init(&mdas);
if (!(pv = pv_create(cmd, dev, &pp->pva))) {
log_error("Failed to setup physical volume \"%s\"", pv_name);
goto bad;
}
log_verbose("Set up physical volume for \"%s\" with %" PRIu64
" available sectors", pv_name, pv_size(pv));
pv->status |= UNLABELLED_PV;
if (write_now) {
struct pv_to_write pvw;
pvw.pp = pp;
pvw.pv = pv;
pvw.new_pv = 1;
if (!_pvcreate_write(cmd, &pvw))
goto bad;
}
return pv;
bad:
return NULL;
}
static struct physical_volume *_alloc_pv(struct dm_pool *mem, struct device *dev)
{
struct physical_volume *pv;
@@ -2201,48 +1781,6 @@ struct physical_volume *find_pv(struct volume_group *vg, struct device *dev)
return NULL;
}
/* FIXME: liblvm todo - make into function that returns handle */
struct physical_volume *find_pv_by_name(struct cmd_context *cmd,
const char *pv_name,
int allow_orphan, int allow_unformatted)
{
struct device *dev;
struct pv_list *pvl;
struct dm_list *pvslist;
struct physical_volume *pv = NULL;
lvmcache_seed_infos_from_lvmetad(cmd);
if (!(dev = dev_cache_get(pv_name, cmd->filter))) {
if (!allow_unformatted)
log_error("Physical volume %s not found", pv_name);
return_NULL;
}
if (!(pvslist = get_pvs(cmd)))
return_NULL;
dm_list_iterate_items(pvl, pvslist)
if (pvl->pv->dev == dev)
pv = pvl->pv;
else
free_pv_fid(pvl->pv);
if (!pv && !allow_unformatted)
log_error("Physical volume %s not found", pv_name);
if (pv && !allow_orphan && is_orphan_vg(pv->vg_name)) {
log_error("Physical volume %s not in a volume group", pv_name);
goto bad;
}
return pv;
bad:
free_pv_fid(pv);
return NULL;
}
/* Find segment at a given logical extent in an LV */
struct lv_segment *find_seg_by_le(const struct logical_volume *lv, uint32_t le)
{
@@ -2402,8 +1940,6 @@ static int _lv_each_dependency(struct logical_volume *lv,
struct dm_list *snh;
struct logical_volume *deps[] = {
(lv->rdevice && lv != lv->rdevice->lv) ? lv->rdevice->lv : 0,
(lv->rdevice && lv != lv->rdevice->slog) ? lv->rdevice->slog : 0,
lv->snapshot ? lv->snapshot->origin : 0,
lv->snapshot ? lv->snapshot->cow : 0 };
for (i = 0; i < DM_ARRAY_SIZE(deps); ++i) {
@@ -2416,8 +1952,6 @@ static int _lv_each_dependency(struct logical_volume *lv,
return_0;
if (lvseg->log_lv && !fn(lvseg->log_lv, data))
return_0;
if (lvseg->rlog_lv && !fn(lvseg->rlog_lv, data))
return_0;
if (lvseg->pool_lv && !fn(lvseg->pool_lv, data))
return_0;
if (lvseg->metadata_lv && !fn(lvseg->metadata_lv, data))
@@ -3505,6 +3039,8 @@ int vg_write(struct volume_group *vg)
/* Write to each copy of the metadata area */
dm_list_iterate_items(mda, &vg->fid->metadata_areas_in_use) {
if (mda->status & MDA_FAILED)
continue;
if (!mda->ops->vg_write) {
log_error("Format does not support writing volume"
"group metadata areas");
@@ -3529,6 +3065,9 @@ int vg_write(struct volume_group *vg)
dm_list_uniterate(mdah, &vg->fid->metadata_areas_in_use, &mda->list) {
mda = dm_list_item(mdah, struct metadata_area);
if (mda->status & MDA_FAILED)
continue;
if (mda->ops->vg_revert &&
!mda->ops->vg_revert(vg->fid, vg, mda)) {
stack;
@@ -3557,7 +3096,7 @@ int vg_write(struct volume_group *vg)
}
}
if (!_vg_update_vg_precommitted(vg)) /* prepare precommited */
if (!_vg_update_embedded_copy(vg, &vg->vg_precommitted)) /* prepare precommited */
return_0;
lockd_vg_update(vg);
@@ -3616,6 +3155,7 @@ static int _vg_commit_mdas(struct volume_group *vg)
int vg_commit(struct volume_group *vg)
{
int cache_updated = 0;
struct pv_list *pvl;
if (!lvmcache_vgname_is_locked(vg->name)) {
log_error(INTERNAL_ERROR "Attempt to write new VG metadata "
@@ -3631,20 +3171,17 @@ int vg_commit(struct volume_group *vg)
/* Instruct remote nodes to upgrade cached metadata. */
if (!remote_commit_cached_metadata(vg))
stack; // FIXME: What should we do?
/*
* We need to clear old_name after a successful commit.
* The volume_group structure could be reused later.
*/
vg->old_name = NULL;
dm_list_iterate_items(pvl, &vg->pvs)
pvl->pv->status &= ~PV_MOVED_VG;
/* This *is* the original now that it's committed. */
release_vg(vg->vg_committed);
vg->vg_committed = vg->vg_precommitted;
vg->vg_precommitted = NULL;
if (vg->cft_precommitted) {
dm_config_destroy(vg->cft_precommitted);
vg->cft_precommitted = NULL;
}
_vg_move_cached_precommitted_to_committed(vg);
}
/* If update failed, remove any cached precommitted metadata. */
@@ -3677,12 +3214,7 @@ void vg_revert(struct volume_group *vg)
}
}
release_vg(vg->vg_precommitted); /* VG is no longer needed */
vg->vg_precommitted = NULL;
if (vg->cft_precommitted) {
dm_config_destroy(vg->cft_precommitted);
vg->cft_precommitted = NULL;
}
_vg_wipe_cached_precommitted(vg); /* VG is no longer needed */
dm_list_iterate_items(mda, &vg->fid->metadata_areas_in_use) {
if (mda->ops->vg_revert &&
@@ -4253,12 +3785,18 @@ static struct volume_group *_vg_read(struct cmd_context *cmd,
struct dm_list *pvids;
struct pv_list *pvl;
struct dm_list all_pvs;
char uuid[64] __attribute__((aligned(8)));
unsigned seqno = 0;
int reappeared = 0;
struct cached_vg_fmtdata *vg_fmtdata = NULL; /* Additional format-specific data about the vg */
unsigned use_previous_vg;
log_very_verbose("Reading VG %s %.32s", vgname ?: "<no name>", vgid ?: "<no vgid>");
uuid[0] = '\0';
if (vgid && !id_write_format((const struct id*)vgid, uuid, sizeof(uuid)))
stack;
log_very_verbose("Reading VG %s %s", vgname ?: "<no name>", vgid ? uuid : "<no vgid>");
if (is_orphan_vg(vgname)) {
if (use_precommitted) {
@@ -4335,7 +3873,7 @@ static struct volume_group *_vg_read(struct cmd_context *cmd,
/* Now determine the correct vgname if none was supplied */
if (!vgname && !(vgname = lvmcache_vgname_from_vgid(cmd->mem, vgid))) {
log_debug_metadata("Cache did not find VG name from vgid %.32s", vgid);
log_debug_metadata("Cache did not find VG name from vgid %s", uuid);
return_NULL;
}
@@ -4374,9 +3912,9 @@ static struct volume_group *_vg_read(struct cmd_context *cmd,
use_previous_vg = 0;
if ((use_precommitted &&
!(vg = mda->ops->vg_read_precommit(fid, vgname, mda, &vg_fmtdata, &use_previous_vg)) && !use_previous_vg) ||
!(vg = mda->ops->vg_read_precommit(fid, vgname, mda, &vg_fmtdata, &use_previous_vg, 0)) && !use_previous_vg) ||
(!use_precommitted &&
!(vg = mda->ops->vg_read(fid, vgname, mda, &vg_fmtdata, &use_previous_vg, 0)) && !use_previous_vg)) {
!(vg = mda->ops->vg_read(fid, vgname, mda, &vg_fmtdata, &use_previous_vg, 0, 0)) && !use_previous_vg)) {
inconsistent = 1;
vg_fmtdata = NULL;
continue;
@@ -4566,9 +4104,9 @@ static struct volume_group *_vg_read(struct cmd_context *cmd,
use_previous_vg = 0;
if ((use_precommitted &&
!(vg = mda->ops->vg_read_precommit(fid, vgname, mda, &vg_fmtdata, &use_previous_vg)) && !use_previous_vg) ||
!(vg = mda->ops->vg_read_precommit(fid, vgname, mda, &vg_fmtdata, &use_previous_vg, 0)) && !use_previous_vg) ||
(!use_precommitted &&
!(vg = mda->ops->vg_read(fid, vgname, mda, &vg_fmtdata, &use_previous_vg, 0)) && !use_previous_vg)) {
!(vg = mda->ops->vg_read(fid, vgname, mda, &vg_fmtdata, &use_previous_vg, 0, 0)) && !use_previous_vg)) {
inconsistent = 1;
vg_fmtdata = NULL;
continue;
@@ -4897,7 +4435,7 @@ struct volume_group *vg_read_internal(struct cmd_context *cmd, const char *vgnam
if (!(vg = _vg_read(cmd, vgname, vgid, warn_flags, consistent, 0)))
goto_out;
if (!_check_pv_dev_sizes(vg))
if (!check_pv_dev_sizes(vg))
log_warn("One or more devices used as PVs in VG %s "
"have changed sizes.", vg->name);
@@ -5486,8 +5024,7 @@ int pv_analyze(struct cmd_context *cmd, const char *pv_name,
dev = dev_cache_get(pv_name, cmd->filter);
if (!dev) {
log_error("Device %s not found (or ignored by filtering).",
pv_name);
log_error("Device %s %s.", pv_name, dev_cache_filtered_reason(pv_name));
return 0;
}
@@ -5572,8 +5109,7 @@ static int _access_vg_clustered(struct cmd_context *cmd, const struct volume_gro
*
* FIXME Remove the unnecessary duplicate definitions and return bits directly.
*/
static uint32_t _vg_bad_status_bits(const struct volume_group *vg,
uint64_t status)
uint32_t vg_bad_status_bits(const struct volume_group *vg, uint64_t status)
{
uint32_t failure = 0;
@@ -5609,7 +5145,7 @@ static uint32_t _vg_bad_status_bits(const struct volume_group *vg,
*/
int vg_check_status(const struct volume_group *vg, uint64_t status)
{
return !_vg_bad_status_bits(vg, status);
return !vg_bad_status_bits(vg, status);
}
/*
@@ -5829,7 +5365,7 @@ static int _access_vg_systemid(struct cmd_context *cmd, struct volume_group *vg)
}
/*
* FIXME: move _vg_bad_status_bits() checks in here.
* FIXME: move vg_bad_status_bits() checks in here.
*/
static int _vg_access_permitted(struct cmd_context *cmd, struct volume_group *vg,
uint32_t lockd_state, uint32_t *failure)
@@ -5965,7 +5501,7 @@ static struct volume_group *_vg_lock_and_read(struct cmd_context *cmd, const cha
goto bad;
}
failure |= _vg_bad_status_bits(vg, status_flags);
failure |= vg_bad_status_bits(vg, status_flags);
if (failure)
goto_bad;

View File

@@ -48,7 +48,6 @@
*/
#define dm_round_up(n, sz) (dm_div_up((n), (sz)) * (sz))
/* Various flags */
/* See metadata-exported.h for the complete list. */
/* Note that the bits no longer necessarily correspond to LVM1 disk format */
@@ -81,12 +80,12 @@ struct metadata_area_ops {
struct metadata_area * mda,
struct cached_vg_fmtdata **vg_fmtdata,
unsigned *use_previous_vg,
int single_device);
int single_device, unsigned ioflags);
struct volume_group *(*vg_read_precommit) (struct format_instance * fi,
const char *vg_name,
struct metadata_area * mda,
struct cached_vg_fmtdata **vg_fmtdata,
unsigned *use_previous_vg);
unsigned *use_previous_vg, unsigned ioflags);
/*
* Write out complete VG metadata. You must ensure internal
* consistency before calling. eg. PEs can't refer to PVs not
@@ -162,6 +161,13 @@ struct metadata_area_ops {
#define MDA_INCONSISTENT 0x00000002
#define MDA_FAILED 0x00000004
/* The primary metadata area on a device if the format supports more than one. */
#define MDA_PRIMARY 0x00000008
#define mda_is_primary(mda) (((mda->status) & MDA_PRIMARY) ? 1 : 0)
#define MDA_CONTENT_REASON(primary_mda) ((primary_mda) ? DEV_IO_MDA_CONTENT : DEV_IO_MDA_EXTRA_CONTENT)
#define MDA_HEADER_REASON(primary_mda) ((primary_mda) ? DEV_IO_MDA_HEADER : DEV_IO_MDA_EXTRA_HEADER)
struct metadata_area {
struct dm_list list;
struct metadata_area_ops *ops;
@@ -353,16 +359,13 @@ unsigned long set_pe_align_offset(struct physical_volume *pv,
int pv_write_orphan(struct cmd_context *cmd, struct physical_volume *pv);
struct physical_volume *pvcreate_vol(struct cmd_context *cmd, const char *pv_name,
struct pvcreate_params *pp, int write_now);
int check_dev_block_size_for_vg(struct device *dev, const struct volume_group *vg,
unsigned int *max_phys_block_size_found);
int check_pv_dev_sizes(struct volume_group *vg);
uint32_t vg_bad_status_bits(const struct volume_group *vg, uint64_t status);
int add_pv_to_vg(struct volume_group *vg, const char *pv_name,
struct physical_volume *pv, int new_pv);
/* Manipulate PV structures */
int pv_add(struct volume_group *vg, struct physical_volume *pv);
int pv_remove(struct volume_group *vg, struct physical_volume *pv);
struct physical_volume *pv_find(struct volume_group *vg, const char *pv_name);
/* Find a PV within a given VG */
int get_pv_from_vg_by_id(const struct format_type *fmt, const char *vg_name,
@@ -375,10 +378,6 @@ struct logical_volume *find_lv_in_vg_by_lvid(struct volume_group *vg,
struct lv_list *find_lv_in_lv_list(const struct dm_list *ll,
const struct logical_volume *lv);
/* Return the VG that contains a given LV (based on path given in lv_name) */
/* or environment var */
struct volume_group *find_vg_with_lv(const char *lv_name);
/* Find LV with given lvid (used during activation) */
struct logical_volume *lv_from_lvid(struct cmd_context *cmd,
const char *lvid_s,
@@ -420,11 +419,6 @@ int check_lv_segments(struct logical_volume *lv, int complete_vg);
*/
int lv_has_constant_stripes(struct logical_volume *lv);
/*
* Checks that a replicator segment is correct.
*/
int check_replicator_segment(const struct lv_segment *rseg);
/*
* Sometimes (eg, after an lvextend), it is possible to merge two
* adjacent segments into a single segment. This function tries
@@ -471,8 +465,6 @@ void lv_calculate_readahead(const struct logical_volume *lv, uint32_t *read_ahea
*/
size_t export_vg_to_buffer(struct volume_group *vg, char **buf);
struct dm_config_tree *export_vg_to_config_tree(struct volume_group *vg);
struct volume_group *import_vg_from_buffer(const char *buf,
struct format_instance *fid);
struct volume_group *import_vg_from_config_tree(const struct dm_config_tree *cft,
struct format_instance *fid);
struct volume_group *import_vg_from_lvmetad_config_tree(const struct dm_config_tree *cft,

View File

@@ -304,7 +304,7 @@ static int _write_log_header(struct cmd_context *cmd, struct logical_volume *lv)
if (!dev_open_quiet(dev))
return 0;
if (!dev_write(dev, UINT64_C(0), sizeof(log_header), &log_header)) {
if (!dev_write(dev, UINT64_C(0), sizeof(log_header), DEV_IO_LV, &log_header)) {
log_error("Failed to write log header to %s.", name);
dev_close_immediate(dev);
return 0;
@@ -1038,50 +1038,8 @@ static int _remove_mirror_images(struct logical_volume *lv,
* remove the LVs from the mirror set, commit that metadata
* then deactivate and remove them fully.
*/
if (!vg_write(mirrored_seg->lv->vg)) {
log_error("intermediate VG write failed.");
return 0;
}
if (!suspend_lv_origin(mirrored_seg->lv->vg->cmd, mirrored_seg->lv)) {
log_error("Failed to lock %s.", display_lvname(mirrored_seg->lv));
vg_revert(mirrored_seg->lv->vg);
return 0;
}
/* FIXME: second suspend should not be needed
* Explicitly suspend temporary LV.
* This balances critical_section_inc() calls with critical_section_dec()
* in resume (both local and cluster) and also properly propagates precommitted
* metadata into dm table on other nodes.
* FIXME: check propagation of suspend with visible flag
*/
if (temp_layer_lv && !suspend_lv(temp_layer_lv->vg->cmd, temp_layer_lv))
log_error("Problem suspending temporary LV %s.", display_lvname(temp_layer_lv));
if (!vg_commit(mirrored_seg->lv->vg)) {
if (!resume_lv(mirrored_seg->lv->vg->cmd, mirrored_seg->lv))
stack;
if (!lv_update_and_reload_origin(mirrored_seg->lv))
return_0;
}
log_very_verbose("Updating %s in kernel.", display_lvname(mirrored_seg->lv));
/*
* Avoid having same mirror target loaded twice simultaneously by first
* resuming the removed LV which now contains an error segment.
* As it's now detached from mirrored_seg->lv we must resume it
* explicitly.
*/
if (temp_layer_lv && !resume_lv(temp_layer_lv->vg->cmd, temp_layer_lv)) {
log_error("Problem resuming temporary LV %s.", display_lvname(temp_layer_lv));
return 0;
}
if (!resume_lv_origin(mirrored_seg->lv->vg->cmd, mirrored_seg->lv)) {
log_error("Problem reactivating %s.", display_lvname(mirrored_seg->lv));
return 0;
}
/* Save or delete the 'orphan' LVs */
reactivate = lv_is_active(lv_lock_holder(lv));
@@ -1512,6 +1470,9 @@ const struct logical_volume *find_pvmove_lv_in_lv(const struct logical_volume *l
const struct lv_segment *seg;
uint32_t s;
if (lv_is_pvmove(lv))
return lv;
dm_list_iterate_items(seg, &lv->segments) {
for (s = 0; s < seg->area_count; s++) {
if (seg_type(seg, s) != AREA_LV)
@@ -2145,6 +2106,18 @@ int lv_add_mirrors(struct cmd_context *cmd, struct logical_volume *lv,
}
}
if (lv->vg->lock_type && !strcmp(lv->vg->lock_type, "dlm") && cmd->lockd_lv_sh) {
if (!cluster_mirror_is_available(cmd)) {
log_error("Shared cluster mirrors are not available.");
return 0;
}
if (log_count > 1) {
log_error("Log type, \"mirrored\", is unavailable to cluster mirrors.");
return 0;
}
}
/* For corelog mirror, activation code depends on
* the global mirror_in_sync status. As we are adding
* a new mirror, it should be set as 'out-of-sync'

View File

@@ -526,7 +526,7 @@ int create_pool(struct logical_volume *pool_lv,
* or directly converted to invisible device via suspend/resume
*/
pool_lv->status |= LV_TEMPORARY;
if (!activate_lv_local(pool_lv->vg->cmd, pool_lv)) {
if (!activate_lv_excl_local(pool_lv->vg->cmd, pool_lv)) {
log_error("Aborting. Failed to activate pool metadata %s.",
display_lvname(pool_lv));
goto bad;
@@ -538,7 +538,7 @@ int create_pool(struct logical_volume *pool_lv,
}
pool_lv->status &= ~LV_TEMPORARY;
/* Deactivates cleared metadata LV */
if (!deactivate_lv_local(pool_lv->vg->cmd, pool_lv)) {
if (!deactivate_lv(pool_lv->vg->cmd, pool_lv)) {
log_error("Aborting. Could not deactivate pool metadata %s.",
display_lvname(pool_lv));
return 0;
@@ -660,7 +660,7 @@ static struct logical_volume *_alloc_pool_metadata_spare(struct volume_group *vg
return_0;
/* Spare LV should not be active */
if (!deactivate_lv_local(vg->cmd, lv)) {
if (!deactivate_lv(vg->cmd, lv)) {
log_error("Unable to deactivate pool metadata spare LV. "
"Manual intervention required.");
return 0;

View File

@@ -19,12 +19,8 @@
#include "toolcontext.h"
#include "locking.h"
#include "defaults.h"
#include "lvmcache.h"
#include "lvmetad.h"
#include "display.h"
#include "label.h"
#include "archiver.h"
#include "lvm-signal.h"
static struct pv_segment *_alloc_pv_segment(struct dm_pool *mem,
struct physical_volume *pv,
@@ -84,8 +80,8 @@ int peg_dup(struct dm_pool *mem, struct dm_list *peg_new, struct dm_list *peg_ol
}
/* Find segment at a given physical extent in a PV */
static struct pv_segment *find_peg_by_pe(const struct physical_volume *pv,
uint32_t pe)
static struct pv_segment *_find_peg_by_pe(const struct physical_volume *pv,
uint32_t pe)
{
struct pv_segment *pvseg;
@@ -137,7 +133,7 @@ int pv_split_segment(struct dm_pool *mem,
if (pe == pv->pe_count)
goto out;
if (!(pvseg = find_peg_by_pe(pv, pe))) {
if (!(pvseg = _find_peg_by_pe(pv, pe))) {
log_error("Segment with extent %" PRIu32 " in PV %s not found",
pe, pv_dev_name(pv));
return 0;
@@ -158,7 +154,7 @@ out:
return 1;
}
static struct pv_segment null_pv_segment = {
static struct pv_segment _null_pv_segment = {
.pv = NULL,
.pe = 0,
};
@@ -172,7 +168,7 @@ struct pv_segment *assign_peg_to_lvseg(struct physical_volume *pv,
/* Missing format1 PV */
if (!pv)
return &null_pv_segment;
return &_null_pv_segment;
if (!pv_split_segment(seg->lv->vg->vgmem, pv, pe, &peg) ||
!pv_split_segment(seg->lv->vg->vgmem, pv, pe + area_len, NULL))
@@ -556,9 +552,7 @@ static int _extend_pv(struct physical_volume *pv, struct volume_group *vg,
* Resize a PV in a VG, adding or removing segments as needed.
* New size must fit within pv->size.
*/
static int pv_resize(struct physical_volume *pv,
struct volume_group *vg,
uint64_t size)
static int _pv_resize(struct physical_volume *pv, struct volume_group *vg, uint64_t size)
{
uint32_t old_pe_count, new_pe_count = 0;
@@ -674,7 +668,7 @@ int pv_resize_single(struct cmd_context *cmd,
log_verbose("Resizing volume \"%s\" to %" PRIu64 " sectors.",
pv_name, size);
if (!pv_resize(pv, vg, size))
if (!_pv_resize(pv, vg, size))
goto_out;
log_verbose("Updating physical volume \"%s\"", pv_name);
@@ -707,179 +701,3 @@ out:
"to repair from archived metadata.");
return r;
}
/*
 * Decide whether it is "safe" to wipe the labels on this device.
 * 0 indicates we may not.
 *
 * name        - device name, looked up with dev_cache_get() using cmd->filter.
 * force_count - how many times force was requested: any non-zero value
 *               accepts a device without a PV label; a value of at least 2
 *               is needed for a PV that is in a VG or is marked as used.
 * prompt      - when zero, the user is asked to confirm wiping an in-use PV.
 *               NOTE(review): the name suggests the opposite sense -
 *               confirm against the callers.
 * pvslist     - list of struct pv_list entries searched for the PV that
 *               sits on this device.
 *
 * Returns 1 when the label may be wiped, 0 otherwise.
 */
static int pvremove_check(struct cmd_context *cmd, const char *name,
unsigned force_count, unsigned prompt, struct dm_list *pvslist)
{
static const char really_wipe_msg[] = "Really WIPE LABELS from physical volume";
struct device *dev;
struct label *label;
struct pv_list *pvl;
struct physical_volume *pv = NULL;
int used;
int r = 0;
/* FIXME Check partition type is LVM unless --force is given */
if (!(dev = dev_cache_get(name, cmd->filter))) {
log_error("Device %s not found.", name);
return 0;
}
/* Is there a pv here already? */
/* If not, this is an error unless you used -f. */
if (!label_read(dev, &label, 0)) {
if (force_count)
return 1;
log_error("No PV label found on %s.", name);
return 0;
}
/* Pick the PV from the supplied list whose device is this one. */
dm_list_iterate_items(pvl, pvslist)
if (pvl->pv->dev == dev)
pv = pvl->pv;
if (!pv) {
log_error(INTERNAL_ERROR "Physical Volume %s has a label, "
"but is neither in a VG nor orphan.", name);
goto out; /* better safe than sorry */
}
if (is_orphan(pv)) {
/*
 * A negative result from is_used_pv() leaves through "out", so the
 * "label not removed" messages under "bad" are not printed.
 */
if ((used = is_used_pv(pv)) < 0)
goto_out;
if (used) {
log_warn("WARNING: PV %s is used by a VG but its metadata is missing.", name);
if (force_count < 2)
goto_bad;
if (!prompt &&
yes_no_prompt("%s \"%s\" that is marked as belonging to a VG [y/n]? ",
really_wipe_msg, name) == 'n')
goto_bad;
}
} else {
/* The PV is not an orphan: require force twice plus confirmation. */
log_warn("WARNING: PV %s is used by VG %s (consider using vgreduce).", name, pv_vg_name(pv));
if (force_count < 2)
goto_bad;
if (!prompt &&
yes_no_prompt("%s \"%s\" of volume group \"%s\" [y/n]? ",
really_wipe_msg, name, pv_vg_name(pv)) == 'n')
goto_bad;
}
if (force_count)
log_warn("WARNING: Wiping physical volume label from "
"%s%s%s%s", name,
!is_orphan(pv) ? " of volume group \"" : "",
pv_vg_name(pv),
!is_orphan(pv) ? "\"" : "");
r = 1;
/* The success path falls through here too; the messages only fire when r is 0. */
bad:
if (!r) {
log_error("%s: physical volume label not removed.", name);
if (force_count < 2) /* Show hint as log_error() */
log_error("(If you are certain you need pvremove, "
"then confirm by using --force twice.)");
}
out:
return r;
}
int pvremove_single(struct cmd_context *cmd, const char *pv_name,
void *handle __attribute__((unused)), unsigned force_count,
unsigned prompt, struct dm_list *pvslist)
{
struct device *dev;
struct lvmcache_info *info;
int r = 0;
if (!pvremove_check(cmd, pv_name, force_count, prompt, pvslist))
goto out;
if (!(dev = dev_cache_get(pv_name, cmd->filter))) {
log_error("%s: Couldn't find device. Check your filters?",
pv_name);
goto out;
}
info = lvmcache_info_from_pvid(dev->pvid, dev, 0);
if (!dev_test_excl(dev)) {
/* FIXME Detect whether device-mapper is still using the device */
log_error("Can't open %s exclusively - not removing. "
"Mounted filesystem?", dev_name(dev));
goto out;
}
/* Wipe existing label(s) */
if (!label_remove(dev)) {
log_error("Failed to wipe existing label(s) on %s", pv_name);
goto out;
}
if (info)
lvmcache_del(info);
if (!lvmetad_pv_gone_by_dev(dev))
goto_out;
log_print_unless_silent("Labels on physical volume \"%s\" successfully wiped",
pv_name);
r = 1;
out:
return r;
}
int pvremove_many(struct cmd_context *cmd, struct dm_list *pv_names,
unsigned force_count, unsigned prompt)
{
int ret = 1;
struct dm_list *pvslist = NULL;
struct pv_list *pvl;
const struct dm_str_list *pv_name;
if (!lock_vol(cmd, VG_ORPHANS, LCK_VG_WRITE, NULL)) {
log_error("Can't get lock for orphan PVs");
return 0;
}
lvmcache_seed_infos_from_lvmetad(cmd);
if (!(pvslist = get_pvs(cmd))) {
ret = 0;
goto_out;
}
dm_list_iterate_items(pv_name, pv_names) {
if (!pvremove_single(cmd, pv_name->str, NULL, force_count, prompt, pvslist)) {
stack;
ret = 0;
}
if (sigint_caught()) {
ret = 0;
goto_out;
}
}
out:
unlock_vg(cmd, NULL, VG_ORPHANS);
if (pvslist)
dm_list_iterate_items(pvl, pvslist)
free_pv_fid(pvl->pv);
return ret;
}

View File

@@ -502,7 +502,6 @@ static int _reset_flags_passed_to_kernel(struct logical_volume *lv, int *flags_r
return 0;
if (slv->status & LV_RESHAPE_DELTA_DISKS_MINUS) {
*flags_reset = 1;
slv->status |= LV_REMOVE_AFTER_RESHAPE;
seg_metalv(seg, s)->status |= LV_REMOVE_AFTER_RESHAPE;
}
@@ -1321,7 +1320,7 @@ static int _cmp_level(const struct segment_type *t1, const struct segment_type *
*
* Return 1 if same, else != 1
*/
static int is_same_level(const struct segment_type *t1, const struct segment_type *t2)
static int _is_same_level(const struct segment_type *t1, const struct segment_type *t2)
{
return _cmp_level(t1, t2);
}
@@ -1692,10 +1691,10 @@ static int _lv_alloc_reshape_space(struct logical_volume *lv,
static int _lv_free_reshape_space_with_status(struct logical_volume *lv, enum alloc_where *where_it_was)
{
uint32_t total_reshape_len;
enum alloc_where where;
struct lv_segment *seg = first_seg(lv);
if ((total_reshape_len = _reshape_len_per_lv(lv))) {
enum alloc_where where;
/*
* raid10:
*
@@ -1743,9 +1742,11 @@ static int _lv_free_reshape_space_with_status(struct logical_volume *lv, enum al
return_0;
lv->status &= ~LV_RESHAPE_DATA_OFFSET;
} else
where = alloc_none;
} else if (where_it_was)
*where_it_was = alloc_none;
if (where_it_was)
*where_it_was = where;
lv->status &= ~LV_RESHAPE;
@@ -1841,7 +1842,7 @@ static int _reshape_adjust_to_size(struct logical_volume *lv,
/* Externally visible LV size w/o reshape space */
lv->le_count = seg->len = new_le_count;
lv->size = (uint64_t) (lv->le_count - new_image_count * _reshape_len_per_dev(seg)) * lv->vg->extent_size;
lv->size = (lv->le_count - (uint64_t) new_image_count * _reshape_len_per_dev(seg)) * lv->vg->extent_size;
/* seg->area_len does not change */
if (old_image_count < new_image_count) {
@@ -1988,12 +1989,16 @@ static int _raid_reshape_remove_images(struct logical_volume *lv,
const unsigned new_stripes, const unsigned new_stripe_size,
struct dm_list *allocate_pvs, struct dm_list *removal_lvs)
{
uint32_t available_slvs, current_le_count, reduced_le_count, removed_slvs, s;
int stripe_size_changed;
uint32_t available_slvs, current_le_count, reduced_le_count, removed_slvs, s, stripe_size;
uint64_t extend_le_count;
unsigned devs_health, devs_in_sync;
struct lv_segment *seg = first_seg(lv);
struct lvinfo info = { 0 };
stripe_size = seg->stripe_size;
stripe_size_changed = new_stripe_size && (stripe_size != new_stripe_size);
if (seg_is_any_raid6(seg) && new_stripes < 3) {
log_error("Minimum 3 stripes required for %s LV %s.",
lvseg_name(seg), display_lvname(lv));
@@ -2118,7 +2123,15 @@ static int _raid_reshape_remove_images(struct logical_volume *lv,
return 0;
}
seg->stripe_size = new_stripe_size;
/* May allow stripe size changes > 2 legs */
if (new_image_count > 2)
seg->stripe_size = new_stripe_size;
else {
seg->stripe_size = stripe_size;
if (stripe_size_changed)
log_warn("WARNING: ignoring --stripesize on conversion of %s to 1 stripe.",
display_lvname(lv));
}
return 1;
}
@@ -2329,7 +2342,7 @@ static int _raid_reshape(struct logical_volume *lv,
if (!seg_is_reshapable_raid(seg))
return_0;
if (!is_same_level(seg->segtype, new_segtype))
if (!_is_same_level(seg->segtype, new_segtype))
return_0;
if (!(old_image_count = seg->area_count))
@@ -2508,7 +2521,7 @@ static int _reshape_requested(const struct logical_volume *lv, const struct segm
return 0;
/* Switching raid levels is a takeover, no reshape */
if (!is_same_level(seg->segtype, segtype))
if (!_is_same_level(seg->segtype, segtype))
return 0;
/* Possible takeover in case #data_copies == #stripes */
@@ -3426,6 +3439,12 @@ int lv_raid_split_and_track(struct logical_volume *lv,
int s;
struct lv_segment *seg = first_seg(lv);
if (is_lockd_type(lv->vg->lock_type)) {
log_error("Splitting raid image is not allowed with lock_type %s.",
lv->vg->lock_type);
return 0;
}
if (!seg_is_mirrored(seg)) {
log_error("Unable to split images from non-mirrored RAID.");
return 0;
@@ -5156,15 +5175,23 @@ static int _takeover_downconvert_wrapper(TAKEOVER_FN_ARGS)
return 0;
}
if (seg_is_any_raid5(seg) &&
segtype_is_raid1(new_segtype)) {
if (seg->area_count != 2) {
log_error("Can't convert %s LV %s to %s with != 2 legs.",
lvseg_name(seg), display_lvname(lv), new_segtype->name);
return 0;
if (seg_is_raid4(seg) || seg_is_any_raid5(seg)) {
if (segtype_is_raid1(new_segtype)) {
if (seg->area_count != 2) {
log_error("Can't convert %s LV %s to %s with != 2 legs.",
lvseg_name(seg), display_lvname(lv), new_segtype->name);
return 0;
}
if (seg->area_count != new_image_count) {
log_error(INTERNAL_ERROR "Bogus new_image_count converting %s LV %s to %s.",
lvseg_name(seg), display_lvname(lv), new_segtype->name);
return 0;
}
}
if (seg->area_count != new_image_count) {
log_error(INTERNAL_ERROR "Bogus new_image_count converting %s LV %s to %s.",
if ((segtype_is_striped_target(new_segtype) || segtype_is_any_raid0(new_segtype)) &&
seg->area_count < 3) {
log_error("Can't convert %s LV %s to %s with < 3 legs.",
lvseg_name(seg), display_lvname(lv), new_segtype->name);
return 0;
}
@@ -6034,7 +6061,7 @@ static int _set_convenient_raid145610_segtype_to(const struct lv_segment *seg_fr
const struct segment_type *segtype_sav = *segtype;
/* Bail out if same RAID level is requested. */
if (is_same_level(seg_from->segtype, *segtype))
if (_is_same_level(seg_from->segtype, *segtype))
return 1;
log_debug("Checking LV %s requested %s segment type for convenience",
@@ -6126,9 +6153,12 @@ static int _set_convenient_raid145610_segtype_to(const struct lv_segment *seg_fr
seg_flag = SEG_RAID6_N_6;
} else if (segtype_is_any_raid5(*segtype))
/* No result for raid6_{zr,nr,nc} */
if (!(seg_flag = _raid_seg_flag_6_to_5(seg_from)) ||
!(seg_flag & (*segtype)->flags))
if (!(seg_flag = _raid_seg_flag_6_to_5(seg_from)))
/*
* No result for raid6_{zr,nr,nc}.
*
* Offer to convert to corresponding raid6_*_6 type first.
*/
seg_flag = _raid_segtype_flag_5_to_6(*segtype);
/* -> raid1 */
@@ -6285,10 +6315,21 @@ static int _conversion_options_allowed(const struct lv_segment *seg_from,
r = 0;
}
/* Can't reshape stripes or stripe size when performing a takeover! */
if (!_is_same_level(seg_from->segtype, *segtype_to)) {
if (stripes && stripes != _data_rimages_count(seg_from, seg_from->area_count))
log_warn("WARNING: ignoring --stripes option on takeover of %s (reshape afterwards).",
display_lvname(seg_from->lv));
if (!seg_is_raid1(seg_from) && new_stripe_size_supplied)
log_warn("WARNING: ignoring --stripesize option on takeover of %s (reshape afterwards).",
display_lvname(seg_from->lv));
}
if (r &&
!yes &&
strcmp((*segtype_to)->name, SEG_TYPE_NAME_MIRROR) && /* "mirror" is prompted for later */
!is_same_level(seg_from->segtype, *segtype_to)) { /* Prompt here for takeover */
!_is_same_level(seg_from->segtype, *segtype_to)) { /* Prompt here for takeover */
const char *basic_fmt = "Are you sure you want to convert %s LV %s";
const char *type_fmt = " to %s type";
const char *question_fmt = "? [y/n]: ";
@@ -6398,7 +6439,7 @@ int lv_raid_convert(struct logical_volume *lv,
stripe_size = new_stripe_size_supplied ? new_stripe_size : seg->stripe_size;
if (segtype_is_striped(new_segtype))
new_image_count = stripes ? : seg->area_count;
new_image_count = stripes > 1 ? stripes : seg->area_count;
if (!_check_max_raid_devices(new_image_count))
return_0;
@@ -6417,7 +6458,7 @@ int lv_raid_convert(struct logical_volume *lv,
/* https://bugzilla.redhat.com/1439399 */
if (lv_is_origin(lv)) {
log_error("Can't convert snapshot origin %s.", display_lvname(lv));
log_error("Can't convert RAID LV %s while under snapshot.", display_lvname(lv));
return 0;
}
@@ -6457,11 +6498,20 @@ int lv_raid_convert(struct logical_volume *lv,
return 0;
}
/*
* stripes and stripe_size can only be changed via reshape, not in a takeover!
*
* Ignore any of them here unless a takeover from raid1 to
* raid4/5 is requested when stripe size may be defined.
*/
stripes = _data_rimages_count(seg, seg->area_count);
stripe_size = seg_is_raid1(seg) ? stripe_size : seg->stripe_size;
takeover_fn = _get_takeover_fn(first_seg(lv), new_segtype, new_image_count);
/* Exit without doing activation checks if the combination isn't possible */
if (_takeover_not_possible(takeover_fn))
return takeover_fn(lv, new_segtype, yes, force, new_image_count, 0, new_stripes, stripe_size,
return takeover_fn(lv, new_segtype, yes, force, new_image_count, 0, stripes, stripe_size,
region_size, allocate_pvs);
/*
@@ -6490,7 +6540,7 @@ int lv_raid_convert(struct logical_volume *lv,
lv->status &= ~LV_RESHAPE;
return takeover_fn(lv, new_segtype, yes, force, new_image_count, 0, new_stripes, stripe_size,
return takeover_fn(lv, new_segtype, yes, force, new_image_count, 0, stripes, stripe_size,
region_size, allocate_pvs);
}

View File

@@ -1,686 +0,0 @@
/*
* Copyright (C) 2009-2010 Red Hat, Inc. All rights reserved.
*
* This file is part of LVM2.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the GNU Lesser General Public License v.2.1.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program; if not, write to the Free Software Foundation,
* Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "lib.h"
#include "locking.h"
#include "metadata.h"
#include "segtype.h"
#include "toolcontext.h"
#include "lvmetad.h"
/*
 * Attach lv to the replicator device rdev.
 *
 * Returns 0 if either argument is NULL, if lv_is_rimage(lv) already holds,
 * or if rdev already has an LV attached.  Otherwise lv is hidden, linked
 * with rdev in both directions, and the result of registering
 * rdev->replicator_dev as a user of lv is returned.
 */
int replicator_dev_add_rimage(struct replicator_device *rdev,
			      struct logical_volume *lv)
{
	if (!rdev || !lv)
		return_0;

	if (lv_is_rimage(lv)) {
		log_error("Logical volume %s is already part of other "
			  "replicator.", lv->name);
	} else if (rdev->lv) {
		log_error("Logical volume %s can not be attached to an "
			  "already defined replicator device", lv->name);
	} else {
		lv_set_hidden(lv);
		rdev->lv = lv;
		lv->rdevice = rdev;

		return add_seg_to_segs_using_this_lv(lv, rdev->replicator_dev);
	}

	return 0;
}
/*
 * Detach the LV currently attached to the replicator device rdev.
 *
 * Returns the detached LV, made visible again, or NULL when rdev is NULL,
 * has no LV, or the segment reference cannot be removed.
 */
struct logical_volume *replicator_dev_remove_rimage(struct replicator_device *rdev)
{
	struct logical_volume *image;

	if (!rdev)
		return_NULL;

	if (!(image = rdev->lv))
		return_NULL;

	if (!remove_seg_from_segs_using_this_lv(image, rdev->replicator_dev))
		return_NULL;

	/* FIXME: - check for site references */
	image->rdevice = NULL;
	rdev->lv = NULL;
	lv_set_visible(image);

	return image;
}
/*
 * Attach slog as the sync log of the replicator device rdev.
 *
 * Returns 0 if either argument is NULL, if rdev already has a sync log, or
 * if slog is already tied to a replicator device.  Otherwise slog is hidden,
 * linked with rdev in both directions, and the result of registering
 * rdev->replicator_dev as a user of slog is returned.
 */
int replicator_dev_add_slog(struct replicator_device *rdev,
			    struct logical_volume *slog)
{
	if (!rdev || !slog)
		return_0;

	if (rdev->slog) {
		log_error("Replicator device in site %s already has sync log.",
			  rdev->rsite->name);
	} else if (slog->rdevice) {
		log_error("Sync log %s is already used by replicator %s.",
			  slog->name, slog->rdevice->rsite->replicator->name);
	} else {
		lv_set_hidden(slog);
		rdev->slog = slog;
		slog->rdevice = rdev;

		return add_seg_to_segs_using_this_lv(slog, rdev->replicator_dev);
	}

	return 0;
}
/*
 * Detach the sync log from the replicator device rdev.
 *
 * Returns the former sync log LV, made visible again, or NULL when rdev is
 * NULL, has no sync log, or the segment reference cannot be removed.
 */
struct logical_volume *replicator_dev_remove_slog(struct replicator_device *rdev)
{
	struct logical_volume *synclog;

	if (!rdev)
		return_NULL;

	if (!(synclog = rdev->slog)) {
		log_error("Replicator device in site %s does not have sync log.",
			  rdev->rsite->name);
		return NULL;
	}

	if (!remove_seg_from_segs_using_this_lv(synclog, rdev->replicator_dev))
		return_NULL;

	synclog->rdevice = NULL;
	rdev->slog = NULL;
	lv_set_visible(synclog);

	return synclog;
}
/*
 * Mark replicator_lv as a replicator and optionally attach a
 * replicator-dev segment to it.
 *
 * The first time it is seen (REPLICATOR flag not yet set), replicator_lv
 * gets its rsites list initialised, is hidden, and is flagged REPLICATOR.
 * With a NULL replicator_dev_seg nothing else happens and 1 is returned.
 *
 * Returns 0 if replicator_lv is NULL or the segment already belongs to a
 * replicator; otherwise the result of registering the segment as a user
 * of replicator_lv.
 */
int replicator_add_replicator_dev(struct logical_volume *replicator_lv,
				  struct lv_segment *replicator_dev_seg)
{
	int first_use;

	if (!replicator_lv)
		return_0;

	first_use = !(replicator_lv->status & REPLICATOR);
	if (first_use) {
		dm_list_init(&replicator_lv->rsites);
		lv_set_hidden(replicator_lv);
		replicator_lv->status |= REPLICATOR;
	}

	if (replicator_dev_seg) {
		if (replicator_dev_seg->replicator) {
			log_error("Replicator device %s is already part of replicator.",
				  replicator_dev_seg->lv->name);
			return 0;
		}

		replicator_dev_seg->replicator = replicator_lv;

		return add_seg_to_segs_using_this_lv(replicator_lv, replicator_dev_seg);
	}

	return 1;
}
/**
 * Detach a replicator-dev segment from its replicator - NOT IMPLEMENTED.
 *
 * As written, this function only logs an error and returns NULL.
 *
 * Original design note (intent unclear, kept for reference): it should
 * return the rimage (?) LV upon successful detach of the device, and the
 * entire LV entry should be removed by this call (?).
 *
 * The "#if 0" body is a disabled sketch; it refers to "replicator_seg",
 * which is not declared in this function, so it would not compile if
 * enabled as is.
 */
struct logical_volume *replicator_remove_replicator_dev(struct lv_segment *replicator_dev_seg)
{
struct logical_volume *lv = NULL;
log_error("FIXME: not implemented.");
#if 0
/* FIXME: - this is going to be complex.... */
if (!replicator_dev_seg)
return_NULL;
/* if slog or rimage - exit */
if (!remove_seg_from_segs_using_this_lv(lv, replicator_seg))
return_NULL;
replicator_seg->rlog_lv = NULL;
lv->status &= ~REPLICATOR_LOG;
lv_set_visible(lv);
#endif
/* lv is never reassigned outside the disabled code, so this is always NULL. */
return lv;
}
/*
 * Attach rlog_lv as the replicator log of replicator_seg.
 *
 * Returns 0 if either argument is NULL or if rlog_lv is already flagged
 * REPLICATOR_LOG.  Otherwise rlog_lv is hidden, flagged REPLICATOR_LOG,
 * stored in replicator_seg->rlog_lv, and the result of registering
 * replicator_seg as a user of rlog_lv is returned.
 */
int replicator_add_rlog(struct lv_segment *replicator_seg,
			struct logical_volume *rlog_lv)
{
	/*
	 * replicator_seg is dereferenced below; reject NULL up front, the
	 * same way replicator_remove_rlog() does.
	 */
	if (!replicator_seg || !rlog_lv)
		return_0;

	if (rlog_lv->status & REPLICATOR_LOG) {
		log_error("Rlog device %s is already used.", rlog_lv->name);
		return 0;
	}

	lv_set_hidden(rlog_lv);
	rlog_lv->status |= REPLICATOR_LOG;
	replicator_seg->rlog_lv = rlog_lv;

	return add_seg_to_segs_using_this_lv(rlog_lv, replicator_seg);
}
/**
 * Detach the replicator log LV from a replicator segment.
 *
 * \param replicator_seg  Replicator segment whose log should be detached.
 *
 * \return Returns the detached log LV (REPLICATOR_LOG flag cleared, set
 *         visible), or NULL when replicator_seg is NULL, it has no log, or
 *         the segment reference cannot be removed from the LV.
 */
struct logical_volume *replicator_remove_rlog(struct lv_segment *replicator_seg)
{
	struct logical_volume *lv;

	/* Pointer-returning function: use return_NULL like the sibling helpers */
	if (!replicator_seg)
		return_NULL;

	if (!(lv = replicator_seg->rlog_lv)) {
		log_error("Replog segment %s does not have rlog.",
			  replicator_seg->lv->name);
		return NULL;
	}

	if (!remove_seg_from_segs_using_this_lv(lv, replicator_seg))
		return_NULL;

	replicator_seg->rlog_lv = NULL;
	lv->status &= ~REPLICATOR_LOG;
	lv_set_visible(lv);

	return lv;
}
#if 0
/*
 * Disabled code - everything up to the matching #endif is compiled out.
 *
 * Original note: create a new LV to stand in for the original LV; this
 * target will have a 'replicator' segment. (The code below currently adds
 * an "error" segment instead.)
 *
 * The new LV's name is origin->name cut off at the first occurrence of
 * rep_suffix. If an LV with that name already exists in the VG it is only
 * flagged VIRTUAL; otherwise an empty LV is created, given one virtual
 * "error" segment of origin->le_count extents, and flagged VIRTUAL.
 *
 * Returns 1 on success, 0 when rep_suffix is not found in the name or when
 * creating the LV or adding its segment fails.
 */
int lv_add_replicator(struct logical_volume *origin, const char *rep_suffix)
{
struct logical_volume *rep_lv;
char *name;
size_t slen;
if (!(name = strstr(origin->name, rep_suffix))) {
log_error("Failed to find replicator suffix %s in LV name %s",
rep_suffix, origin->name);
return 0;
}
/* Length of the name part in front of the suffix */
slen = (size_t)(name - origin->name);
/* Stack copy of that prefix, NUL-terminated */
name = alloca(slen + 1);
memcpy(name, origin->name, slen);
name[slen] = 0;
/* Reuse an LV that already carries the prefix name */
if ((rep_lv = find_lv(origin->vg, name))) {
rep_lv->status |= VIRTUAL;
return 1;
}
if (!(rep_lv = lv_create_empty(name, &origin->lvid,
LVM_READ | LVM_WRITE | VISIBLE_LV,
ALLOC_INHERIT, origin->vg)))
return_0;
if (!lv_add_virtual_segment(rep_lv, 0, origin->le_count,
get_segtype_from_string(origin->vg->cmd,
"error")))
return_0;
rep_lv->status |= VIRTUAL;
return 1;
}
/* Placeholder: does nothing and always reports success. */
int lv_remove_replicator(struct logical_volume *lv)
{
return 1;
}
#endif
/*
* Check all replicator structures:
* only non-clustered VG for Replicator
* only one segment in replicator LV
* site has correct combination of operation_mode parameters
* site and related devices have correct index numbers
* duplicate site names, site indexes, device names, device indexes
*/
int check_replicator_segment(const struct lv_segment *rseg)
{
struct replicator_site *rsite, *rsiteb;
struct replicator_device *rdev, *rdevb;
struct logical_volume *lv = rseg->lv;
int r = 1;
if (vg_is_clustered(lv->vg)) {
log_error("Volume Group %s of replicator %s is clustered",
lv->vg->name, lv->name);
return 0;
}
if (dm_list_size(&lv->segments) != 1) {
log_error("Replicator %s segment size %d != 1",
lv->name, dm_list_size(&lv->segments));
return 0;
}
dm_list_iterate_items(rsite, &lv->rsites) {
if (rsite->op_mode == DM_REPLICATOR_SYNC) {
if (rsite->fall_behind_timeout) {
log_error("Defined fall_behind_timeout="
"%d for sync replicator %s/%s.",
rsite->fall_behind_timeout, lv->name,
rsite->name);
r = 0;
}
if (rsite->fall_behind_ios) {
log_error("Defined fall_behind_ios="
"%d for sync replicator %s/%s.",
rsite->fall_behind_ios, lv->name, rsite->name);
r = 0;
}
if (rsite->fall_behind_data) {
log_error("Defined fall_behind_data="
FMTu64 " for sync replicator %s/%s.",
rsite->fall_behind_data, lv->name, rsite->name);
r = 0;
}
} else {
if (rsite->fall_behind_timeout && rsite->fall_behind_ios) {
log_error("Defined fall_behind_timeout and"
" fall_behind_ios for async replicator %s/%s.",
lv->name, rsite->name);
r = 0;
}
if (rsite->fall_behind_timeout && rsite->fall_behind_data) {
log_error("Defined fall_behind_timeout and"
" fall_behind_data for async replicator %s/%s.",
lv->name, rsite->name);
r = 0;
}
if (rsite->fall_behind_ios && rsite->fall_behind_data) {
log_error("Defined fall_behind_ios and"
" fall_behind_data for async replicator %s/%s.",
lv->name, rsite->name);
r = 0;
}
if (!rsite->fall_behind_ios &&
!rsite->fall_behind_data &&
!rsite->fall_behind_timeout) {
log_error("fall_behind_timeout,"
" fall_behind_ios and fall_behind_data are"
" undefined for async replicator %s/%s.",
lv->name, rsite->name);
r = 0;
}
}
dm_list_iterate_items(rsiteb, &lv->rsites) {
if (rsite == rsiteb)
break;
if (strcasecmp(rsite->name, rsiteb->name) == 0) {
log_error("Duplicate site name "
"%s detected for replicator %s.",
rsite->name, lv->name);
r = 0;
}
if ((rsite->vg_name && rsiteb->vg_name &&
strcasecmp(rsite->vg_name, rsiteb->vg_name) == 0) ||
(!rsite->vg_name && !rsiteb->vg_name)) {
log_error("Duplicate VG name "
"%s detected for replicator %s.",
(rsite->vg_name) ? rsite->vg_name : "<local>",
lv->name);
r = 0;
}
if (rsite->site_index == rsiteb->site_index) {
log_error("Duplicate site index %d detected "
"for replicator site %s/%s.",
rsite->site_index, lv->name,
rsite->name);
r = 0;
}
if (rsite->site_index > rseg->rsite_index_highest) {
log_error("Site index %d > %d (too high) "
"for replicator site %s/%s.",
rsite->site_index,
rseg->rsite_index_highest,
lv->name, rsite->name);
r = 0;
}
}
dm_list_iterate_items(rdev, &rsite->rdevices) {
dm_list_iterate_items(rdevb, &rsite->rdevices) {
if (rdev == rdevb)
break;
if (rdev->slog && (rdev->slog == rdevb->slog)) {
log_error("Duplicate sync log %s "
"detected for replicator %s.",
rdev->slog->name, lv->name);
r = 0;
}
if (strcasecmp(rdev->name, rdevb->name) == 0) {
log_error("Duplicate device name %s "
"detected for replicator %s.",
rdev->name, lv->name);
r = 0;
}
if (rdev->device_index == rdevb->device_index) {
log_error("Duplicate device index %"
PRId64 " detected for "
"replicator site %s/%s.",
rdev->device_index,
lv->name, rsite->name);
r = 0;
}
if (rdev->device_index > rseg->rdevice_index_highest) {
log_error("Device index %" PRIu64
" > %" PRIu64 " (too high) "
"for replicator site %s/%s.",
rdev->device_index,
rseg->rdevice_index_highest,
lv->name, rsite->name);
r = 0;
}
}
}
}
return r;
}
/**
 * Is this LV flagged REPLICATOR and linked, through its replicator device,
 * to a site in REPLICATOR_STATE_ACTIVE state
 *
 * \return Returns 1 if so, 0 otherwise.
 */
int lv_is_active_replicator_dev(const struct logical_volume *lv)
{
	if (!(lv->status & REPLICATOR))
		return 0;

	/* Both links must exist before the site state can be read */
	if (!lv->rdevice || !lv->rdevice->rsite)
		return 0;

	return (lv->rdevice->rsite->state == REPLICATOR_STATE_ACTIVE) ? 1 : 0;
}
/**
 * Is this LV a replicator control device, i.e. flagged REPLICATOR with
 * a replicator segment as its first segment
 *
 * \return Returns 1 if so, 0 otherwise.
 */
int lv_is_replicator(const struct logical_volume *lv)
{
	if (!(lv->status & REPLICATOR))
		return 0;

	/* first_seg() is only consulted when the LV has some segment */
	if (dm_list_empty(&lv->segments))
		return 0;

	return seg_is_replicator(first_seg(lv)) ? 1 : 0;
}
/**
 * Is this LV a replicator device, i.e. flagged REPLICATOR with
 * a replicator-dev segment as its first segment
 *
 * \return Returns 1 if so, 0 otherwise.
 */
int lv_is_replicator_dev(const struct logical_volume *lv)
{
	if (!(lv->status & REPLICATOR))
		return 0;

	/* first_seg() is only consulted when the LV has some segment */
	if (dm_list_empty(&lv->segments))
		return 0;

	return seg_is_replicator_dev(first_seg(lv)) ? 1 : 0;
}
/**
 * Is this LV a replicated origin LV, i.e. the 'lv' member of the
 * replicator device it points to
 *
 * \return Returns 1 if so, 0 otherwise.
 */
int lv_is_rimage(const struct logical_volume *lv)
{
	if (!lv->rdevice)
		return 0;

	return (lv->rdevice->lv == lv) ? 1 : 0;
}
/**
 * Is this LV a sync log, i.e. the 'slog' member of the replicator device
 * it points to
 *
 * \return Returns 1 if so, 0 otherwise.
 */
int lv_is_slog(const struct logical_volume *lv)
{
	if (!lv->rdevice)
		return 0;

	return (lv->rdevice->slog == lv) ? 1 : 0;
}
/**
 * Returns the LV of the first replicator-dev listed in the first site of
 * the replicator in case the LV is replicator-dev, NULL otherwise.
 *
 * Only the first site is inspected: when it has no devices the result is
 * NULL even if later sites do have some.
 */
struct logical_volume *first_replicator_dev(const struct logical_volume *lv)
{
	struct replicator_site *rsite;
	struct replicator_device *rdev;

	if (!lv_is_replicator_dev(lv))
		return NULL;

	dm_list_iterate_items(rsite, &first_seg(lv)->replicator->rsites) {
		/* The loop body returns on the very first device, if any */
		dm_list_iterate_items(rdev, &rsite->rdevices)
			return rdev->replicator_dev->lv;

		/* First site is empty - do not look at further sites */
		return NULL;
	}

	return NULL;
}
/**
* Add VG open parameters to sorted cmd_vg list.
*
* Maintain the alphabeticaly ordered list, avoid duplications.
*
* \return Returns newly created or already present cmd_vg entry,
* or NULL in error case.
*/
struct cmd_vg *cmd_vg_add(struct dm_pool *mem, struct dm_list *cmd_vgs,
const char *vg_name, const char *vgid,
uint32_t flags)
{
struct cmd_vg *cvl, *ins;
if (!vg_name && !vgid) {
log_error("Either vg_name or vgid must be set.");
return NULL;
}
/* Is it already in the list ? */
if ((cvl = cmd_vg_lookup(cmd_vgs, vg_name, vgid)))
return cvl;
if (!(cvl = dm_pool_zalloc(mem, sizeof(*cvl)))) {
log_error("Allocation of cmd_vg failed.");
return NULL;
}
if (vg_name && !(cvl->vg_name = dm_pool_strdup(mem, vg_name))) {
dm_pool_free(mem, cvl);
log_error("Allocation of vg_name failed.");
return NULL;
}
if (vgid && !(cvl->vgid = dm_pool_strdup(mem, vgid))) {
dm_pool_free(mem, cvl);
log_error("Allocation of vgid failed.");
return NULL;
}
cvl->flags = flags;
if (vg_name)
dm_list_iterate_items(ins, cmd_vgs)
if (strcmp(vg_name, ins->vg_name) < 0) {
cmd_vgs = &ins->list; /* new position */
break;
}
dm_list_add(cmd_vgs, &cvl->list);
return cvl;
}
/**
* Find cmd_vg with given vg_name in cmd_vgs list.
*
* \param cmd_vgs List of cmd_vg entries.
*
* \param vg_name Name of VG to be found.
* \param vgid UUID of VG to be found.
*
* \return Returns cmd_vg entry if vg_name or vgid is found,
* NULL otherwise.
*/
struct cmd_vg *cmd_vg_lookup(struct dm_list *cmd_vgs,
const char *vg_name, const char *vgid)
{
struct cmd_vg *cvl;
dm_list_iterate_items(cvl, cmd_vgs)
if ((vgid && cvl->vgid && !strcmp(vgid, cvl->vgid)) ||
(vg_name && cvl->vg_name && !strcmp(vg_name, cvl->vg_name)))
return cvl;
return NULL;
}
/**
 * Read multiple VGs stored in cmd_vgs list, in list order
 * (the list is kept alphabetically ordered by cmd_vg_add).
 * Every successfully read VG gets its cmd_vgs pointer set to the list.
 * (supports FAILED_INCONSISTENT)
 *
 * Reading stops at the first failure; the failed VG handle stays stored
 * in its entry.
 *
 * \param cmd      Command context passed to vg_read.
 * \param cmd_vgs  Contains list of cmd_vg entries.
 *
 * \return Returns 1 if all VGs in cmd_vgs list are correctly
 * read, 0 otherwise.
 */
int cmd_vg_read(struct cmd_context *cmd, struct dm_list *cmd_vgs)
{
	struct cmd_vg *cvl;

	/* Iterate through alphabetically ordered cmd_vg list */
	dm_list_iterate_items(cvl, cmd_vgs) {
		cvl->vg = vg_read(cmd, cvl->vg_name, cvl->vgid, cvl->flags, 0);
		if (vg_read_error(cvl->vg)) {
			/* vg_name is NULL for entries added by vgid only */
			log_debug_metadata("Failed to vg_read %s",
					   cvl->vg_name ? cvl->vg_name : "<no name>");
			return 0;
		}

		cvl->vg->cmd_vgs = cmd_vgs;	/* Make it usable in VG */
	}

	return 1;
}
/**
 * Release VGs held by the entries of the list, walking it backwards.
 *
 * A VG whose read failed is only released; any other VG is unlocked and
 * released. The vg pointer of every entry is cleared afterwards.
 *
 * \param cmd_vgs  Contains list of cmd_vg entries.
 */
void free_cmd_vgs(struct dm_list *cmd_vgs)
{
	struct cmd_vg *entry;

	/* Reverse of the order used for reading */
	dm_list_iterate_back_items(entry, cmd_vgs) {
		if (!vg_read_error(entry->vg)) {
			unlock_and_release_vg(entry->vg->cmd, entry->vg, entry->vg_name);
		} else {
			release_vg(entry->vg);
		}

		entry->vg = NULL;
	}
}
/**
 * Find all needed remote VGs for processing given LV.
 * Missing VGs are added to VG's cmd_vg list and the cmd_missing_vgs
 * counter of the VG is incremented for each of them.
 *
 * \return Returns 1 when the LV is not a replicator-dev or no VG is
 *         missing, 0 when at least one VG had to be added or adding it
 *         failed (cmd_missing_vgs is reset to 0 in the failure case).
 */
int find_replicator_vgs(const struct logical_volume *lv)
{
	struct replicator_site *rsite;
	int all_present = 1;

	if (!lv_is_replicator_dev(lv))
		return 1;

	dm_list_iterate_items(rsite, &first_seg(lv)->replicator->rsites) {
		/* Site without VG name or VG without cmd_vgs list: nothing to do */
		if (!rsite->vg_name || !lv->vg->cmd_vgs)
			continue;

		/* Already known */
		if (cmd_vg_lookup(lv->vg->cmd_vgs, rsite->vg_name, NULL))
			continue;

		all_present = 0;

		/* Using cmd memory pool for cmd_vg list allocation */
		if (!cmd_vg_add(lv->vg->cmd->mem, lv->vg->cmd_vgs,
				rsite->vg_name, NULL, 0)) {
			lv->vg->cmd_missing_vgs = 0; /* do not retry */
			return_0;
		}

		log_debug_metadata("VG: %s added as missing.", rsite->vg_name);
		lv->vg->cmd_missing_vgs++;
	}

	return all_present;
}
/**
 * Read all remote VGs from lv's replicator sites.
 * Function is used in activation context and needs all VGs already locked.
 *
 * For every site with a vg_name the VG is read and stored in rsite->vg,
 * and each device of the site gets its lv resolved by name in that VG.
 * On any failure all VGs taken so far are released again.
 *
 * \return Returns 1 on success (or when lv is not a replicator-dev),
 *         0 when a VG cannot be read or a device LV cannot be found.
 */
int lv_read_replicator_vgs(const struct logical_volume *lv)
{
	struct replicator_device *rdev;
	struct replicator_site *rsite;
	struct volume_group *vg;

	if (!lv_is_replicator_dev(lv))
		return 1;

	dm_list_iterate_items(rsite, &first_seg(lv)->replicator->rsites) {
		if (!rsite->vg_name)
			continue;

		vg = vg_read(lv->vg->cmd, rsite->vg_name, 0, 0, 0); // READ_WITHOUT_LOCK
		if (vg_read_error(vg)) {
			log_error("Unable to read volume group %s",
				  rsite->vg_name);
			/*
			 * The failed handle is not stored in rsite->vg, so
			 * lv_release_replicator_vgs() would never see it -
			 * release it here to avoid leaking it.
			 */
			release_vg(vg);
			goto bad;
		}

		rsite->vg = vg;

		/* FIXME: handling missing LVs needs to be better */
		dm_list_iterate_items(rdev, &rsite->rdevices)
			if (!(rdev->lv = find_lv(vg, rdev->name))) {
				log_error("Unable to find %s in volume group %s",
					  rdev->name, rsite->vg_name);
				goto bad;
			}
	}

	return 1;

bad:
	/* Releases every VG already stored in a site, including the current one */
	lv_release_replicator_vgs(lv);

	return 0;
}
/**
* Release all VG resources taken by lv's replicator sites.
* Function is used in activation context and needs all VGs already locked.
*/
void lv_release_replicator_vgs(const struct logical_volume *lv)
{
struct replicator_site *rsite;
if (!lv_is_replicator_dev(lv))
return;
dm_list_iterate_back_items(rsite, &first_seg(lv)->replicator->rsites)
if (rsite->vg_name && rsite->vg) {
release_vg(rsite->vg);
rsite->vg = NULL;
}
}

View File

@@ -36,8 +36,6 @@ struct dev_manager;
#define SEG_VIRTUAL (1ULL << 5)
#define SEG_CANNOT_BE_ZEROED (1ULL << 6)
#define SEG_MONITORED (1ULL << 7)
#define SEG_REPLICATOR (1ULL << 8)
#define SEG_REPLICATOR_DEV (1ULL << 9)
#define SEG_RAID (1ULL << 10)
#define SEG_THIN_POOL (1ULL << 11)
#define SEG_THIN_VOLUME (1ULL << 12)
@@ -165,7 +163,7 @@ struct dev_manager;
((segtype_is_striped(segtype) || segtype_is_mirror(segtype) || \
segtype_is_cache(segtype) || segtype_is_cache_pool(segtype) || \
segtype_is_thin(segtype) || segtype_is_snapshot(segtype) || \
(segtype_is_raid(segtype) && !segtype_is_raid1(segtype))) ? 1 : 0)
(segtype_is_striped_raid(segtype))) ? 1 : 0)
#define seg_is_striped_target(seg) segtype_is_striped_target((seg)->segtype)
#define seg_is_cache(seg) segtype_is_cache((seg)->segtype)
@@ -198,8 +196,6 @@ struct dev_manager;
#define seg_is_raid_with_meta(seg) segtype_is_raid_with_meta((seg)->segtype)
#define seg_is_striped_raid(seg) segtype_is_striped_raid((seg)->segtype)
#define seg_is_reshapable_raid(seg) segtype_is_reshapable_raid((seg)->segtype)
#define seg_is_replicator(seg) ((seg)->segtype->flags & SEG_REPLICATOR ? 1 : 0)
#define seg_is_replicator_dev(seg) ((seg)->segtype->flags & SEG_REPLICATOR_DEV ? 1 : 0)
#define seg_is_snapshot(seg) segtype_is_snapshot((seg)->segtype)
#define seg_is_striped(seg) segtype_is_striped((seg)->segtype)
#define seg_is_thin(seg) segtype_is_thin((seg)->segtype)
@@ -313,10 +309,6 @@ struct segment_type *init_unknown_segtype(struct cmd_context *cmd,
int init_raid_segtypes(struct cmd_context *cmd, struct segtype_library *seglib);
#endif
#ifdef REPLICATOR_INTERNAL
int init_replicator_segtype(struct cmd_context *cmd, struct segtype_library *seglib);
#endif
#define THIN_FEATURE_DISCARDS (1U << 0)
#define THIN_FEATURE_EXTERNAL_ORIGIN (1U << 1)
#define THIN_FEATURE_HELD_ROOT (1U << 2)

View File

@@ -386,3 +386,47 @@ int vg_remove_snapshot(struct logical_volume *cow)
return 1;
}
/*
 * Classify an LV that cannot serve as snapshot origin.
 * Returns the text naming the unsupported kind of volume, or NULL when no
 * type restriction applies. A mirror LV is accepted, but warnings about
 * snapshots of mirrors are printed for it.
 */
static const char *_snapshot_origin_unsupported_kind(const struct logical_volume *origin_lv)
{
	if (lv_is_cow(origin_lv))
		return "snapshots";

	if (lv_is_locked(origin_lv))
		return "locked volumes";

	if (lv_is_pvmove(origin_lv))
		return "pvmoved volumes";

	if (!lv_is_visible(origin_lv))
		return "hidden volumes";

	if (lv_is_merging_origin(origin_lv))
		return "an origin that has a merging snapshot";

	if (lv_is_cache_type(origin_lv) && !lv_is_cache(origin_lv))
		return "cache type volumes";

	if (lv_is_thin_type(origin_lv) && !lv_is_thin_volume(origin_lv))
		return "thin pool type volumes";

	if (lv_is_mirror_type(origin_lv)) {
		if (!lv_is_mirror(origin_lv))
			return "mirror subvolumes";

		log_warn("WARNING: Snapshots of mirrors can deadlock under rare device failures.");
		log_warn("WARNING: Consider using the raid1 mirror type to avoid this.");
		log_warn("WARNING: See global/mirror_segtype_default in lvm.conf.");

		/* Mirror type volumes are not tested against the raid rule */
		return NULL;
	}

	if (lv_is_raid_type(origin_lv) && !lv_is_raid(origin_lv))
		return "raid subvolumes";

	return NULL;
}

/*
 * Check if given LV is usable as snapshot origin LV.
 * Returns 1 when usable; otherwise logs the reason and returns 0.
 */
int validate_snapshot_origin(const struct logical_volume *origin_lv)
{
	const char *unsupported = _snapshot_origin_unsupported_kind(origin_lv);

	if (unsupported) {
		log_error("Snapshots of %s are not supported.", unsupported);
		return 0;
	}

	/* In a clustered VG an active origin has to be active exclusively on this node */
	if (vg_is_clustered(origin_lv->vg) && lv_is_active(origin_lv) &&
	    !lv_is_active_exclusive_locally(origin_lv)) {
		log_error("Snapshot origin must be active exclusively.");
		return 0;
	}

	return 1;
}

View File

@@ -365,16 +365,19 @@ int pool_check_overprovisioning(const struct logical_volume *lv)
if (sz != UINT64_C(~0)) {
log_warn("WARNING: Sum of all thin volume sizes (%s) exceeds the "
"size of thin pool%s%s%s (%s)!",
"size of thin pool%s%s%s (%s).",
display_size(cmd, thinsum),
more_pools ? "" : " ",
more_pools ? "s" : display_lvname(pool_lv),
txt,
(sz > 0) ? display_size(cmd, sz) : "no free space in volume group");
if (max_threshold > 99 || !min_percent)
log_print_unless_silent("WARNING: You have not turned on protection against thin pools running out of space.");
if (max_threshold > 99)
log_print_unless_silent("For thin pool auto extension activation/thin_pool_autoextend_threshold should be below 100.");
log_print_unless_silent("WARNING: Set activation/thin_pool_autoextend_threshold below 100 to trigger automatic extension of thin pools before they get full.");
if (!min_percent)
log_print_unless_silent("For thin pool auto extension activation/thin_pool_autoextend_percent should be above 0.");
log_print_unless_silent("WARNING: Set activation/thin_pool_autoextend_percent above 0 to specify by how much to extend thin pools reaching the threshold.");
/* FIXME Also warn if there isn't sufficient free space for one pool extension to occur? */
}
return 1;

View File

@@ -71,7 +71,7 @@ struct volume_group *alloc_vg(const char *pool_name, struct cmd_context *cmd,
dm_list_init(&vg->removed_historical_lvs);
dm_list_init(&vg->removed_pvs);
log_debug_mem("Allocated VG %s at %p.", vg->name, vg);
log_debug_mem("Allocated VG %s at %p.", vg->name ? : "<no name>", vg);
return vg;
}
@@ -86,7 +86,7 @@ static void _free_vg(struct volume_group *vg)
return;
}
log_debug_mem("Freeing VG %s at %p.", vg->name, vg);
log_debug_mem("Freeing VG %s at %p.", vg->name ? : "<no name>", vg);
dm_hash_destroy(vg->hostnames);
dm_pool_destroy(vg->vgmem);
@@ -104,8 +104,6 @@ void release_vg(struct volume_group *vg)
release_vg(vg->vg_committed);
release_vg(vg->vg_precommitted);
if (vg->cft_precommitted)
dm_config_destroy(vg->cft_precommitted);
_free_vg(vg);
}

View File

@@ -47,8 +47,6 @@ struct volume_group {
struct format_instance *fid;
const struct format_type *original_fmt; /* Set when processing backup files */
struct lvmcache_vginfo *vginfo;
struct dm_list *cmd_vgs;/* List of wanted/locked and opened VGs */
uint32_t cmd_missing_vgs;/* Flag marks missing VG */
uint32_t seqno; /* Metadata sequence number */
unsigned skip_validate_lock_args : 1;
unsigned lvmetad_update_pending: 1;
@@ -61,8 +59,7 @@ struct volume_group {
* _vg_update_vg_committed.
*/
struct volume_group *vg_committed;
struct dm_config_tree *cft_precommitted; /* Precommitted metadata */
struct volume_group *vg_precommitted; /* Parsed from cft */
struct volume_group *vg_precommitted;
alloc_policy_t alloc;
struct profile *profile;

View File

@@ -285,6 +285,15 @@ static int _add_log(struct dm_pool *mem, struct lv_segment *seg,
if (!laopts->exclusive && vg_is_clustered(seg->lv->vg))
clustered = 1;
else if (seg->lv->vg->lock_type && !strcmp(seg->lv->vg->lock_type, "dlm")) {
/*
* If shared lock was used due to -asy, then we set clustered
* to use a clustered mirror log with cmirrod.
*/
if (seg->lv->vg->cmd->lockd_lv_sh)
clustered = 1;
}
if (seg->log_lv) {
/* If disk log, use its UUID */
if (!(log_dlid = build_dm_uuid(mem, seg->log_lv, NULL))) {
@@ -294,7 +303,7 @@ static int _add_log(struct dm_pool *mem, struct lv_segment *seg,
}
} else {
/* If core log, use mirror's UUID and set DM_CORELOG flag */
if (!(log_dlid = build_dm_uuid(mem, seg->lv, NULL))) {
if (!(log_dlid = build_dm_uuid(mem, seg->lv, lv_is_pvmove(seg->lv) ? "pvmove" : NULL))) {
log_error("Failed to build uuid for mirror LV %s.",
seg->lv->name);
return 0;

Some files were not shown because too many files have changed in this diff Show More